diff --git a/plugins/ingest-attachment/build.gradle b/plugins/ingest-attachment/build.gradle index c80a8a1db59..97b5a23f116 100644 --- a/plugins/ingest-attachment/build.gradle +++ b/plugins/ingest-attachment/build.gradle @@ -26,7 +26,8 @@ versions << [ 'tika': '1.14', 'pdfbox': '2.0.3', 'bouncycastle': '1.55', - 'poi': '3.15' + 'poi': '3.15', + 'mime4j': '0.7.2' ] dependencies { @@ -59,11 +60,19 @@ dependencies { compile "org.apache.poi:poi-scratchpad:${versions.poi}" // Apple iWork compile 'org.apache.commons:commons-compress:1.10' + // Outlook documents + compile "org.apache.james:apache-mime4j-core:${versions.mime4j}" + compile "org.apache.james:apache-mime4j-dom:${versions.mime4j}" } // TODO: stop using LanguageIdentifier... compileJava.options.compilerArgs << "-Xlint:-deprecation" + +dependencyLicenses { + mapping from: /apache-mime4j-.*/, to: 'apache-mime4j' +} + forbiddenPatterns { exclude '**/*.docx' exclude '**/*.pdf' @@ -530,25 +539,6 @@ thirdPartyAudit.excludes = [ 'org.apache.http.client.utils.URIBuilder', 'org.apache.http.entity.ByteArrayEntity', 'org.apache.http.impl.client.DefaultHttpClient', - 'org.apache.james.mime4j.MimeException', - 'org.apache.james.mime4j.codec.DecodeMonitor', - 'org.apache.james.mime4j.codec.DecoderUtil', - 'org.apache.james.mime4j.dom.FieldParser', - 'org.apache.james.mime4j.dom.address.Address', - 'org.apache.james.mime4j.dom.address.AddressList', - 'org.apache.james.mime4j.dom.address.Mailbox', - 'org.apache.james.mime4j.dom.address.MailboxList', - 'org.apache.james.mime4j.dom.field.AddressListField', - 'org.apache.james.mime4j.dom.field.DateTimeField', - 'org.apache.james.mime4j.dom.field.MailboxListField', - 'org.apache.james.mime4j.dom.field.ParsedField', - 'org.apache.james.mime4j.dom.field.UnstructuredField', - 'org.apache.james.mime4j.field.LenientFieldParser', - 'org.apache.james.mime4j.parser.ContentHandler', - 'org.apache.james.mime4j.parser.MimeStreamParser', - 'org.apache.james.mime4j.stream.BodyDescriptor', - 'org.apache.james.mime4j.stream.Field', - 'org.apache.james.mime4j.stream.MimeConfig', 'org.apache.jcp.xml.dsig.internal.dom.DOMDigestMethod', 'org.apache.jcp.xml.dsig.internal.dom.DOMKeyInfo', 'org.apache.jcp.xml.dsig.internal.dom.DOMReference', diff --git a/plugins/ingest-attachment/licenses/apache-mime4j-LICENSE.txt b/plugins/ingest-attachment/licenses/apache-mime4j-LICENSE.txt new file mode 100644 index 00000000000..63151ee1458 --- /dev/null +++ b/plugins/ingest-attachment/licenses/apache-mime4j-LICENSE.txt @@ -0,0 +1,361 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + + + + THIS PRODUCT ALSO INCLUDES THIRD PARTY SOFTWARE REDISTRIBUTED UNDER THE + FOLLOWING LICENSES: + + Apache Commons Logging, + The Apache Software License, Version 1.1 (commons-logging-1.1.1.jar) + + The Apache Software License, Version 1.1 + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + 3. The end-user documentation included with the redistribution, + if any, must include the following acknowledgment: + "This product includes software developed by the + Apache Software Foundation (http://www.apache.org/)." + Alternately, this acknowledgment may appear in the software itself, + if and wherever such third-party acknowledgments normally appear. + + 4. The names "Apache" and "Apache Software Foundation" must + not be used to endorse or promote products derived from this + software without prior written permission. For written + permission, please contact apache@apache.org. + + 5. Products derived from this software may not be called "Apache", + nor may "Apache" appear in their name, without prior written + permission of the Apache Software Foundation. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. + + + Test messages from the Perl-MIME-Tools project, + + The "Artistic License" + + Preamble + + The intent of this document is to state the conditions under which a + Package may be copied, such that the Copyright Holder maintains some + semblance of artistic control over the development of the package, + while giving the users of the package the right to use and distribute + the Package in a more-or-less customary fashion, plus the right to make + reasonable modifications. + + Definitions: + + "Package" refers to the collection of files distributed by the + Copyright Holder, and derivatives of that collection of files + created through textual modification. + + "Standard Version" refers to such a Package if it has not been + modified, or has been modified in accordance with the wishes + of the Copyright Holder as specified below. + + "Copyright Holder" is whoever is named in the copyright or + copyrights for the package. + + "You" is you, if you're thinking about copying or distributing + this Package. + + "Reasonable copying fee" is whatever you can justify on the + basis of media cost, duplication charges, time of people involved, + and so on. (You will not be required to justify it to the + Copyright Holder, but only to the computing community at large + as a market that must bear the fee.) + + "Freely Available" means that no fee is charged for the item + itself, though there may be fees involved in handling the item. + It also means that recipients of the item may redistribute it + under the same conditions they received it. + + 1. You may make and give away verbatim copies of the source form of the + Standard Version of this Package without restriction, provided that you + duplicate all of the original copyright notices and associated disclaimers. + + 2. You may apply bug fixes, portability fixes and other modifications + derived from the Public Domain or from the Copyright Holder. A Package + modified in such a way shall still be considered the Standard Version. + + 3. You may otherwise modify your copy of this Package in any way, provided + that you insert a prominent notice in each changed file stating how and + when you changed that file, and provided that you do at least ONE of the + following: + + a) place your modifications in the Public Domain or otherwise make them + Freely Available, such as by posting said modifications to Usenet or + an equivalent medium, or placing the modifications on a major archive + site such as uunet.uu.net, or by allowing the Copyright Holder to include + your modifications in the Standard Version of the Package. + + b) use the modified Package only within your corporation or organization. + + c) rename any non-standard executables so the names do not conflict + with standard executables, which must also be provided, and provide + a separate manual page for each non-standard executable that clearly + documents how it differs from the Standard Version. + + d) make other distribution arrangements with the Copyright Holder. + + 4. You may distribute the programs of this Package in object code or + executable form, provided that you do at least ONE of the following: + + a) distribute a Standard Version of the executables and library files, + together with instructions (in the manual page or equivalent) on where + to get the Standard Version. + + b) accompany the distribution with the machine-readable source of + the Package with your modifications. + + c) give non-standard executables non-standard names, and clearly + document the differences in manual pages (or equivalent), together + with instructions on where to get the Standard Version. + + d) make other distribution arrangements with the Copyright Holder. + + 5. You may charge a reasonable copying fee for any distribution of this + Package. You may charge any fee you choose for support of this + Package. You may not charge a fee for this Package itself. However, + you may distribute this Package in aggregate with other (possibly + commercial) programs as part of a larger (possibly commercial) software + distribution provided that you do not advertise this Package as a + product of your own. You may embed this Package's interpreter within + an executable of yours (by linking); this shall be construed as a mere + form of aggregation, provided that the complete Standard Version of the + interpreter is so embedded. + + 6. The scripts and library files supplied as input to or produced as + output from the programs of this Package do not automatically fall + under the copyright of this Package, but belong to whoever generated + them, and may be sold commercially, and may be aggregated with this + Package. If such scripts or library files are aggregated with this + Package via the so-called "undump" or "unexec" methods of producing a + binary executable image, then distribution of such an image shall + neither be construed as a distribution of this Package nor shall it + fall under the restrictions of Paragraphs 3 and 4, provided that you do + not represent such an executable image as a Standard Version of this + Package. + + 7. C subroutines (or comparably compiled subroutines in other + languages) supplied by you and linked into this Package in order to + emulate subroutines and variables of the language defined by this + Package shall not be considered part of this Package, but are the + equivalent of input as in Paragraph 6, provided these subroutines do + not change the language in any way that would cause it to fail the + regression tests for the language. + + 8. Aggregation of this Package with a commercial distribution is always + permitted provided that the use of this Package is embedded; that is, + when no overt attempt is made to make this Package's interfaces visible + to the end user of the commercial distribution. Such use shall not be + construed as a distribution of this Package. + + 9. The name of the Copyright Holder may not be used to endorse or promote + products derived from this software without specific prior written permission. + + 10. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. + + The End + + diff --git a/plugins/ingest-attachment/licenses/apache-mime4j-NOTICE.txt b/plugins/ingest-attachment/licenses/apache-mime4j-NOTICE.txt new file mode 100644 index 00000000000..61523975eb2 --- /dev/null +++ b/plugins/ingest-attachment/licenses/apache-mime4j-NOTICE.txt @@ -0,0 +1,13 @@ + ========================================================================= + == NOTICE file for use with the Apache License, Version 2.0, == + ========================================================================= + + Apache JAMES Mime4j + Copyright 2004-2010 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + This product test suite includes data (mimetools-testmsgs folder) developed + by Eryq and ZeeGee Software Inc as part of the "MIME-tools" Perl5 toolkit + and licensed under the Artistic License diff --git a/plugins/ingest-attachment/licenses/apache-mime4j-core-0.7.2.jar.sha1 b/plugins/ingest-attachment/licenses/apache-mime4j-core-0.7.2.jar.sha1 new file mode 100644 index 00000000000..8210fc7fc16 --- /dev/null +++ b/plugins/ingest-attachment/licenses/apache-mime4j-core-0.7.2.jar.sha1 @@ -0,0 +1 @@ +a81264fe0265ebe8fd1d8128aad06dc320de6eef \ No newline at end of file diff --git a/plugins/ingest-attachment/licenses/apache-mime4j-dom-0.7.2.jar.sha1 b/plugins/ingest-attachment/licenses/apache-mime4j-dom-0.7.2.jar.sha1 new file mode 100644 index 00000000000..3ede85a0191 --- /dev/null +++ b/plugins/ingest-attachment/licenses/apache-mime4j-dom-0.7.2.jar.sha1 @@ -0,0 +1 @@ +1c289aa264548a0a1f1b43685a9cb2ab23f67287 \ No newline at end of file diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files.zip index 10f5d507677..cfc2e54b79b 100644 Binary files a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files.zip and b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/tika-files.zip differ