David Pilato 8701f7a3ce Add missing mime4j library
In some cases (apparently with Outlook files), the mime4j library is needed.
We removed it in the past, which can cause Elasticsearch to crash when you use ingest-attachment (and probably mapper-attachments as well in the 2.x series) with a file that requires this library.

 Similar problem as the one reported at #22077.
2017-01-24 10:25:02 +01:00

2072 lines
155 KiB

/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
// Plugin descriptor: the human-readable description and the fully qualified
// class name registered for this plugin.
esplugin {
  description 'Ingest processor that uses Apache Tika to extract contents'
  classname 'org.elasticsearch.ingest.attachment.IngestAttachmentPlugin'
}
// Plugin-specific library versions, appended to the `versions` map (declared
// outside this file) so the dependency coordinates below can interpolate them.
versions << [
  'tika': '1.14',
  'pdfbox': '2.0.3',
  'bouncycastle': '1.55',
  'poi': '3.15',
  'mime4j': '0.7.2'
]
// Compile-time dependencies of the plugin: Tika itself plus the parser
// libraries pulled in per document format.
// NOTE(review): versions.commonslogging and versions.commonscodec are not set
// in this file; presumably they come from the shared build configuration.
dependencies {
  // mandatory for tika
  compile "org.apache.tika:tika-core:${versions.tika}"
  compile "org.apache.tika:tika-parsers:${versions.tika}"
  compile 'commons-io:commons-io:2.4'

  // character set detection
  compile 'com.googlecode.juniversalchardet:juniversalchardet:1.0.3'

  // external parser libraries
  compile 'org.ccil.cowan.tagsoup:tagsoup:1.2.1'

  // Adobe PDF
  compile "org.apache.pdfbox:pdfbox:${versions.pdfbox}"
  compile "org.apache.pdfbox:fontbox:${versions.pdfbox}"
  compile "org.apache.pdfbox:jempbox:1.8.12"
  compile "commons-logging:commons-logging:${versions.commonslogging}"
  compile "org.bouncycastle:bcmail-jdk15on:${versions.bouncycastle}"
  compile "org.bouncycastle:bcprov-jdk15on:${versions.bouncycastle}"
  compile "org.bouncycastle:bcpkix-jdk15on:${versions.bouncycastle}"

  // OpenOffice
  compile "org.apache.poi:poi-ooxml:${versions.poi}"
  compile "org.apache.poi:poi:${versions.poi}"
  compile "org.apache.poi:poi-ooxml-schemas:${versions.poi}"
  compile "commons-codec:commons-codec:${versions.commonscodec}"
  compile 'org.apache.xmlbeans:xmlbeans:2.6.0'

  // MS Office
  compile "org.apache.poi:poi-scratchpad:${versions.poi}"

  // Apple iWork
  compile 'org.apache.commons:commons-compress:1.10'

  // Outlook documents
  compile "org.apache.james:apache-mime4j-core:${versions.mime4j}"
  compile "org.apache.james:apache-mime4j-dom:${versions.mime4j}"
}
// TODO: stop using LanguageIdentifier...
// Until then, pass -Xlint:-deprecation to javac so deprecation warnings
// are not reported for this project's main sources.
compileJava.options.compilerArgs.add('-Xlint:-deprecation')
// Map every artifact whose name matches apache-mime4j-* to the single
// 'apache-mime4j' name, so the mime4j jars share one license mapping.
dependencyLicenses {
  mapping from: /apache-mime4j-.*/, to: 'apache-mime4j'
}
// Exclude files with these document extensions from the forbiddenPatterns task.
forbiddenPatterns {
  exclude '**/*.docx'
  exclude '**/*.pdf'
  exclude '**/*.epub'
}
thirdPartyAudit.excludes = [
// classes are missing: some due to our whitelisting of parsers
// Missing openxml schema classes are explained by the fact we use the smaller jar:
// "The full jar of all of the schemas is ooxml-schemas-xx.jar, and it is currently around 15mb.
// The smaller poi-ooxml-schemas jar is only about 4mb.
// This latter jar file only contains the typically used parts though."
// http://poi.apache.org/faq.html#faq-N10025