Ingest Attachment: Upgrade Tika to 1.18 (#31252)

Fixes an issue where ES hangs when a bad zip file is loaded through Tika.
This commit is contained in:
Jack Conradson 2018-06-24 11:08:45 -07:00 committed by Colin Goodheart-Smithe
parent 43c2e2fed1
commit 0f956290ea
No known key found for this signature in database
GPG Key ID: F975E7BDD739B3C7
19 changed files with 27 additions and 14 deletions

View File

@ -23,8 +23,8 @@ esplugin {
} }
versions << [ versions << [
'tika': '1.17', 'tika': '1.18',
'pdfbox': '2.0.8', 'pdfbox': '2.0.9',
'bouncycastle': '1.55', 'bouncycastle': '1.55',
'poi': '3.17', 'poi': '3.17',
'mime4j': '0.8.1' 'mime4j': '0.8.1'
@ -33,9 +33,10 @@ versions << [
dependencies { dependencies {
// mandatory for tika // mandatory for tika
compile "org.apache.tika:tika-core:${versions.tika}" compile "org.apache.tika:tika-core:${versions.tika}"
// built against Jackson 2.9.5, but still works with our current version
compile "org.apache.tika:tika-parsers:${versions.tika}" compile "org.apache.tika:tika-parsers:${versions.tika}"
compile 'org.tukaani:xz:1.6' compile 'org.tukaani:xz:1.8'
compile 'commons-io:commons-io:2.5' compile 'commons-io:commons-io:2.6'
compile "org.slf4j:slf4j-api:${versions.slf4j}" compile "org.slf4j:slf4j-api:${versions.slf4j}"
// character set detection // character set detection
@ -62,7 +63,7 @@ dependencies {
// MS Office // MS Office
compile "org.apache.poi:poi-scratchpad:${versions.poi}" compile "org.apache.poi:poi-scratchpad:${versions.poi}"
// Apple iWork // Apple iWork
compile 'org.apache.commons:commons-compress:1.14' compile 'org.apache.commons:commons-compress:1.16.1'
// Outlook documents // Outlook documents
compile "org.apache.james:apache-mime4j-core:${versions.mime4j}" compile "org.apache.james:apache-mime4j-core:${versions.mime4j}"
compile "org.apache.james:apache-mime4j-dom:${versions.mime4j}" compile "org.apache.james:apache-mime4j-dom:${versions.mime4j}"
@ -118,6 +119,10 @@ thirdPartyAudit.excludes = [
'com.drew.metadata.jpeg.JpegDirectory', 'com.drew.metadata.jpeg.JpegDirectory',
'com.github.junrar.Archive', 'com.github.junrar.Archive',
'com.github.junrar.rarfile.FileHeader', 'com.github.junrar.rarfile.FileHeader',
'com.github.luben.zstd.ZstdInputStream',
'com.github.luben.zstd.ZstdOutputStream',
'com.github.openjson.JSONArray',
'com.github.openjson.JSONObject',
'com.google.common.reflect.TypeToken', 'com.google.common.reflect.TypeToken',
'com.google.gson.Gson', 'com.google.gson.Gson',
'com.googlecode.mp4parser.DataSource', 'com.googlecode.mp4parser.DataSource',
@ -531,6 +536,7 @@ thirdPartyAudit.excludes = [
'org.apache.commons.exec.PumpStreamHandler', 'org.apache.commons.exec.PumpStreamHandler',
'org.apache.commons.exec.environment.EnvironmentUtils', 'org.apache.commons.exec.environment.EnvironmentUtils',
'org.apache.commons.lang.StringUtils', 'org.apache.commons.lang.StringUtils',
'org.apache.commons.lang.SystemUtils',
'org.apache.ctakes.typesystem.type.refsem.UmlsConcept', 'org.apache.ctakes.typesystem.type.refsem.UmlsConcept',
'org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation', 'org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation',
'org.apache.cxf.jaxrs.client.WebClient', 'org.apache.cxf.jaxrs.client.WebClient',
@ -635,8 +641,6 @@ thirdPartyAudit.excludes = [
'org.etsi.uri.x01903.v13.impl.UnsignedSignaturePropertiesTypeImpl$1SignatureTimeStampList', 'org.etsi.uri.x01903.v13.impl.UnsignedSignaturePropertiesTypeImpl$1SignatureTimeStampList',
'org.etsi.uri.x01903.v14.ValidationDataType$Factory', 'org.etsi.uri.x01903.v14.ValidationDataType$Factory',
'org.etsi.uri.x01903.v14.ValidationDataType', 'org.etsi.uri.x01903.v14.ValidationDataType',
'org.json.JSONArray',
'org.json.JSONObject',
'org.json.simple.JSONArray', 'org.json.simple.JSONArray',
'org.json.simple.JSONObject', 'org.json.simple.JSONObject',
'org.json.simple.parser.JSONParser', 'org.json.simple.parser.JSONParser',

View File

@ -1 +0,0 @@
7b18320d668ab080758bf5383d6d8fcf750babce

View File

@ -0,0 +1 @@
7b5cdabadb4cf12f5ee0f801399e70635583193f

View File

@ -1 +0,0 @@
2852e6e05fbb95076fc091f6d1780f1f8fe35e0f

View File

@ -0,0 +1 @@
815893df5f31da2ece4040fe0a12fd44b577afaf

View File

@ -1 +0,0 @@
52f852fcfc7481d45efdffd224eb78b85981b17b

View File

@ -0,0 +1 @@
f961f17ebdbc307e9055e3cf7c0e207f0895ae55

View File

@ -1 +0,0 @@
17bdf273d66f3afe41eedb9d3ab6a7b819c44a0c

View File

@ -0,0 +1 @@
d0425578218624388f2ec84a0b3a11efd55df0f5

View File

@ -1 +0,0 @@
b450102c2aee98107474d2f92661d947b9cef183

View File

@ -0,0 +1 @@
69556697de96cf0b22df846e970dafd29866eee0

View File

@ -1 +0,0 @@
4277c54fcaed542fbc8a0001fdb4c23baccc0132

View File

@ -0,0 +1 @@
7d9b6dea91d783165f3313d320d3aaaa9a4dfc13

View File

@ -1 +0,0 @@
05b6f921f1810bdf90e25471968f741f87168b64

View File

@ -0,0 +1 @@
c4f7d054303948eb6a4066194253886c8af07128

View File

@ -159,6 +159,7 @@ final class TikaImpl {
perms.add(new SecurityPermission("putProviderProperty.BC")); perms.add(new SecurityPermission("putProviderProperty.BC"));
perms.add(new SecurityPermission("insertProvider")); perms.add(new SecurityPermission("insertProvider"));
perms.add(new ReflectPermission("suppressAccessChecks")); perms.add(new ReflectPermission("suppressAccessChecks"));
perms.add(new RuntimePermission("accessClassInPackage.sun.java2d.cmm.kcms"));
// xmlbeans, use by POI, needs to get the context classloader // xmlbeans, use by POI, needs to get the context classloader
perms.add(new RuntimePermission("getClassLoader")); perms.add(new RuntimePermission("getClassLoader"));
// ZipFile needs accessDeclaredMembers on JDK 10; cf. https://bugs.openjdk.java.net/browse/JDK-8187485 // ZipFile needs accessDeclaredMembers on JDK 10; cf. https://bugs.openjdk.java.net/browse/JDK-8187485

View File

@ -31,4 +31,6 @@ grant {
permission java.lang.RuntimePermission "getClassLoader"; permission java.lang.RuntimePermission "getClassLoader";
// ZipFile needs accessDeclaredMembers on Java 10 // ZipFile needs accessDeclaredMembers on Java 10
permission java.lang.RuntimePermission "accessDeclaredMembers"; permission java.lang.RuntimePermission "accessDeclaredMembers";
// PDFBox checks for the existence of this class
permission java.lang.RuntimePermission "accessClassInPackage.sun.java2d.cmm.kcms";
}; };

View File

@ -214,6 +214,12 @@ public class AttachmentProcessorTests extends ESTestCase {
assertThat(attachmentData.get("content_type").toString(), containsString("text/plain")); assertThat(attachmentData.get("content_type").toString(), containsString("text/plain"));
} }
// See https://issues.apache.org/jira/browse/COMPRESS-432 for information about
// the issue that causes parsing of a bad zip file to hang in Tika versions prior to 1.18.
public void testZipFileDoesNotHang() {
expectThrows(Exception.class, () -> parseDocument("bad_tika.zip", processor));
}
public void testParseAsBytesArray() throws Exception { public void testParseAsBytesArray() throws Exception {
String path = "/org/elasticsearch/ingest/attachment/test/sample-files/text-in-english.txt"; String path = "/org/elasticsearch/ingest/attachment/test/sample-files/text-in-english.txt";
byte[] bytes; byte[] bytes;