Ingest Attachment: Upgrade Tika to 1.18 (#31252)
Fixes a hang in ES that occurred when a bad zip file was loaded through Tika.
This commit is contained in:
parent
43c2e2fed1
commit
0f956290ea
|
@ -23,8 +23,8 @@ esplugin {
|
||||||
}
|
}
|
||||||
|
|
||||||
versions << [
|
versions << [
|
||||||
'tika': '1.17',
|
'tika': '1.18',
|
||||||
'pdfbox': '2.0.8',
|
'pdfbox': '2.0.9',
|
||||||
'bouncycastle': '1.55',
|
'bouncycastle': '1.55',
|
||||||
'poi': '3.17',
|
'poi': '3.17',
|
||||||
'mime4j': '0.8.1'
|
'mime4j': '0.8.1'
|
||||||
|
@ -33,9 +33,10 @@ versions << [
|
||||||
dependencies {
|
dependencies {
|
||||||
// mandatory for tika
|
// mandatory for tika
|
||||||
compile "org.apache.tika:tika-core:${versions.tika}"
|
compile "org.apache.tika:tika-core:${versions.tika}"
|
||||||
|
// built against Jackson 2.9.5, but still works with our current version
|
||||||
compile "org.apache.tika:tika-parsers:${versions.tika}"
|
compile "org.apache.tika:tika-parsers:${versions.tika}"
|
||||||
compile 'org.tukaani:xz:1.6'
|
compile 'org.tukaani:xz:1.8'
|
||||||
compile 'commons-io:commons-io:2.5'
|
compile 'commons-io:commons-io:2.6'
|
||||||
compile "org.slf4j:slf4j-api:${versions.slf4j}"
|
compile "org.slf4j:slf4j-api:${versions.slf4j}"
|
||||||
|
|
||||||
// character set detection
|
// character set detection
|
||||||
|
@ -62,7 +63,7 @@ dependencies {
|
||||||
// MS Office
|
// MS Office
|
||||||
compile "org.apache.poi:poi-scratchpad:${versions.poi}"
|
compile "org.apache.poi:poi-scratchpad:${versions.poi}"
|
||||||
// Apple iWork
|
// Apple iWork
|
||||||
compile 'org.apache.commons:commons-compress:1.14'
|
compile 'org.apache.commons:commons-compress:1.16.1'
|
||||||
// Outlook documents
|
// Outlook documents
|
||||||
compile "org.apache.james:apache-mime4j-core:${versions.mime4j}"
|
compile "org.apache.james:apache-mime4j-core:${versions.mime4j}"
|
||||||
compile "org.apache.james:apache-mime4j-dom:${versions.mime4j}"
|
compile "org.apache.james:apache-mime4j-dom:${versions.mime4j}"
|
||||||
|
@ -118,6 +119,10 @@ thirdPartyAudit.excludes = [
|
||||||
'com.drew.metadata.jpeg.JpegDirectory',
|
'com.drew.metadata.jpeg.JpegDirectory',
|
||||||
'com.github.junrar.Archive',
|
'com.github.junrar.Archive',
|
||||||
'com.github.junrar.rarfile.FileHeader',
|
'com.github.junrar.rarfile.FileHeader',
|
||||||
|
'com.github.luben.zstd.ZstdInputStream',
|
||||||
|
'com.github.luben.zstd.ZstdOutputStream',
|
||||||
|
'com.github.openjson.JSONArray',
|
||||||
|
'com.github.openjson.JSONObject',
|
||||||
'com.google.common.reflect.TypeToken',
|
'com.google.common.reflect.TypeToken',
|
||||||
'com.google.gson.Gson',
|
'com.google.gson.Gson',
|
||||||
'com.googlecode.mp4parser.DataSource',
|
'com.googlecode.mp4parser.DataSource',
|
||||||
|
@ -531,6 +536,7 @@ thirdPartyAudit.excludes = [
|
||||||
'org.apache.commons.exec.PumpStreamHandler',
|
'org.apache.commons.exec.PumpStreamHandler',
|
||||||
'org.apache.commons.exec.environment.EnvironmentUtils',
|
'org.apache.commons.exec.environment.EnvironmentUtils',
|
||||||
'org.apache.commons.lang.StringUtils',
|
'org.apache.commons.lang.StringUtils',
|
||||||
|
'org.apache.commons.lang.SystemUtils',
|
||||||
'org.apache.ctakes.typesystem.type.refsem.UmlsConcept',
|
'org.apache.ctakes.typesystem.type.refsem.UmlsConcept',
|
||||||
'org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation',
|
'org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation',
|
||||||
'org.apache.cxf.jaxrs.client.WebClient',
|
'org.apache.cxf.jaxrs.client.WebClient',
|
||||||
|
@ -635,8 +641,6 @@ thirdPartyAudit.excludes = [
|
||||||
'org.etsi.uri.x01903.v13.impl.UnsignedSignaturePropertiesTypeImpl$1SignatureTimeStampList',
|
'org.etsi.uri.x01903.v13.impl.UnsignedSignaturePropertiesTypeImpl$1SignatureTimeStampList',
|
||||||
'org.etsi.uri.x01903.v14.ValidationDataType$Factory',
|
'org.etsi.uri.x01903.v14.ValidationDataType$Factory',
|
||||||
'org.etsi.uri.x01903.v14.ValidationDataType',
|
'org.etsi.uri.x01903.v14.ValidationDataType',
|
||||||
'org.json.JSONArray',
|
|
||||||
'org.json.JSONObject',
|
|
||||||
'org.json.simple.JSONArray',
|
'org.json.simple.JSONArray',
|
||||||
'org.json.simple.JSONObject',
|
'org.json.simple.JSONObject',
|
||||||
'org.json.simple.parser.JSONParser',
|
'org.json.simple.parser.JSONParser',
|
||||||
|
|
|
@ -1 +0,0 @@
|
||||||
7b18320d668ab080758bf5383d6d8fcf750babce
|
|
|
@ -0,0 +1 @@
|
||||||
|
7b5cdabadb4cf12f5ee0f801399e70635583193f
|
|
@ -1 +0,0 @@
|
||||||
2852e6e05fbb95076fc091f6d1780f1f8fe35e0f
|
|
|
@ -0,0 +1 @@
|
||||||
|
815893df5f31da2ece4040fe0a12fd44b577afaf
|
|
@ -1 +0,0 @@
|
||||||
52f852fcfc7481d45efdffd224eb78b85981b17b
|
|
|
@ -0,0 +1 @@
|
||||||
|
f961f17ebdbc307e9055e3cf7c0e207f0895ae55
|
|
@ -1 +0,0 @@
|
||||||
17bdf273d66f3afe41eedb9d3ab6a7b819c44a0c
|
|
|
@ -0,0 +1 @@
|
||||||
|
d0425578218624388f2ec84a0b3a11efd55df0f5
|
|
@ -1 +0,0 @@
|
||||||
b450102c2aee98107474d2f92661d947b9cef183
|
|
|
@ -0,0 +1 @@
|
||||||
|
69556697de96cf0b22df846e970dafd29866eee0
|
|
@ -1 +0,0 @@
|
||||||
4277c54fcaed542fbc8a0001fdb4c23baccc0132
|
|
|
@ -0,0 +1 @@
|
||||||
|
7d9b6dea91d783165f3313d320d3aaaa9a4dfc13
|
|
@ -1 +0,0 @@
|
||||||
05b6f921f1810bdf90e25471968f741f87168b64
|
|
|
@ -0,0 +1 @@
|
||||||
|
c4f7d054303948eb6a4066194253886c8af07128
|
|
@ -159,6 +159,7 @@ final class TikaImpl {
|
||||||
perms.add(new SecurityPermission("putProviderProperty.BC"));
|
perms.add(new SecurityPermission("putProviderProperty.BC"));
|
||||||
perms.add(new SecurityPermission("insertProvider"));
|
perms.add(new SecurityPermission("insertProvider"));
|
||||||
perms.add(new ReflectPermission("suppressAccessChecks"));
|
perms.add(new ReflectPermission("suppressAccessChecks"));
|
||||||
|
perms.add(new RuntimePermission("accessClassInPackage.sun.java2d.cmm.kcms"));
|
||||||
// xmlbeans, used by POI, needs to get the context classloader
|
// xmlbeans, used by POI, needs to get the context classloader
|
||||||
perms.add(new RuntimePermission("getClassLoader"));
|
perms.add(new RuntimePermission("getClassLoader"));
|
||||||
// ZipFile needs accessDeclaredMembers on JDK 10; cf. https://bugs.openjdk.java.net/browse/JDK-8187485
|
// ZipFile needs accessDeclaredMembers on JDK 10; cf. https://bugs.openjdk.java.net/browse/JDK-8187485
|
||||||
|
|
|
@ -31,4 +31,6 @@ grant {
|
||||||
permission java.lang.RuntimePermission "getClassLoader";
|
permission java.lang.RuntimePermission "getClassLoader";
|
||||||
// ZipFile needs accessDeclaredMembers on Java 10
|
// ZipFile needs accessDeclaredMembers on Java 10
|
||||||
permission java.lang.RuntimePermission "accessDeclaredMembers";
|
permission java.lang.RuntimePermission "accessDeclaredMembers";
|
||||||
|
// PDFBox checks for the existence of this class
|
||||||
|
permission java.lang.RuntimePermission "accessClassInPackage.sun.java2d.cmm.kcms";
|
||||||
};
|
};
|
||||||
|
|
|
@ -214,6 +214,12 @@ public class AttachmentProcessorTests extends ESTestCase {
|
||||||
assertThat(attachmentData.get("content_type").toString(), containsString("text/plain"));
|
assertThat(attachmentData.get("content_type").toString(), containsString("text/plain"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// See (https://issues.apache.org/jira/browse/COMPRESS-432) for information
|
||||||
|
// about the issue that causes a zip file to hang in Tika versions prior to 1.18.
|
||||||
|
public void testZipFileDoesNotHang() {
|
||||||
|
expectThrows(Exception.class, () -> parseDocument("bad_tika.zip", processor));
|
||||||
|
}
|
||||||
|
|
||||||
public void testParseAsBytesArray() throws Exception {
|
public void testParseAsBytesArray() throws Exception {
|
||||||
String path = "/org/elasticsearch/ingest/attachment/test/sample-files/text-in-english.txt";
|
String path = "/org/elasticsearch/ingest/attachment/test/sample-files/text-in-english.txt";
|
||||||
byte[] bytes;
|
byte[] bytes;
|
||||||
|
|
Binary file not shown.
Loading…
Reference in New Issue