diff --git a/plugins/ingest-attachment/build.gradle b/plugins/ingest-attachment/build.gradle index 3bca078bd59..f000fdfeef5 100644 --- a/plugins/ingest-attachment/build.gradle +++ b/plugins/ingest-attachment/build.gradle @@ -23,8 +23,8 @@ esplugin { } versions << [ - 'tika': '1.17', - 'pdfbox': '2.0.8', + 'tika': '1.18', + 'pdfbox': '2.0.9', 'bouncycastle': '1.55', 'poi': '3.17', 'mime4j': '0.8.1' @@ -33,9 +33,10 @@ versions << [ dependencies { // mandatory for tika compile "org.apache.tika:tika-core:${versions.tika}" + // build against Jackson 2.9.5, but still works on our current version compile "org.apache.tika:tika-parsers:${versions.tika}" - compile 'org.tukaani:xz:1.6' - compile 'commons-io:commons-io:2.5' + compile 'org.tukaani:xz:1.8' + compile 'commons-io:commons-io:2.6' compile "org.slf4j:slf4j-api:${versions.slf4j}" // character set detection @@ -62,7 +63,7 @@ dependencies { // MS Office compile "org.apache.poi:poi-scratchpad:${versions.poi}" // Apple iWork - compile 'org.apache.commons:commons-compress:1.14' + compile 'org.apache.commons:commons-compress:1.16.1' // Outlook documents compile "org.apache.james:apache-mime4j-core:${versions.mime4j}" compile "org.apache.james:apache-mime4j-dom:${versions.mime4j}" @@ -118,6 +119,10 @@ thirdPartyAudit.excludes = [ 'com.drew.metadata.jpeg.JpegDirectory', 'com.github.junrar.Archive', 'com.github.junrar.rarfile.FileHeader', + 'com.github.luben.zstd.ZstdInputStream', + 'com.github.luben.zstd.ZstdOutputStream', + 'com.github.openjson.JSONArray', + 'com.github.openjson.JSONObject', 'com.google.common.reflect.TypeToken', 'com.google.gson.Gson', 'com.googlecode.mp4parser.DataSource', @@ -531,6 +536,7 @@ thirdPartyAudit.excludes = [ 'org.apache.commons.exec.PumpStreamHandler', 'org.apache.commons.exec.environment.EnvironmentUtils', 'org.apache.commons.lang.StringUtils', + 'org.apache.commons.lang.SystemUtils', 'org.apache.ctakes.typesystem.type.refsem.UmlsConcept', 'org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation', 'org.apache.cxf.jaxrs.client.WebClient', @@ -635,8 +641,6 @@ thirdPartyAudit.excludes = [ 'org.etsi.uri.x01903.v13.impl.UnsignedSignaturePropertiesTypeImpl$1SignatureTimeStampList', 'org.etsi.uri.x01903.v14.ValidationDataType$Factory', 'org.etsi.uri.x01903.v14.ValidationDataType', - 'org.json.JSONArray', - 'org.json.JSONObject', 'org.json.simple.JSONArray', 'org.json.simple.JSONObject', 'org.json.simple.parser.JSONParser', diff --git a/plugins/ingest-attachment/licenses/commons-compress-1.14.jar.sha1 b/plugins/ingest-attachment/licenses/commons-compress-1.14.jar.sha1 deleted file mode 100644 index a93cac2243e..00000000000 --- a/plugins/ingest-attachment/licenses/commons-compress-1.14.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -7b18320d668ab080758bf5383d6d8fcf750babce \ No newline at end of file diff --git a/plugins/ingest-attachment/licenses/commons-compress-1.16.1.jar.sha1 b/plugins/ingest-attachment/licenses/commons-compress-1.16.1.jar.sha1 new file mode 100644 index 00000000000..93be07c90a4 --- /dev/null +++ b/plugins/ingest-attachment/licenses/commons-compress-1.16.1.jar.sha1 @@ -0,0 +1 @@ +7b5cdabadb4cf12f5ee0f801399e70635583193f \ No newline at end of file diff --git a/plugins/ingest-attachment/licenses/commons-io-2.5.jar.sha1 b/plugins/ingest-attachment/licenses/commons-io-2.5.jar.sha1 deleted file mode 100644 index b7f1d93e897..00000000000 --- a/plugins/ingest-attachment/licenses/commons-io-2.5.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -2852e6e05fbb95076fc091f6d1780f1f8fe35e0f \ No newline at end of file diff --git a/plugins/ingest-attachment/licenses/commons-io-2.6.jar.sha1 b/plugins/ingest-attachment/licenses/commons-io-2.6.jar.sha1 new file mode 100644 index 00000000000..75f7934c082 --- /dev/null +++ b/plugins/ingest-attachment/licenses/commons-io-2.6.jar.sha1 @@ -0,0 +1 @@ +815893df5f31da2ece4040fe0a12fd44b577afaf \ No newline at end of file diff --git a/plugins/ingest-attachment/licenses/fontbox-2.0.8.jar.sha1 b/plugins/ingest-attachment/licenses/fontbox-2.0.8.jar.sha1 deleted file mode 100644 index f8abddbc755..00000000000 --- a/plugins/ingest-attachment/licenses/fontbox-2.0.8.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -52f852fcfc7481d45efdffd224eb78b85981b17b \ No newline at end of file diff --git a/plugins/ingest-attachment/licenses/fontbox-2.0.9.jar.sha1 b/plugins/ingest-attachment/licenses/fontbox-2.0.9.jar.sha1 new file mode 100644 index 00000000000..4ded3b54888 --- /dev/null +++ b/plugins/ingest-attachment/licenses/fontbox-2.0.9.jar.sha1 @@ -0,0 +1 @@ +f961f17ebdbc307e9055e3cf7c0e207f0895ae55 \ No newline at end of file diff --git a/plugins/ingest-attachment/licenses/pdfbox-2.0.8.jar.sha1 b/plugins/ingest-attachment/licenses/pdfbox-2.0.8.jar.sha1 deleted file mode 100644 index 1c346871e21..00000000000 --- a/plugins/ingest-attachment/licenses/pdfbox-2.0.8.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -17bdf273d66f3afe41eedb9d3ab6a7b819c44a0c \ No newline at end of file diff --git a/plugins/ingest-attachment/licenses/pdfbox-2.0.9.jar.sha1 b/plugins/ingest-attachment/licenses/pdfbox-2.0.9.jar.sha1 new file mode 100644 index 00000000000..9bf91e07976 --- /dev/null +++ b/plugins/ingest-attachment/licenses/pdfbox-2.0.9.jar.sha1 @@ -0,0 +1 @@ +d0425578218624388f2ec84a0b3a11efd55df0f5 \ No newline at end of file diff --git a/plugins/ingest-attachment/licenses/tika-core-1.17.jar.sha1 b/plugins/ingest-attachment/licenses/tika-core-1.17.jar.sha1 deleted file mode 100644 index 571314b3378..00000000000 --- a/plugins/ingest-attachment/licenses/tika-core-1.17.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -b450102c2aee98107474d2f92661d947b9cef183 \ No newline at end of file diff --git a/plugins/ingest-attachment/licenses/tika-core-1.18.jar.sha1 b/plugins/ingest-attachment/licenses/tika-core-1.18.jar.sha1 new file mode 100644 index 00000000000..ef162f03439 --- /dev/null +++ b/plugins/ingest-attachment/licenses/tika-core-1.18.jar.sha1 @@ -0,0 +1 @@ +69556697de96cf0b22df846e970dafd29866eee0 \ No newline at end of file diff --git a/plugins/ingest-attachment/licenses/tika-parsers-1.17.jar.sha1 b/plugins/ingest-attachment/licenses/tika-parsers-1.17.jar.sha1 deleted file mode 100644 index c4487e4970f..00000000000 --- a/plugins/ingest-attachment/licenses/tika-parsers-1.17.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -4277c54fcaed542fbc8a0001fdb4c23baccc0132 \ No newline at end of file diff --git a/plugins/ingest-attachment/licenses/tika-parsers-1.18.jar.sha1 b/plugins/ingest-attachment/licenses/tika-parsers-1.18.jar.sha1 new file mode 100644 index 00000000000..6441e8b64e7 --- /dev/null +++ b/plugins/ingest-attachment/licenses/tika-parsers-1.18.jar.sha1 @@ -0,0 +1 @@ +7d9b6dea91d783165f3313d320d3aaaa9a4dfc13 \ No newline at end of file diff --git a/plugins/ingest-attachment/licenses/xz-1.6.jar.sha1 b/plugins/ingest-attachment/licenses/xz-1.6.jar.sha1 deleted file mode 100644 index d91cd44c0b4..00000000000 --- a/plugins/ingest-attachment/licenses/xz-1.6.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -05b6f921f1810bdf90e25471968f741f87168b64 \ No newline at end of file diff --git a/plugins/ingest-attachment/licenses/xz-1.8.jar.sha1 b/plugins/ingest-attachment/licenses/xz-1.8.jar.sha1 new file mode 100644 index 00000000000..7455feac798 --- /dev/null +++ b/plugins/ingest-attachment/licenses/xz-1.8.jar.sha1 @@ -0,0 +1 @@ +c4f7d054303948eb6a4066194253886c8af07128 \ No newline at end of file diff --git a/plugins/ingest-attachment/src/main/java/org/elasticsearch/ingest/attachment/TikaImpl.java b/plugins/ingest-attachment/src/main/java/org/elasticsearch/ingest/attachment/TikaImpl.java index 97ca1c0b197..6606d1bc727 100644 --- a/plugins/ingest-attachment/src/main/java/org/elasticsearch/ingest/attachment/TikaImpl.java +++ b/plugins/ingest-attachment/src/main/java/org/elasticsearch/ingest/attachment/TikaImpl.java @@ -159,6 +159,7 @@ final class TikaImpl { perms.add(new SecurityPermission("putProviderProperty.BC")); perms.add(new SecurityPermission("insertProvider")); perms.add(new ReflectPermission("suppressAccessChecks")); + perms.add(new RuntimePermission("accessClassInPackage.sun.java2d.cmm.kcms")); // xmlbeans, use by POI, needs to get the context classloader perms.add(new RuntimePermission("getClassLoader")); // ZipFile needs accessDeclaredMembers on JDK 10; cf. https://bugs.openjdk.java.net/browse/JDK-8187485 diff --git a/plugins/ingest-attachment/src/main/plugin-metadata/plugin-security.policy b/plugins/ingest-attachment/src/main/plugin-metadata/plugin-security.policy index 0cd359a9973..bcc5eef3193 100644 --- a/plugins/ingest-attachment/src/main/plugin-metadata/plugin-security.policy +++ b/plugins/ingest-attachment/src/main/plugin-metadata/plugin-security.policy @@ -31,4 +31,6 @@ grant { permission java.lang.RuntimePermission "getClassLoader"; // ZipFile needs accessDeclaredMembers on Java 10 permission java.lang.RuntimePermission "accessDeclaredMembers"; + // PDFBox checks for the existence of this class + permission java.lang.RuntimePermission "accessClassInPackage.sun.java2d.cmm.kcms"; }; diff --git a/plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorTests.java b/plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorTests.java index 598d3f4e817..654bc361f53 100644 --- a/plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorTests.java +++ b/plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorTests.java @@ -214,6 +214,12 @@ public class AttachmentProcessorTests extends ESTestCase { assertThat(attachmentData.get("content_type").toString(), containsString("text/plain")); } + // See (https://issues.apache.org/jira/browse/COMPRESS-432) for information + // about the issue that causes a zip file to hang in Tika versions prior to 1.18. + public void testZipFileDoesNotHang() { + expectThrows(Exception.class, () -> parseDocument("bad_tika.zip", processor)); + } + public void testParseAsBytesArray() throws Exception { String path = "/org/elasticsearch/ingest/attachment/test/sample-files/text-in-english.txt"; byte[] bytes; diff --git a/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/sample-files/bad_tika.zip b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/sample-files/bad_tika.zip new file mode 100644 index 00000000000..58ebd8411ed Binary files /dev/null and b/plugins/ingest-attachment/src/test/resources/org/elasticsearch/ingest/attachment/test/sample-files/bad_tika.zip differ