update ingest-attachment to use Tika 1.17 and newer deps (#27824)

- This PR updates Tika and its dependencies
- Updates the dependency SHA checksum files
- Updates the thirdPartyAudit class excludes
This commit is contained in:
Tal Levy 2017-12-15 13:47:26 -08:00 committed by GitHub
parent 717e2ddf42
commit 43ff38c5da
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
26 changed files with 144 additions and 116 deletions

View File

@ -23,11 +23,11 @@ esplugin {
} }
versions << [ versions << [
'tika': '1.15', 'tika': '1.17',
'pdfbox': '2.0.3', 'pdfbox': '2.0.8',
'bouncycastle': '1.55', 'bouncycastle': '1.55',
'poi': '3.16', 'poi': '3.17',
'mime4j': '0.7.2' 'mime4j': '0.8.1'
] ]
dependencies { dependencies {
@ -35,7 +35,7 @@ dependencies {
compile "org.apache.tika:tika-core:${versions.tika}" compile "org.apache.tika:tika-core:${versions.tika}"
compile "org.apache.tika:tika-parsers:${versions.tika}" compile "org.apache.tika:tika-parsers:${versions.tika}"
compile 'org.tukaani:xz:1.6' compile 'org.tukaani:xz:1.6'
compile 'commons-io:commons-io:2.4' compile 'commons-io:commons-io:2.5'
compile "org.slf4j:slf4j-api:${versions.slf4j}" compile "org.slf4j:slf4j-api:${versions.slf4j}"
// character set detection // character set detection
@ -47,7 +47,7 @@ dependencies {
// Adobe PDF // Adobe PDF
compile "org.apache.pdfbox:pdfbox:${versions.pdfbox}" compile "org.apache.pdfbox:pdfbox:${versions.pdfbox}"
compile "org.apache.pdfbox:fontbox:${versions.pdfbox}" compile "org.apache.pdfbox:fontbox:${versions.pdfbox}"
compile "org.apache.pdfbox:jempbox:1.8.12" compile "org.apache.pdfbox:jempbox:1.8.13"
compile "commons-logging:commons-logging:${versions.commonslogging}" compile "commons-logging:commons-logging:${versions.commonslogging}"
compile "org.bouncycastle:bcmail-jdk15on:${versions.bouncycastle}" compile "org.bouncycastle:bcmail-jdk15on:${versions.bouncycastle}"
compile "org.bouncycastle:bcprov-jdk15on:${versions.bouncycastle}" compile "org.bouncycastle:bcprov-jdk15on:${versions.bouncycastle}"
@ -546,6 +546,7 @@ thirdPartyAudit.excludes = [
'org.apache.http.client.utils.URIBuilder', 'org.apache.http.client.utils.URIBuilder',
'org.apache.http.entity.ByteArrayEntity', 'org.apache.http.entity.ByteArrayEntity',
'org.apache.http.impl.client.DefaultHttpClient', 'org.apache.http.impl.client.DefaultHttpClient',
'org.apache.jcp.xml.dsig.internal.dom.ApacheNodeSetData',
'org.apache.jcp.xml.dsig.internal.dom.DOMDigestMethod', 'org.apache.jcp.xml.dsig.internal.dom.DOMDigestMethod',
'org.apache.jcp.xml.dsig.internal.dom.DOMKeyInfo', 'org.apache.jcp.xml.dsig.internal.dom.DOMKeyInfo',
'org.apache.jcp.xml.dsig.internal.dom.DOMReference', 'org.apache.jcp.xml.dsig.internal.dom.DOMReference',
@ -588,6 +589,7 @@ thirdPartyAudit.excludes = [
'org.apache.uima.util.XmlCasSerializer', 'org.apache.uima.util.XmlCasSerializer',
'org.apache.xml.security.Init', 'org.apache.xml.security.Init',
'org.apache.xml.security.c14n.Canonicalizer', 'org.apache.xml.security.c14n.Canonicalizer',
'org.apache.xml.security.signature.XMLSignatureInput',
'org.apache.xml.security.utils.Base64', 'org.apache.xml.security.utils.Base64',
'org.brotli.dec.BrotliInputStream', 'org.brotli.dec.BrotliInputStream',
'org.etsi.uri.x01903.v13.AnyType', 'org.etsi.uri.x01903.v13.AnyType',
@ -635,11 +637,9 @@ thirdPartyAudit.excludes = [
'org.etsi.uri.x01903.v14.ValidationDataType', 'org.etsi.uri.x01903.v14.ValidationDataType',
'org.json.JSONArray', 'org.json.JSONArray',
'org.json.JSONObject', 'org.json.JSONObject',
'org.json.XML',
'org.json.simple.JSONArray', 'org.json.simple.JSONArray',
'org.json.simple.JSONObject', 'org.json.simple.JSONObject',
'org.json.simple.parser.JSONParser', 'org.json.simple.parser.JSONParser',
'org.junit.Assert',
'org.junit.Test', 'org.junit.Test',
'org.junit.internal.TextListener', 'org.junit.internal.TextListener',
'org.junit.runner.JUnitCore', 'org.junit.runner.JUnitCore',
@ -690,7 +690,6 @@ thirdPartyAudit.excludes = [
'org.openxmlformats.schemas.drawingml.x2006.chart.CTDLbls', 'org.openxmlformats.schemas.drawingml.x2006.chart.CTDLbls',
'org.openxmlformats.schemas.drawingml.x2006.chart.CTDPt', 'org.openxmlformats.schemas.drawingml.x2006.chart.CTDPt',
'org.openxmlformats.schemas.drawingml.x2006.chart.CTDTable', 'org.openxmlformats.schemas.drawingml.x2006.chart.CTDTable',
'org.openxmlformats.schemas.drawingml.x2006.chart.CTDateAx',
'org.openxmlformats.schemas.drawingml.x2006.chart.CTDispBlanksAs', 'org.openxmlformats.schemas.drawingml.x2006.chart.CTDispBlanksAs',
'org.openxmlformats.schemas.drawingml.x2006.chart.CTDispUnits', 'org.openxmlformats.schemas.drawingml.x2006.chart.CTDispUnits',
'org.openxmlformats.schemas.drawingml.x2006.chart.CTDoughnutChart', 'org.openxmlformats.schemas.drawingml.x2006.chart.CTDoughnutChart',
@ -720,6 +719,7 @@ thirdPartyAudit.excludes = [
'org.openxmlformats.schemas.drawingml.x2006.chart.CTSurface3DChart', 'org.openxmlformats.schemas.drawingml.x2006.chart.CTSurface3DChart',
'org.openxmlformats.schemas.drawingml.x2006.chart.CTSurfaceChart', 'org.openxmlformats.schemas.drawingml.x2006.chart.CTSurfaceChart',
'org.openxmlformats.schemas.drawingml.x2006.chart.CTTextLanguageID', 'org.openxmlformats.schemas.drawingml.x2006.chart.CTTextLanguageID',
'org.openxmlformats.schemas.drawingml.x2006.chart.CTTimeUnit',
'org.openxmlformats.schemas.drawingml.x2006.chart.CTTrendline', 'org.openxmlformats.schemas.drawingml.x2006.chart.CTTrendline',
'org.openxmlformats.schemas.drawingml.x2006.chart.CTUpDownBars', 'org.openxmlformats.schemas.drawingml.x2006.chart.CTUpDownBars',
'org.openxmlformats.schemas.drawingml.x2006.chart.CTView3D', 'org.openxmlformats.schemas.drawingml.x2006.chart.CTView3D',
@ -828,8 +828,8 @@ thirdPartyAudit.excludes = [
'org.openxmlformats.schemas.drawingml.x2006.main.STOnOffStyleType$Enum', 'org.openxmlformats.schemas.drawingml.x2006.main.STOnOffStyleType$Enum',
'org.openxmlformats.schemas.drawingml.x2006.main.STPanose', 'org.openxmlformats.schemas.drawingml.x2006.main.STPanose',
'org.openxmlformats.schemas.drawingml.x2006.main.STPathFillMode', 'org.openxmlformats.schemas.drawingml.x2006.main.STPathFillMode',
'org.openxmlformats.schemas.drawingml.x2006.main.STPresetPatternVal',
'org.openxmlformats.schemas.drawingml.x2006.main.STPresetPatternVal$Enum', 'org.openxmlformats.schemas.drawingml.x2006.main.STPresetPatternVal$Enum',
'org.openxmlformats.schemas.drawingml.x2006.main.STPresetPatternVal',
'org.openxmlformats.schemas.drawingml.x2006.main.STRectAlignment', 'org.openxmlformats.schemas.drawingml.x2006.main.STRectAlignment',
'org.openxmlformats.schemas.drawingml.x2006.main.STTextColumnCount', 'org.openxmlformats.schemas.drawingml.x2006.main.STTextColumnCount',
'org.openxmlformats.schemas.drawingml.x2006.main.STTextNonNegativePoint', 'org.openxmlformats.schemas.drawingml.x2006.main.STTextNonNegativePoint',
@ -937,34 +937,6 @@ thirdPartyAudit.excludes = [
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTPresetColorImpl$1SatOffList', 'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTPresetColorImpl$1SatOffList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTPresetColorImpl$1ShadeList', 'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTPresetColorImpl$1ShadeList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTPresetColorImpl$1TintList', 'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTPresetColorImpl$1TintList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1AlphaList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1AlphaModList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1AlphaOffList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1BlueList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1BlueModList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1BlueOffList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1CompList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1GammaList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1GrayList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1GreenList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1GreenModList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1GreenOffList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1HueList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1HueModList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1HueOffList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1InvGammaList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1InvList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1LumList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1LumModList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1LumOffList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1RedList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1RedModList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1RedOffList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1SatList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1SatModList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1SatOffList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1ShadeList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1TintList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTSRgbColorImpl$1AlphaList', 'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTSRgbColorImpl$1AlphaList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTSRgbColorImpl$1AlphaModList', 'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTSRgbColorImpl$1AlphaModList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTSRgbColorImpl$1AlphaOffList', 'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTSRgbColorImpl$1AlphaOffList',
@ -993,6 +965,34 @@ thirdPartyAudit.excludes = [
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTSRgbColorImpl$1SatOffList', 'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTSRgbColorImpl$1SatOffList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTSRgbColorImpl$1ShadeList', 'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTSRgbColorImpl$1ShadeList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTSRgbColorImpl$1TintList', 'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTSRgbColorImpl$1TintList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1AlphaList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1AlphaModList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1AlphaOffList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1BlueList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1BlueModList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1BlueOffList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1CompList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1GammaList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1GrayList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1GreenList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1GreenModList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1GreenOffList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1HueList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1HueModList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1HueOffList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1InvGammaList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1InvList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1LumList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1LumModList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1LumOffList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1RedList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1RedModList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1RedOffList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1SatList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1SatModList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1SatOffList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1ShadeList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTScRgbColorImpl$1TintList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTSchemeColorImpl$1AlphaList', 'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTSchemeColorImpl$1AlphaList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTSchemeColorImpl$1AlphaModList', 'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTSchemeColorImpl$1AlphaModList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTSchemeColorImpl$1AlphaOffList', 'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTSchemeColorImpl$1AlphaOffList',
@ -1058,7 +1058,6 @@ thirdPartyAudit.excludes = [
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTTextParagraphImpl$1FldList', 'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTTextParagraphImpl$1FldList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTTextParagraphImpl$1RList', 'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTTextParagraphImpl$1RList',
'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTTextTabStopListImpl$1TabList', 'org.openxmlformats.schemas.drawingml.x2006.main.impl.CTTextTabStopListImpl$1TabList',
'org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTAbsoluteAnchor',
'org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.impl.CTDrawingImpl$1AbsoluteAnchorList', 'org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.impl.CTDrawingImpl$1AbsoluteAnchorList',
'org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.impl.CTDrawingImpl$1OneCellAnchorList', 'org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.impl.CTDrawingImpl$1OneCellAnchorList',
'org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.impl.CTDrawingImpl$1TwoCellAnchorList', 'org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.impl.CTDrawingImpl$1TwoCellAnchorList',
@ -1183,7 +1182,6 @@ thirdPartyAudit.excludes = [
'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTChartsheetViews', 'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTChartsheetViews',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTColHierarchiesUsage', 'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTColHierarchiesUsage',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTColItems', 'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTColItems',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTColors',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTConditionalFormats', 'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTConditionalFormats',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTConsolidation', 'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTConsolidation',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTControls', 'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTControls',
@ -1208,6 +1206,7 @@ thirdPartyAudit.excludes = [
'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTFormats', 'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTFormats',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTFunctionGroups', 'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTFunctionGroups',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTGradientFill', 'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTGradientFill',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTMRUColors',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTMeasureDimensionMaps', 'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTMeasureDimensionMaps',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTMeasureGroups', 'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTMeasureGroups',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTMissing', 'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTMissing',
@ -1231,7 +1230,6 @@ thirdPartyAudit.excludes = [
'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSortState', 'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSortState',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTString', 'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTString',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTTableFormula', 'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTTableFormula',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTTableStyles',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTTupleCache', 'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTTupleCache',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWebPublishItems', 'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWebPublishItems',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWebPublishObjects', 'org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWebPublishObjects',
@ -1333,14 +1331,14 @@ thirdPartyAudit.excludes = [
'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTSharedItemsImpl$1NList', 'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTSharedItemsImpl$1NList',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTSharedItemsImpl$1SList', 'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTSharedItemsImpl$1SList',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTSheetDataImpl$1RowList', 'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTSheetDataImpl$1RowList',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTSheetsImpl$1SheetList',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTSheetViewImpl$1PivotSelectionList', 'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTSheetViewImpl$1PivotSelectionList',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTSheetViewImpl$1SelectionList', 'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTSheetViewImpl$1SelectionList',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTSheetViewsImpl$1SheetViewList', 'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTSheetViewsImpl$1SheetViewList',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTSheetsImpl$1SheetList',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTSingleXmlCellsImpl$1SingleXmlCellList', 'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTSingleXmlCellsImpl$1SingleXmlCellList',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTSstImpl$1SiList', 'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTSstImpl$1SiList',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTTableColumnsImpl$1TableColumnList',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTTablePartsImpl$1TablePartList', 'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTTablePartsImpl$1TablePartList',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTTableStylesImpl$1TableStyleList',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTWorkbookImpl$1FileRecoveryPrList', 'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTWorkbookImpl$1FileRecoveryPrList',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTWorksheetImpl$1ColsList', 'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTWorksheetImpl$1ColsList',
'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTWorksheetImpl$1ConditionalFormattingList', 'org.openxmlformats.schemas.spreadsheetml.x2006.main.impl.CTWorksheetImpl$1ConditionalFormattingList',
@ -1400,7 +1398,6 @@ thirdPartyAudit.excludes = [
'org.openxmlformats.schemas.wordprocessingml.x2006.main.CTProof', 'org.openxmlformats.schemas.wordprocessingml.x2006.main.CTProof',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRPrChange', 'org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRPrChange',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.CTReadingModeInkLockDown', 'org.openxmlformats.schemas.wordprocessingml.x2006.main.CTReadingModeInkLockDown',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRuby',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSaveThroughXslt', 'org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSaveThroughXslt',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtComboBox', 'org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtComboBox',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtDate', 'org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtDate',
@ -1441,6 +1438,7 @@ thirdPartyAudit.excludes = [
'org.openxmlformats.schemas.wordprocessingml.x2006.main.STPTabLeader', 'org.openxmlformats.schemas.wordprocessingml.x2006.main.STPTabLeader',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.STPTabRelativeTo', 'org.openxmlformats.schemas.wordprocessingml.x2006.main.STPTabRelativeTo',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.STProofErr', 'org.openxmlformats.schemas.wordprocessingml.x2006.main.STProofErr',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.STRubyAlign',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.STShortHexNumber', 'org.openxmlformats.schemas.wordprocessingml.x2006.main.STShortHexNumber',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.STThemeColor', 'org.openxmlformats.schemas.wordprocessingml.x2006.main.STThemeColor',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.STUcharHexNumber', 'org.openxmlformats.schemas.wordprocessingml.x2006.main.STUcharHexNumber',
@ -1708,6 +1706,32 @@ thirdPartyAudit.excludes = [
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRowImpl$1ProofErrList', 'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRowImpl$1ProofErrList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRowImpl$1SdtList', 'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRowImpl$1SdtList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRowImpl$1TcList', 'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRowImpl$1TcList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRubyContentImpl$1BookmarkEndList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRubyContentImpl$1BookmarkStartList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRubyContentImpl$1CommentRangeEndList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRubyContentImpl$1CommentRangeStartList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRubyContentImpl$1CustomXmlDelRangeEndList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRubyContentImpl$1CustomXmlDelRangeStartList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRubyContentImpl$1CustomXmlInsRangeEndList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRubyContentImpl$1CustomXmlInsRangeStartList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRubyContentImpl$1CustomXmlMoveFromRangeEndList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRubyContentImpl$1CustomXmlMoveFromRangeStartList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRubyContentImpl$1CustomXmlMoveToRangeEndList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRubyContentImpl$1CustomXmlMoveToRangeStartList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRubyContentImpl$1DelList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRubyContentImpl$1InsList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRubyContentImpl$1MoveFromList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRubyContentImpl$1MoveFromRangeEndList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRubyContentImpl$1MoveFromRangeStartList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRubyContentImpl$1MoveToList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRubyContentImpl$1MoveToRangeEndList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRubyContentImpl$1MoveToRangeStartList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRubyContentImpl$1OMathList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRubyContentImpl$1OMathParaList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRubyContentImpl$1PermEndList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRubyContentImpl$1PermStartList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRubyContentImpl$1ProofErrList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRubyContentImpl$1RList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRunTrackChangeImpl$1AccList', 'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRunTrackChangeImpl$1AccList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRunTrackChangeImpl$1BarList', 'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRunTrackChangeImpl$1BarList',
'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRunTrackChangeImpl$1BookmarkEndList', 'org.openxmlformats.schemas.wordprocessingml.x2006.main.impl.CTRunTrackChangeImpl$1BookmarkEndList',
@ -2054,7 +2078,6 @@ thirdPartyAudit.excludes = [
'org.sqlite.SQLiteConfig', 'org.sqlite.SQLiteConfig',
'org.w3.x2000.x09.xmldsig.KeyInfoType', 'org.w3.x2000.x09.xmldsig.KeyInfoType',
'org.w3.x2000.x09.xmldsig.SignatureMethodType', 'org.w3.x2000.x09.xmldsig.SignatureMethodType',
'org.w3.x2000.x09.xmldsig.SignatureValueType',
'org.w3.x2000.x09.xmldsig.TransformsType', 'org.w3.x2000.x09.xmldsig.TransformsType',
'org.w3.x2000.x09.xmldsig.impl.SignatureTypeImpl$1ObjectList', 'org.w3.x2000.x09.xmldsig.impl.SignatureTypeImpl$1ObjectList',
'org.w3.x2000.x09.xmldsig.impl.SignedInfoTypeImpl$1ReferenceList', 'org.w3.x2000.x09.xmldsig.impl.SignedInfoTypeImpl$1ReferenceList',

View File

@ -1 +0,0 @@
a81264fe0265ebe8fd1d8128aad06dc320de6eef

View File

@ -0,0 +1 @@
c62dfe18a3b827a2c626ade0ffba44562ddf3f61

View File

@ -1 +0,0 @@
1c289aa264548a0a1f1b43685a9cb2ab23f67287

View File

@ -0,0 +1 @@
f2d653c617004193f3350330d907f77b60c88c56

View File

@ -1 +0,0 @@
b1b6ea3b7e4aa4f492509a4952029cd8e48019ad

View File

@ -0,0 +1 @@
2852e6e05fbb95076fc091f6d1780f1f8fe35e0f

View File

@ -1 +0,0 @@
448ee588d0136121cf5c4dd397384cccb9db1ad7

View File

@ -0,0 +1 @@
52f852fcfc7481d45efdffd224eb78b85981b17b

View File

@ -1 +0,0 @@
426450c573c19f6f2c751a7a52c11931b712c9f6

View File

@ -0,0 +1 @@
a874cef0ed0e2a8c4cc5ed52c23ba3e6d78eca4e

View File

@ -1 +0,0 @@
be7b09de93f7c7795c57f4fbf14db60ab93806b4

View File

@ -0,0 +1 @@
17bdf273d66f3afe41eedb9d3ab6a7b819c44a0c

View File

@ -1 +0,0 @@
ad21c123ee5d6b5b2a8f0d4ed23b3ffe6759a889

View File

@ -0,0 +1 @@
0ae92292a2043888b40d418da97dc0b669fde326

View File

@ -1 +0,0 @@
76e20fe22404cc4da55ddfdaaaadee32bbfa3bdd

View File

@ -0,0 +1 @@
07d8c44407178b73246462842bf1e206e99c8e0a

View File

@ -1 +0,0 @@
9828a49307fc6bebfd42185b677d88b6e4994c63

View File

@ -0,0 +1 @@
890114bfa82f5b6380ea0e9b0bf49b0af797b414

View File

@ -1 +0,0 @@
69d6dda524e38a491b362d0f94ef74a514faf70a

View File

@ -0,0 +1 @@
85d86a0e26c7f5c0db4ee63e8c7728e51c5d64ce

View File

@ -1 +0,0 @@
17850c2224e4e3867e588060dc8ce6ba3bcfab2a

View File

@ -0,0 +1 @@
b450102c2aee98107474d2f92661d947b9cef183

View File

@ -1 +0,0 @@
aa07c2cda051709e5fe70fd6e244386fc93b0a1e

View File

@ -0,0 +1 @@
4277c54fcaed542fbc8a0001fdb4c23baccc0132

View File

@ -19,6 +19,7 @@
package org.elasticsearch.ingest.attachment; package org.elasticsearch.ingest.attachment;
import org.apache.tika.exception.ZeroByteFileException;
import org.apache.tika.language.LanguageIdentifier; import org.apache.tika.language.LanguageIdentifier;
import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.metadata.TikaCoreProperties;
@ -81,70 +82,74 @@ public final class AttachmentProcessor extends AbstractProcessor {
throw new IllegalArgumentException("field [" + field + "] is null, cannot parse."); throw new IllegalArgumentException("field [" + field + "] is null, cannot parse.");
} }
Metadata metadata = new Metadata();
String parsedContent = "";
try { try {
Metadata metadata = new Metadata(); parsedContent = TikaImpl.parse(input, metadata, indexedChars);
String parsedContent = TikaImpl.parse(input, metadata, indexedChars); } catch (ZeroByteFileException e) {
// Tika 1.17 throws a ZeroByteFileException when the InputStream has 0 bytes.
if (properties.contains(Property.CONTENT) && Strings.hasLength(parsedContent)) { // previously, it did not mind. This is here to preserve that behavior.
// somehow tika seems to append a newline at the end automatically, lets remove that again
additionalFields.put(Property.CONTENT.toLowerCase(), parsedContent.trim());
}
if (properties.contains(Property.LANGUAGE) && Strings.hasLength(parsedContent)) {
LanguageIdentifier identifier = new LanguageIdentifier(parsedContent);
String language = identifier.getLanguage();
additionalFields.put(Property.LANGUAGE.toLowerCase(), language);
}
if (properties.contains(Property.DATE)) {
String createdDate = metadata.get(TikaCoreProperties.CREATED);
if (createdDate != null) {
additionalFields.put(Property.DATE.toLowerCase(), createdDate);
}
}
if (properties.contains(Property.TITLE)) {
String title = metadata.get(TikaCoreProperties.TITLE);
if (Strings.hasLength(title)) {
additionalFields.put(Property.TITLE.toLowerCase(), title);
}
}
if (properties.contains(Property.AUTHOR)) {
String author = metadata.get("Author");
if (Strings.hasLength(author)) {
additionalFields.put(Property.AUTHOR.toLowerCase(), author);
}
}
if (properties.contains(Property.KEYWORDS)) {
String keywords = metadata.get("Keywords");
if (Strings.hasLength(keywords)) {
additionalFields.put(Property.KEYWORDS.toLowerCase(), keywords);
}
}
if (properties.contains(Property.CONTENT_TYPE)) {
String contentType = metadata.get(Metadata.CONTENT_TYPE);
if (Strings.hasLength(contentType)) {
additionalFields.put(Property.CONTENT_TYPE.toLowerCase(), contentType);
}
}
if (properties.contains(Property.CONTENT_LENGTH)) {
String contentLength = metadata.get(Metadata.CONTENT_LENGTH);
long length;
if (Strings.hasLength(contentLength)) {
length = Long.parseLong(contentLength);
} else {
length = parsedContent.length();
}
additionalFields.put(Property.CONTENT_LENGTH.toLowerCase(), length);
}
} catch (Exception e) { } catch (Exception e) {
throw new ElasticsearchParseException("Error parsing document in field [{}]", e, field); throw new ElasticsearchParseException("Error parsing document in field [{}]", e, field);
} }
if (properties.contains(Property.CONTENT) && Strings.hasLength(parsedContent)) {
// somehow tika seems to append a newline at the end automatically, lets remove that again
additionalFields.put(Property.CONTENT.toLowerCase(), parsedContent.trim());
}
if (properties.contains(Property.LANGUAGE) && Strings.hasLength(parsedContent)) {
LanguageIdentifier identifier = new LanguageIdentifier(parsedContent);
String language = identifier.getLanguage();
additionalFields.put(Property.LANGUAGE.toLowerCase(), language);
}
if (properties.contains(Property.DATE)) {
String createdDate = metadata.get(TikaCoreProperties.CREATED);
if (createdDate != null) {
additionalFields.put(Property.DATE.toLowerCase(), createdDate);
}
}
if (properties.contains(Property.TITLE)) {
String title = metadata.get(TikaCoreProperties.TITLE);
if (Strings.hasLength(title)) {
additionalFields.put(Property.TITLE.toLowerCase(), title);
}
}
if (properties.contains(Property.AUTHOR)) {
String author = metadata.get("Author");
if (Strings.hasLength(author)) {
additionalFields.put(Property.AUTHOR.toLowerCase(), author);
}
}
if (properties.contains(Property.KEYWORDS)) {
String keywords = metadata.get("Keywords");
if (Strings.hasLength(keywords)) {
additionalFields.put(Property.KEYWORDS.toLowerCase(), keywords);
}
}
if (properties.contains(Property.CONTENT_TYPE)) {
String contentType = metadata.get(Metadata.CONTENT_TYPE);
if (Strings.hasLength(contentType)) {
additionalFields.put(Property.CONTENT_TYPE.toLowerCase(), contentType);
}
}
if (properties.contains(Property.CONTENT_LENGTH)) {
String contentLength = metadata.get(Metadata.CONTENT_LENGTH);
long length;
if (Strings.hasLength(contentLength)) {
length = Long.parseLong(contentLength);
} else {
length = parsedContent.length();
}
additionalFields.put(Property.CONTENT_LENGTH.toLowerCase(), length);
}
ingestDocument.setFieldValue(targetField, additionalFields); ingestDocument.setFieldValue(targetField, additionalFields);
} }