minor attachments cleanups: IDE test support and EPUB format

The documentation says we support EPUB, but the parser is not enabled.
This parser does not require any external dependencies, so I think it's OK?

Separately, test-framework drags in an ancient commons-codec (via httpclient), which Gradle
"upgrades", but IDEs can't handle this case and just hit jar hell. So just wire that to 1.9,
which allows running tests in the IDE for this plugin.
This commit is contained in:
Robert Muir 2015-11-09 11:30:58 -05:00
parent 013347be9b
commit ac612208b7
5 changed files with 14 additions and 1 deletions

View File

@ -55,4 +55,5 @@ compileJava.options.compilerArgs << '-Xlint:-cast,-deprecation,-rawtypes'
// Paths matching these globs are excluded from the forbiddenPatterns configuration below.
// NOTE(review): presumably these are binary sample documents used as test fixtures,
// which a text-pattern check cannot scan meaningfully — confirm.
forbiddenPatterns {
exclude '**/*.docx'
exclude '**/*.pdf'
exclude '**/*.epub'
}

View File

@ -67,6 +67,7 @@ final class TikaImpl {
new org.apache.tika.parser.odf.OpenDocumentParser(),
new org.apache.tika.parser.iwork.IWorkPackageParser(),
new org.apache.tika.parser.xml.DcXMLParser(),
new org.apache.tika.parser.epub.EpubParser(),
};
/** autodetector based on this subset */

View File

@ -97,6 +97,14 @@ public class VariousDocTests extends AttachmentUnitTestCase {
testMapper("text-in-english.txt", false);
}
/**
 * Test for .epub: passes the sample file to {@code assertParseable} and then
 * to {@code testMapper}.
 */
public void testEpubDocument() throws Exception {
    // Single source of truth for the fixture name used by both checks.
    final String epubSample = "testEPUB.epub";
    assertParseable(epubSample);
    testMapper(epubSample, false);
}
/**
* Test for ASCIIDOC
* Not yet supported by Tika: https://github.com/elasticsearch/elasticsearch-mapper-attachments/issues/29

View File

@ -33,7 +33,10 @@ dependencies {
compile('org.hamcrest:hamcrest-all:1.3') {
exclude group: 'org.hamcrest', module: 'hamcrest-core'
}
compile "org.apache.httpcomponents:httpclient:${versions.httpclient}"
compile("org.apache.httpcomponents:httpclient:${versions.httpclient}") {
exclude group: 'commons-codec', module: 'commons-codec'
}
compile "commons-codec:commons-codec:1.9"
}
compileJava.options.compilerArgs << '-Xlint:-cast,-deprecation,-fallthrough,-overrides,-rawtypes,-serial,-try,-unchecked'