mirror of https://github.com/apache/lucene.git
SOLR-7076: TikaEntityProcessor should have support for onError=skip
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1658664 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ac50da1613
commit
9a77ceee6a
|
@ -140,6 +140,10 @@ public class TikaEntityProcessor extends EntityProcessorBase {
|
||||||
}
|
}
|
||||||
tikaParser.parse(is, contentHandler, metadata , context);
|
tikaParser.parse(is, contentHandler, metadata , context);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
if(SKIP.equals(onError)) {
|
||||||
|
throw new DataImportHandlerException(DataImportHandlerException.SKIP_ROW,
|
||||||
|
"Document skipped :" + e.getMessage());
|
||||||
|
}
|
||||||
wrapAndThrow(SEVERE, e, "Unable to read content");
|
wrapAndThrow(SEVERE, e, "Unable to read content");
|
||||||
}
|
}
|
||||||
IOUtils.closeQuietly(is);
|
IOUtils.closeQuietly(is);
|
||||||
|
|
Binary file not shown.
|
@ -49,6 +49,19 @@ public class TestTikaEntityProcessor extends AbstractDataImportHandlerTestCase {
|
||||||
" </document>" +
|
" </document>" +
|
||||||
"</dataConfig>";
|
"</dataConfig>";
|
||||||
|
|
||||||
|
private String skipOnErrConf =
|
||||||
|
"<dataConfig>" +
|
||||||
|
" <dataSource type=\"BinFileDataSource\"/>" +
|
||||||
|
" <document>" +
|
||||||
|
" <entity name=\"Tika\" onError=\"skip\" processor=\"TikaEntityProcessor\" url=\"" + getFile("dihextras/bad.doc").getAbsolutePath() + "\" >" +
|
||||||
|
"<field column=\"content\" name=\"text\"/>" +
|
||||||
|
" </entity>" +
|
||||||
|
" <entity name=\"Tika\" processor=\"TikaEntityProcessor\" url=\"" + getFile("dihextras/solr-word.pdf").getAbsolutePath() + "\" >" +
|
||||||
|
" <field column=\"text\"/>" +
|
||||||
|
"</entity>" +
|
||||||
|
" </document>" +
|
||||||
|
"</dataConfig>";
|
||||||
|
|
||||||
private String[] tests = {
|
private String[] tests = {
|
||||||
"//*[@numFound='1']"
|
"//*[@numFound='1']"
|
||||||
,"//str[@name='author'][.='Grant Ingersoll']"
|
,"//str[@name='author'][.='Grant Ingersoll']"
|
||||||
|
@ -85,6 +98,12 @@ public class TestTikaEntityProcessor extends AbstractDataImportHandlerTestCase {
|
||||||
assertQ(req("*:*"), tests );
|
assertQ(req("*:*"), tests );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSkip() throws Exception {
|
||||||
|
runFullImport(skipOnErrConf);
|
||||||
|
assertQ(req("*:*"), "//*[@numFound='1']");
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testTikaHTMLMapperEmpty() throws Exception {
|
public void testTikaHTMLMapperEmpty() throws Exception {
|
||||||
runFullImport(getConfigHTML(null));
|
runFullImport(getConfigHTML(null));
|
||||||
|
|
Loading…
Reference in New Issue