SOLR-7076: TikaEntityProcessor should have support for onError=skip

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1658664 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Noble Paul 2015-02-10 10:49:11 +00:00
parent ac50da1613
commit 9a77ceee6a
3 changed files with 23 additions and 0 deletions

View File

@ -140,6 +140,10 @@ public class TikaEntityProcessor extends EntityProcessorBase {
} }
tikaParser.parse(is, contentHandler, metadata , context); tikaParser.parse(is, contentHandler, metadata , context);
} catch (Exception e) { } catch (Exception e) {
// When the entity is configured with onError=skip, signal SKIP_ROW instead of
// falling through to the SEVERE failure below. The original exception is passed
// along as the cause so its stack trace is not lost when the skip is reported.
if (SKIP.equals(onError)) {
  throw new DataImportHandlerException(DataImportHandlerException.SKIP_ROW,
      "Document skipped :" + e.getMessage(), e);
}
wrapAndThrow(SEVERE, e, "Unable to read content"); wrapAndThrow(SEVERE, e, "Unable to read content");
} }
IOUtils.closeQuietly(is); IOUtils.closeQuietly(is);

View File

@ -49,6 +49,19 @@ public class TestTikaEntityProcessor extends AbstractDataImportHandlerTestCase {
" </document>" + " </document>" +
"</dataConfig>"; "</dataConfig>";
// Data config used by testSkip: two Tika entities read through a BinFileDataSource.
// The first entity (dihextras/bad.doc) is declared with onError="skip"; the second
// (dihextras/solr-word.pdf) has no onError attribute.
// NOTE(review): bad.doc is presumably a file Tika fails to parse - confirm against the test resources.
private String skipOnErrConf =
    "<dataConfig> <dataSource type=\"BinFileDataSource\"/> <document>"
        + " <entity name=\"Tika\" onError=\"skip\" processor=\"TikaEntityProcessor\""
        + " url=\"" + getFile("dihextras/bad.doc").getAbsolutePath() + "\" >"
        + "<field column=\"content\" name=\"text\"/> </entity>"
        + " <entity name=\"Tika\" processor=\"TikaEntityProcessor\""
        + " url=\"" + getFile("dihextras/solr-word.pdf").getAbsolutePath() + "\" >"
        + " <field column=\"text\"/></entity> </document></dataConfig>";
private String[] tests = { private String[] tests = {
"//*[@numFound='1']" "//*[@numFound='1']"
,"//str[@name='author'][.='Grant Ingersoll']" ,"//str[@name='author'][.='Grant Ingersoll']"
@ -85,6 +98,12 @@ public class TestTikaEntityProcessor extends AbstractDataImportHandlerTestCase {
assertQ(req("*:*"), tests ); assertQ(req("*:*"), tests );
} }
/**
 * Runs a full import with {@code skipOnErrConf} and checks that a match-all
 * query reports exactly one document.
 */
@Test
public void testSkip() throws Exception {
  final String exactlyOneDoc = "//*[@numFound='1']";
  runFullImport(skipOnErrConf);
  assertQ(req("*:*"), exactlyOneDoc);
}
@Test @Test
public void testTikaHTMLMapperEmpty() throws Exception { public void testTikaHTMLMapperEmpty() throws Exception {
runFullImport(getConfigHTML(null)); runFullImport(getConfigHTML(null));