diff --git a/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java b/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java index 6c15f448d50..1636f0aaf05 100644 --- a/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java +++ b/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java @@ -140,6 +140,10 @@ public class TikaEntityProcessor extends EntityProcessorBase { } tikaParser.parse(is, contentHandler, metadata , context); } catch (Exception e) { + if(SKIP.equals(onError)) { + throw new DataImportHandlerException(DataImportHandlerException.SKIP_ROW, + "Document skipped :" + e.getMessage(), e); /* keep e as the cause so the original failure's stack trace is not lost */ + } wrapAndThrow(SEVERE, e, "Unable to read content"); } IOUtils.closeQuietly(is); diff --git a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/bad.doc b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/bad.doc new file mode 100644 index 00000000000..5944c24b2cf Binary files /dev/null and b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/bad.doc differ diff --git a/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java b/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java index b6179577185..ded01134cb8 100644 --- a/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java +++ b/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java @@ -49,6 +49,19 @@ public class TestTikaEntityProcessor extends AbstractDataImportHandlerTestCase { " " + ""; + private String skipOnErrConf = + "" + + " " + + " " + + " " + + "" + + " " + + " " + + " " + + "" + + " " + + ""; + private String[] tests = { "//*[@numFound='1']" 
,"//str[@name='author'][.='Grant Ingersoll']" @@ -85,6 +98,12 @@ public class TestTikaEntityProcessor extends AbstractDataImportHandlerTestCase { assertQ(req("*:*"), tests ); } + @Test + public void testSkip() throws Exception { + runFullImport(skipOnErrConf); + assertQ(req("*:*"), "//*[@numFound='1']"); + } + @Test public void testTikaHTMLMapperEmpty() throws Exception { runFullImport(getConfigHTML(null));