mirror of https://github.com/apache/lucene.git
SOLR-1318: Added ICU4J to extraction and test for Arabic
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@921425 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4f5166fdae
commit
ca50eef4ca
|
@ -29,6 +29,7 @@ $Id:$
|
|||
|
||||
* SOLR-1738: Upgrade to Tika 0.6 (gsingers)
|
||||
|
||||
* SOLR-1813: Add ICU4j to libs and add tests for Arabic extraction (Robert Muir via gsingers)
|
||||
|
||||
================== Release 1.4.0 ==================
|
||||
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
AnyObjectId[bf0d532cb19e6ce3972f370a13a1940d1a8d1db8] was removed in git history.
|
||||
Apache SVN contains full history.
|
|
@ -322,6 +322,22 @@ public class ExtractingRequestHandlerTest extends AbstractSolrTestCase {
|
|||
assertTrue(val + " is not equal to " + "linkNews", val.equals("linkNews") == true);//there are two <a> tags, and they get collapesd
|
||||
}
|
||||
|
||||
/** test arabic PDF extraction is functional */
|
||||
public void testArabicPDF() throws Exception {
|
||||
ExtractingRequestHandler handler = (ExtractingRequestHandler)
|
||||
h.getCore().getRequestHandler("/update/extract");
|
||||
assertTrue("handler is null and it shouldn't be", handler != null);
|
||||
|
||||
loadLocal("arabic.pdf", "fmap.created", "extractedDate", "fmap.producer", "extractedProducer",
|
||||
"fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords",
|
||||
"fmap.Author", "extractedAuthor",
|
||||
"fmap.content", "wdf_nocase",
|
||||
"literal.id", "one",
|
||||
"fmap.Last-Modified", "extractedDate");
|
||||
assertQ(req("wdf_nocase:السلم"), "//result[@numFound=0]");
|
||||
assertU(commit());
|
||||
assertQ(req("wdf_nocase:السلم"), "//result[@numFound=1]");
|
||||
}
|
||||
|
||||
SolrQueryResponse loadLocal(String filename, String... args) throws Exception {
|
||||
LocalSolrQueryRequest req = (LocalSolrQueryRequest) req(args);
|
||||
|
|
Binary file not shown.
Loading…
Reference in New Issue