SOLR-1310: Upgrade to Tika 0.4

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@798253 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Grant Ingersoll 2009-07-27 18:48:58 +00:00
parent 4a4fc9a5c9
commit 1c2c5ed19f
22 changed files with 36 additions and 20 deletions

View File

@ -28,4 +28,9 @@ $Id:$
3. SOLR-1075: Upgrade to Tika 0.3. See http://www.apache.org/dist/lucene/tika/CHANGES-0.3.txt (gsingers) 3. SOLR-1075: Upgrade to Tika 0.3. See http://www.apache.org/dist/lucene/tika/CHANGES-0.3.txt (gsingers)
4. SOLR-1128: Added metadata output to "extract only" option. (gsingers) 4. SOLR-1128: Added metadata output to "extract only" option. (gsingers)
5. SOLR-1310: Upgrade to Tika 0.4. Note there are some differences in detecting Languages now.
See http://www.lucidimagination.com/search/document/d6f1899a85b2a45c/vote_apache_tika_0_4_release_candidate_2#d6f1899a85b2a45c
for discussion on language detection.
See http://www.apache.org/dist/lucene/tika/CHANGES-0.4.txt. (gsingers)

View File

@ -1,2 +0,0 @@
AnyObjectId[680f8c60c1f0393f7e56595e24b29b3ceb46e933] was removed in git history.
Apache SVN contains full history.

View File

@ -0,0 +1,2 @@
AnyObjectId[3f78d2a6b7154d80e06bb96441dcf4faa60518c5] was removed in git history.
Apache SVN contains full history.

View File

@ -1,2 +0,0 @@
AnyObjectId[552721d0e8deb28f2909cfc5ec900a5e35736795] was removed in git history.
Apache SVN contains full history.

View File

@ -0,0 +1,2 @@
AnyObjectId[c1cf5b3c233fabfc9765d0cb641dcb7e903ec179] was removed in git history.
Apache SVN contains full history.

View File

@ -0,0 +1,2 @@
AnyObjectId[78d832c11c42023d4bc12077a1d9b7b5025217bc] was removed in git history.
Apache SVN contains full history.

View File

@ -1,2 +0,0 @@
AnyObjectId[b73a80fab641131e6fbe3ae833549efb3c540d17] was removed in git history.
Apache SVN contains full history.

View File

@ -0,0 +1,2 @@
AnyObjectId[1deef144cb17ed2c11c6cdcdcb2d9530fa8d0b47] was removed in git history.
Apache SVN contains full history.

View File

@ -1,2 +0,0 @@
AnyObjectId[c9030febd2ae484532407db9ef98247cbe61b779] was removed in git history.
Apache SVN contains full history.

View File

@ -0,0 +1,2 @@
AnyObjectId[acb1d7b1d4e00241adba2188235bc85e51ab010c] was removed in git history.
Apache SVN contains full history.

View File

@ -0,0 +1,2 @@
AnyObjectId[0d6d37423b2e2b53aba04bb09845233502619cb6] was removed in git history.
Apache SVN contains full history.

View File

@ -0,0 +1,2 @@
AnyObjectId[953613cba49cc76b0562116af797c68ccf2a0a52] was removed in git history.
Apache SVN contains full history.

View File

@ -1,2 +1,2 @@
AnyObjectId[f821d644766c4d5c95e53db4b83cc6cb37b553f6] was removed in git history. AnyObjectId[9bfe670f69cbf240764b6afd74c166afc7d62256] was removed in git history.
Apache SVN contains full history. Apache SVN contains full history.

View File

@ -1,2 +0,0 @@
AnyObjectId[0e3279b961c07d4d4ed909dade956aacfbf1e785] was removed in git history.
Apache SVN contains full history.

View File

@ -0,0 +1,2 @@
AnyObjectId[3281c6e43309fc39dda416d47f632e10f2367c1b] was removed in git history.
Apache SVN contains full history.

View File

@ -0,0 +1,2 @@
AnyObjectId[206d9920d36cbe51e1cd7c801294734ae1401505] was removed in git history.
Apache SVN contains full history.

View File

@ -1,2 +0,0 @@
AnyObjectId[6877c97853cafab42e5c0a88f3b39b6cd31165ca] was removed in git history.
Apache SVN contains full history.

View File

@ -0,0 +1,2 @@
AnyObjectId[c1a27b5fef037f0d81a115a8ef377b95356bbbed] was removed in git history.
Apache SVN contains full history.

View File

@ -1,2 +0,0 @@
AnyObjectId[4dd90139eda215b4489ae99c1eb6cb97b953c884] was removed in git history.
Apache SVN contains full history.

View File

@ -0,0 +1,2 @@
AnyObjectId[32b269484b5fa5f7e695e66346df089cfb740780] was removed in git history.
Apache SVN contains full history.

View File

@ -0,0 +1,2 @@
AnyObjectId[8451a7cc91b2c3c25429446d0d3adf89af2e31c8] was removed in git history.
Apache SVN contains full history.

View File

@ -79,8 +79,9 @@ public class ExtractingRequestHandlerTest extends AbstractSolrTestCase {
"literal.id","simple2", "literal.id","simple2",
"uprefix", "t_", "uprefix", "t_",
"lowernames", "true", "lowernames", "true",
"captureAttr", "true", "map.a","t_href", "captureAttr", "true",
"map.content_language", "abcxyz", // test that lowernames is applied before mapping, and uprefix is applied after mapping "map.a","t_href",
"map.content_type", "abcxyz", // test that lowernames is applied before mapping, and uprefix is applied after mapping
"commit", "true" // test immediate commit "commit", "true" // test immediate commit
); );
@ -88,7 +89,7 @@ public class ExtractingRequestHandlerTest extends AbstractSolrTestCase {
// assertQ(req("q","id:simple2","indent","true"), "//*[@numFound='0']"); // assertQ(req("q","id:simple2","indent","true"), "//*[@numFound='0']");
// test both lowernames and unknown field mapping // test both lowernames and unknown field mapping
assertQ(req("+id:simple2 +t_content_type:[* TO *]"), "//*[@numFound='1']"); //assertQ(req("+id:simple2 +t_content_type:[* TO *]"), "//*[@numFound='1']");
assertQ(req("+id:simple2 +t_href:[* TO *]"), "//*[@numFound='1']"); assertQ(req("+id:simple2 +t_href:[* TO *]"), "//*[@numFound='1']");
assertQ(req("+id:simple2 +t_abcxyz:[* TO *]"), "//*[@numFound='1']"); assertQ(req("+id:simple2 +t_abcxyz:[* TO *]"), "//*[@numFound='1']");
@ -98,7 +99,6 @@ public class ExtractingRequestHandlerTest extends AbstractSolrTestCase {
"uprefix", "t_", "uprefix", "t_",
"lowernames", "true", "lowernames", "true",
"captureAttr", "true", "map.a","t_href", "captureAttr", "true", "map.a","t_href",
"map.content_language", "abcxyz",
"commit", "true" "commit", "true"
,"boost.t_href", "100.0" ,"boost.t_href", "100.0"
@ -106,6 +106,7 @@ public class ExtractingRequestHandlerTest extends AbstractSolrTestCase {
assertQ(req("t_href:http"), "//*[@numFound='2']"); assertQ(req("t_href:http"), "//*[@numFound='2']");
assertQ(req("t_href:http"), "//doc[1]/str[.='simple3']"); assertQ(req("t_href:http"), "//doc[1]/str[.='simple3']");
assertQ(req("+id:simple3 +t_content_type:[* TO *]"), "//*[@numFound='1']");//test lowercase and then uprefix
// test capture // test capture
loadLocal("simple.html", loadLocal("simple.html",