From acf2b4c60cb2563426eceed96901c6a98822e11f Mon Sep 17 00:00:00 2001 From: Erik Hatcher Date: Sat, 30 Apr 2005 00:07:27 +0000 Subject: [PATCH] Remove outdated sandbox code git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@165365 13f79535-47bb-0310-9956-ffa450edef68 --- .../XML-Indexing-Demo/IndexingRequest.xml | 10 - .../XML-Indexing-Demo/README.txt | 6 - .../XML-Indexing-Demo/XMLIndexingDemo.zip | Bin 7368 -> 0 bytes .../contributions/XML-Indexing-Demo/build.xml | 10 - .../xmlindexingdemo/IndexFiles.java | 111 -------- .../xmlindexingdemo/SearchFiles.java | 126 --------- .../XMLDocumentHandlerDOM.java | 131 ---------- .../XMLDocumentHandlerSAX.java | 106 -------- .../xdocs/about-LuceneIndexingDemo.xml | 17 -- sandbox/contributions/build.xml | 26 -- sandbox/contributions/common.xml | 241 ------------------ sandbox/contributions/parsers/build.xml | 19 -- sandbox/contributions/parsers/lib/readme.txt | 1 - .../lucene/parsers/pdf/PdfTextExtractor.java | 172 ------------- 14 files changed, 976 deletions(-) delete mode 100644 sandbox/contributions/XML-Indexing-Demo/IndexingRequest.xml delete mode 100644 sandbox/contributions/XML-Indexing-Demo/README.txt delete mode 100644 sandbox/contributions/XML-Indexing-Demo/XMLIndexingDemo.zip delete mode 100644 sandbox/contributions/XML-Indexing-Demo/build.xml delete mode 100644 sandbox/contributions/XML-Indexing-Demo/src/java/org/apache/lucenesandbox/xmlindexingdemo/IndexFiles.java delete mode 100644 sandbox/contributions/XML-Indexing-Demo/src/java/org/apache/lucenesandbox/xmlindexingdemo/SearchFiles.java delete mode 100644 sandbox/contributions/XML-Indexing-Demo/src/java/org/apache/lucenesandbox/xmlindexingdemo/XMLDocumentHandlerDOM.java delete mode 100644 sandbox/contributions/XML-Indexing-Demo/src/java/org/apache/lucenesandbox/xmlindexingdemo/XMLDocumentHandlerSAX.java delete mode 100644 sandbox/contributions/XML-Indexing-Demo/xdocs/about-LuceneIndexingDemo.xml delete mode 100644 sandbox/contributions/build.xml delete mode 100644 sandbox/contributions/common.xml delete mode 100644 sandbox/contributions/parsers/build.xml delete mode 100644 sandbox/contributions/parsers/lib/readme.txt delete mode 100644 sandbox/contributions/parsers/src/java/org/apache/lucene/parsers/pdf/PdfTextExtractor.java diff --git a/sandbox/contributions/XML-Indexing-Demo/IndexingRequest.xml b/sandbox/contributions/XML-Indexing-Demo/IndexingRequest.xml deleted file mode 100644 index ef89135ff8d..00000000000 --- a/sandbox/contributions/XML-Indexing-Demo/IndexingRequest.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - - - - diff --git a/sandbox/contributions/XML-Indexing-Demo/README.txt b/sandbox/contributions/XML-Indexing-Demo/README.txt deleted file mode 100644 index 26173a12609..00000000000 --- a/sandbox/contributions/XML-Indexing-Demo/README.txt +++ /dev/null @@ -1,6 +0,0 @@ -This is the README file for XML Indexing Demo contributed by Aruna Raghavan. - -$Id$ - -Lucene Indexing Demo illustrates how one can parse and index XML documents -using a SAX2 or DOM parser with Lucene. diff --git a/sandbox/contributions/XML-Indexing-Demo/XMLIndexingDemo.zip b/sandbox/contributions/XML-Indexing-Demo/XMLIndexingDemo.zip deleted file mode 100644 index aa42058d175a64953b8996012d0c80c7cc15a879..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 7368 zcmb_>byQSq+y2lU(j^U2(kUSW3_WxsAl==dpnx<;w+cwNbPh;|bPpX;A`B^#A0E&8 z>Ctn3@B5wgtyyc<-hW)z^*q5rd_15lt^dz%kr0oO$d!^ma}Hkmq~< z(0W_-0>K1{3haOfI$b~t%JU+m^n`WI(ek=(*&+*cfz4)xxHA=UHHMwk51M12D=fP3 z>Z=_o0t)t?GWH`aS+uVgdsv-4cR z&-6In>FBM#da$mlK^aO*=`zr~svKLyu&p)Ch`-P1Ir5qJV&BmVUgg#k^jjAI0H{u0Q^ zi{*E`%s*=XiYNFLZ|r1jYGwYvrJ?-?8asDWa|d%*V+S)6N3Z`xYW9mS-2WBnZP8i( zS%+rk_Kx30KKzY=-|JZTR-`0=7u<#n08rur0Kk7G@<-)MTicntvVVJOe?0K#x0+f% zReg=Ns^gqAo}bdQ%aG@W@_d-Dj`s{#Po1izW)bOCWz;+Z9z7ph?}rMA$R6ACn|2C6d)5#i|`H4;H(HOEo*5yyqayR~I--kzA^CjlE=EzEXa=aVK2-K}6D9zN%h5neUi#tgD+BDp6=W zp=7{H#<#T(h&|S|rcxKj<+uCmtK$dBs%IT*?CnzI30)Xmn@SWgP3hbi(xnNiM|TW~ zysW2m196^W&e(Zq|GF7Kojn$)H+5x9*(k@nJgpdy4!OzkVPhKH!fxb#*F>b0|>k+~ILDS&<5 zp)Mye02m_IGp+d94aJm+t|o9^`qkTBWcRG%61JwZd!TD5i<%{&AW8LCEIZiyqdvVr zYz0e!8kB`hLL_(Dx&Tkn9q_wld2%u0)H8X$Z2UF~3}nI=l0jN2jHJ!$shSyN;Z>J8mg&tp{7{MI)z(?MOawwjGVVLI5CYc`mS{Vy zc}pv2UHDI)v<)0J6Iuvg-ou4%He`4k32;zN^yG{cb7s+}CzMmQ9EBikmu#j5-tfFR1cYtb|TX_1M-x z35bZxQ~nf;Zwsl00=V|FoT77?>#;YO8MLq6Y_v~_uThP9p4=OC5j!2{Orv8Ur&q)$ z10C^VKNkzfZ$y2z)S7~PA27m+z4kc4B`?434imzzHLG(asx-|tz!c8qWNnyTJ-A*D zHq3y@C1>itVT6n(q)b``JTCzzjB;aJWMEmzClSk$icz(uxeN8BeKps#KYKH?2s`xu~0GN$Qkb%C?42x{!V)s|0(UeqeMGh}=YTU9I(L z#Nk94DT4!fI`K2(o@PUrbs%|$@~iPdL-YC~r*|6lZFZ>Dfq2?mNsj7vpY-1i^-tbZ zG=<@aR?F&9v^(XG7=ntf5jNMttfW3&t<`aqx9ax~)ypMLN^fsin|?BP*V>D%wIq9l zp32_54qH9?P%iPNb=f}Mm}7mIJi~eF%<6i7SL`U~kg<|r6G!H~#def)vYsz5XLn3g zdu_eCAA*yZXkA_Ihd!skSV?@{XI{F(Xat4(kjuzL*bwIJ8*H+q21<$!*)ZwAtl4Wl zhzwRLKZ}y_lEX%!c&K{c!=sHVG2=NneIHR#ByWH;TX;nOJ4d?iT6cPAjBstY39BC0;IP#r-Uh+cNIsYH$1@CA#W;ge&w7uJsRik zHDP!&!GnW`qm4{W-~%wiCNNTR)+B6bYz0uV?7U3BGMg$&)HC|wK_wpf@*v%sJnCrmI{I+i=YkQw+-O?1v8kNc^jwscvrUVrunE z(?nbRrD^J@y3Xs6Y%(y$Kflc0b?$tFdu@fn9=pK?ct!X59V(V9$4ZX5Ugk@;6Dd%^|Y_8Gb zd-p#KzhwUyvATOwJY60Q$vP^Bd0&M?vb=`+CWDz-EDoP1W(31iYRu=68){|uZg&c) zw(DhSvrXsLrz=#(v9}0|iG5}3&eZwH96#a_I?qN|s%{+`r`_DVSvVdLEJB#b*5#JV zi~4T7XqDcjC|onu>B@}7MXStODb zGI!=sV_VOVyIcnJSFXZ!s;R9Ps>pXg%&lKOtajh;^jX9=sP4;(;G z2S@nUXu9UA!ul}K9PC3w!?^)08By+cH_B`)(##auyc&!@EkJ}hn?TBI*d|J?a9JMX6yR#u^IQat-=!Q$#JbNK#Rp z)p2%bMtQ`fn82z9t8cE5D0k?2%Q)b`o5jMt?Bh{XBNhwfkS;s`_~cVCrLib>AbIQl z$#$~QJSxs&GmYm&&kfG{6(@EJQ@bH@xPY6?yL}M<*SCvKV_#&F`3)3J z$dI_Ou52F$Yf;QpZ1NT9+mJDkjMNaHmce>kU?F~&Kz+)y15_<*z+3jH1QY7e~G>l)#~eijU&@@dflHR_&-lg?A75y+U9Z>|w@ZqR56Ts~&) ze@5=Tq)Z(O-f5cN%O#Gj8BRf-wZM|w221Sgfe_NN`#=O22lS^~+^FZV@GqNT-TU67 zq*w?QgqXa&^oMpj;Rc@pkJ}ifDj+FRkc~;IE<+Nf?(i4v21s?(Sdd2iL@WBXb&DKH zln~$MYSx|m5|{M$!?dv2{NOUpm8%HPgL_jmj8B?&ub_a!B{Dk zw?WS2%Yd|uN`Ydy zM)ng$J*A(^o?WZ0G>(mTK8-CMv3CkAE~{#oCC1byWKTk3=fryMtg?)5P&h}>SUDxIw-Pw{He>eov_i9+mrU{M8x6#PVwjs#(JyKfy)(q18XG8aM+dhN+hYn z^4R0Fn!LLmMw+qCHq_jTaQ@WW)sMhDu`0RJRfv;pY6&I1_j9n;CSN`@E`k&{N)sK? z?lc?X@xiH0vJEkB;#A|eOpfjYU+w9>omvmksp7PGd1UEcvm|X@H!)#gli=}2zx(oS zQB}4(!`JpZ-kxh+9TXHX3f`RtcQsT$NPDk7Rz^i4#%Z%jkq6oXJiNPF z2OUI+v`K=xbH85dYnTL~WY^s@u)71Mxk`^6FUa9R$}D9QQaH)wvfC-3f$Na!h&@<| zjnH*)#~kkGJ4O;z2tl5Uec?RIt+D_GYfA8HKOCBCp8RNT-IdSA{0eLrQbCW=$9)q0 zcvrx(XlEkN!mFxJJTZs$k;cQ9EwD{4C=ccv3RXI&6tY;|IDePn3nH(t1KR zn`9RlQ*(G^p+wRu;s!iIoLd z3anjq^ssr4!Mbm~)_bGij57EI01^RZMe3;qx3!_v}Z=TgI zN>aXd9Q5=rZ5UMudekZok$jbGT7=x)%D=U5^3^<&zTRh)Zf;!qxbRx*v+)CfzsA^Re&rH=4k+ldt@Q@(TWaxR2=JZO>|ull0FsaZ0L6bb1n7Vjr5sJ& z?adwBWN%XeJ98Ixpw5qP=wHWyC=IaV94Dq|Piuh8J=BDSz`J8LUFc?1;+{;LyO}eQ zrQjq?IVBN-tNpiZUyBtQznY}3(@{||Pm5f1VIbP0ciX;B>@|zek{HnR8@X@G%B&Gx zssf5mTx1L@u(5v>LVd5Q*}V8OgKb!)v}w7Q=Mbs06~*2Qcgap=zf}WOEmu=IAM`h| zW(`AW=!vFv2NhQ@XMCS?Q&pYEDOzzW%tQFao;29W6NSJ8*gLiKI`WdpKD|%kVl)AjgoLD?Y<}%BW-C*ai*7_e6b3 zAIIVVPu<;F{v{J8f+3sS=^VcI709=P?`20l-v40SOey^tT8qIh%Lx0@B=TPCuO7TG{OTk83BTUs;iJt18c@m{B4(TJtTpXl6ApdShbQC9mkgAGFH zaNFZ>Lo>x3U}tS0STI$Ct}J8z%8N+h{} zdZ1ygx-hE$#w0ayek61&F7Nv63Plil$rlPKcSXWmf*61P;P{n2!1#@g1pRP&mJg-D}ovdS1%WaxSCTT*FA! zhW>&1$Q7p2G9DH;FflGeZlBO`ccyC~Wr%rzv^wj94hX;qPOy75dwF1|dHJr&UB%Gx zbQD}`MkF{-(fk$KDb5qK>=s$BtOlpF%mfW8x%iB?a+G(}zN)NAzz7G!xStbjNrl0S z3r8KAoDX8gXh)R>Zt(14aL2Qitkzj$TsF-Vw}wk4Uzv#<>z0Hq=NZ5TD?EAL>yfBt zn3&V8qB_t&#WfXmel)fnGk{g1Gx#CGd6|P&wq`JW^9$uw{(H%~O^fsHp#W+fwIGWS z1!|*+K7vqC<-|N*?)3h`7i181rORweZ4em-4?Bo*q%aIZ*@ns%pcSEEhc)7uaL;@s z@JX4C>I(-E9~f(vh({W{QCgY4S}lpXiAgXOE|;OP1;ABQk%5#iDZTPgctS~VsZIPv z82qC?4~%yL3${1s)-z29`;^F_y91%Npy^Rgw_VH0qdPW9Ye=)W^lSZtW4%cLJmV?Z z_p;uYBEbevq~)Wjqn)?Q-r4k&)9~}n4BhYd97)$}ja|7*!(v<;che#7>~-;mG$l4GCf^Czg|rs0rQA3=08_NeC@83HYw7B0JFIl zUTWZx7#zHJGTtf6g7Q)+$en#I=xGrp&}rlHiMgw-Kvp6h9+XogU+9B@(3X970M6wV zf{FqhJTbyQ9X(+E5xC!;LI7%?f4ltKg9xPW2N9ZHfZG?uH@1HY?Z2}D0Jwk~Ai*u^ z_Xz(M{w>1)75;m8{{wynRQMDAXOw@-@h!^#mE+eC9t-d@N660{-y{87o^O%v6-RSSp$*uUeS?TY@|2m}o zEDneFZ^Zw9BilbLoAJTuWq+dPweHlNR^6fIdHRbPaAl;tq000=bKOsZ_ Kz? - - - - - Example of Lucene XML indexing - - - - diff --git a/sandbox/contributions/XML-Indexing-Demo/src/java/org/apache/lucenesandbox/xmlindexingdemo/IndexFiles.java b/sandbox/contributions/XML-Indexing-Demo/src/java/org/apache/lucenesandbox/xmlindexingdemo/IndexFiles.java deleted file mode 100644 index 1dace90ed79..00000000000 --- a/sandbox/contributions/XML-Indexing-Demo/src/java/org/apache/lucenesandbox/xmlindexingdemo/IndexFiles.java +++ /dev/null @@ -1,111 +0,0 @@ -package org.apache.lucenesandbox.xmlindexingdemo; - -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache Lucene" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache", - * "Apache Lucene", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * . - */ - -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.index.IndexWriter; - -import java.io.File; -import java.util.Date; - -class IndexFiles -{ - public static void main(String[] args) - throws Exception - { - try - { - Date start = new Date(); - - IndexWriter writer = new IndexWriter("index", new StandardAnalyzer(), true); - indexDocs(writer, new File(args[0])); - - writer.optimize(); - writer.close(); - - Date end = new Date(); - - System.out.print(end.getTime() - start.getTime()); - System.out.println(" total milliseconds"); - - } - catch (Exception e) - { - System.out.println(" caught a " + e.getClass() + - "\n with message: " + e.getMessage()); - throw e; - } - } - - public static void indexDocs(IndexWriter writer, File file) - throws Exception - { - if (file.isDirectory()) - { - String[] files = file.list(); - for (int i = 0; i < files.length; i++) - indexDocs(writer, new File(file, files[i])); - } - else - { - System.out.println("adding " + file); - XMLDocumentHandlerSAX hdlr = new XMLDocumentHandlerSAX(file); - writer.addDocument(hdlr.getDocument()); - // For DOM, use - // XMLDocumentHandlerDOM hdlr = new XMLDocumentHandlerDOM(); - // writer.addDocument(hdlr.createXMLDocument(file)); - } - } -} diff --git a/sandbox/contributions/XML-Indexing-Demo/src/java/org/apache/lucenesandbox/xmlindexingdemo/SearchFiles.java b/sandbox/contributions/XML-Indexing-Demo/src/java/org/apache/lucenesandbox/xmlindexingdemo/SearchFiles.java deleted file mode 100644 index 047cfefb73b..00000000000 --- a/sandbox/contributions/XML-Indexing-Demo/src/java/org/apache/lucenesandbox/xmlindexingdemo/SearchFiles.java +++ /dev/null @@ -1,126 +0,0 @@ -package org.apache.lucenesandbox.xmlindexingdemo; - -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache Lucene" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache", - * "Apache Lucene", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * . - */ - -import java.io.IOException; -import java.io.BufferedReader; -import java.io.InputStreamReader; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.search.Searcher; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.Hits; -import org.apache.lucene.queryParser.QueryParser; - -class SearchFiles { - public static void main(String[] args) { - try { - Searcher searcher = new IndexSearcher("index"); - Analyzer analyzer = new StandardAnalyzer(); - - BufferedReader in = new BufferedReader(new InputStreamReader(System.in)); - while (true) { - System.out.print("Query: "); - String line = in.readLine(); - - if (line.length() == -1) - break; - - Query query = QueryParser.parse(line, "name", analyzer); - System.out.println("Searching for: " + query.toString("name")); - - Hits hits = searcher.search(query); - System.out.println(hits.length() + " total matching documents"); - - final int HITS_PER_PAGE = 10; - for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) - { - int end = Math.min(hits.length(), start + HITS_PER_PAGE); - for (int i = start; i < end; i++) - { - Document doc = hits.doc(i); - String name = doc.get("name"); - System.out.println(name); - System.out.println(doc.get("profession")); - System.out.println(doc.get("addressLine1")); - System.out.println(doc.get("addressLine2")); - System.out.print(doc.get("city")); - System.out.print(" "); - System.out.print(doc.get("state")); - System.out.print(" "); - System.out.print(doc.get("zip")); - System.out.println(doc.get("country")); - - } - - if (hits.length() > end) { - System.out.print("more (y/n) ? "); - line = in.readLine(); - if (line.length() == 0 || line.charAt(0) == 'n') - break; - } - } - } - searcher.close(); - - } catch (Exception e) { - System.out.println(" caught a " + e.getClass() + - "\n with message: " + e.getMessage()); - } - } -} diff --git a/sandbox/contributions/XML-Indexing-Demo/src/java/org/apache/lucenesandbox/xmlindexingdemo/XMLDocumentHandlerDOM.java b/sandbox/contributions/XML-Indexing-Demo/src/java/org/apache/lucenesandbox/xmlindexingdemo/XMLDocumentHandlerDOM.java deleted file mode 100644 index f7c57b782ea..00000000000 --- a/sandbox/contributions/XML-Indexing-Demo/src/java/org/apache/lucenesandbox/xmlindexingdemo/XMLDocumentHandlerDOM.java +++ /dev/null @@ -1,131 +0,0 @@ -package org.apache.lucenesandbox.xmlindexingdemo; - -import org.w3c.dom.*; -import org.w3c.dom.Node; -import javax.xml.parsers.*; -import org.apache.lucene.document.Field; - -import java.io.File; - -/** - * - */ -public class XMLDocumentHandlerDOM { - public org.apache.lucene.document.Document createXMLDocument(File f) { - org.apache.lucene.document.Document document = new org.apache.lucene.document.Document(); - DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); - try { - DocumentBuilder df = dbf.newDocumentBuilder(); - org.w3c.dom.Document d = df.parse(f); - Node root = d.getDocumentElement(); - traverseTree(root, document); - } catch (Exception e) { - System.out.println("error: " + e); - e.printStackTrace(); - } - return document; - } - - static private void traverseTree(Node node, org.apache.lucene.document.Document document) { - NodeList nl = node.getChildNodes(); - if (nl.getLength() == 0) { - if (node.getNodeType() == Node.TEXT_NODE) { - Node parentNode = node.getParentNode(); - if (parentNode.getNodeType() == Node.ELEMENT_NODE) { -// String parentNodeName = parentNode.getNodeName(); -// String nodeValue = node.getNodeValue(); -// if (parentNodeName.equals("name")) -// { - Node siblingNode = node.getNextSibling(); - if (siblingNode != null) { - if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE) { - document.add(Field.Text("name", siblingNode.getNodeValue())); - } - } -// } -// else if (parentNodeName.equals("profession")) -// { -// Node siblingNode = node.getNextSibling(); -// if (siblingNode != null) -// { -// if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE) -// { -// document.add(Field.Text([arentNodeName, siblingNode.getNodeValue())); -// } -// } -// } -// else if (parentNodeName == "addressLine1") -// { -// Node siblingNode = node.getNextSibling(); -// if(siblingNode != null) -// { -// if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE) -// { -// document.add(Field.Text("addressLine1", siblingNode.getNodeValue())); -// } -// } -// } -// else if (parentNodeName.equals("addressLine2")) -// { -// Node siblingNode = node.getNextSibling(); -// if (siblingNode != null) -// { -// if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE) -// { -// document.add(Field.Text("addressLine2", siblingNode.getNodeValue())); -// } -// } -// } -// if (parentNodeName.equals("city")) -// { -// Node siblingNode = node.getNextSibling(); -// if (siblingNode != null) -// { -// if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE) -// { -// document.add(Field.Text("city", siblingNode.getNodeValue())); -// } -// } -// } -// else if (parentNodeName.equals("zip")) -// { -// Node siblingNode = node.getNextSibling(); -// if (siblingNode != null) -// { -// if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE) -// { -// document.add(Field.Text("zip", siblingNode.getNodeValue())); -// } -// } -// } -// else if (parentNodeName.equals("state")) -// { -// Node siblingNode = node.getNextSibling(); -// if (siblingNode != null) -// { -// if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE) -// { -// document.add(Field.Text("state", siblingNode.getNodeValue())); -// } -// } -// } -// else if (parentNodeName.equals("country")) -// { -// Node siblingNode = node.getNextSibling(); -// if (siblingNode != null) -// { -// if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE) -// { -// document.add(Field.Text("country", siblingNode.getNodeValue())); -// } -// } -// } - } - } - } else { - for (int i = 0; i < nl.getLength(); i++) { - traverseTree(nl.item(i), document); - } - } - } -} diff --git a/sandbox/contributions/XML-Indexing-Demo/src/java/org/apache/lucenesandbox/xmlindexingdemo/XMLDocumentHandlerSAX.java b/sandbox/contributions/XML-Indexing-Demo/src/java/org/apache/lucenesandbox/xmlindexingdemo/XMLDocumentHandlerSAX.java deleted file mode 100644 index 32170daa22e..00000000000 --- a/sandbox/contributions/XML-Indexing-Demo/src/java/org/apache/lucenesandbox/xmlindexingdemo/XMLDocumentHandlerSAX.java +++ /dev/null @@ -1,106 +0,0 @@ -package org.apache.lucenesandbox.xmlindexingdemo; - -/** - * Copyright 2004 The Apache Software Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; - -import java.io.File; -import java.io.IOException; - -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.parsers.SAXParser; -import javax.xml.parsers.SAXParserFactory; -import org.xml.sax.Attributes; -import org.xml.sax.SAXException; -import org.xml.sax.helpers.DefaultHandler; - -public class XMLDocumentHandlerSAX extends DefaultHandler { - /** A buffer for each XML element */ - private StringBuffer elementBuffer = new StringBuffer(); - - private Document mDocument; - - // constructor - public XMLDocumentHandlerSAX(File xmlFile) - throws ParserConfigurationException, SAXException, IOException { - SAXParserFactory spf = SAXParserFactory.newInstance(); - - // use validating parser? - //spf.setValidating(false); - // make parser name space aware? - //spf.setNamespaceAware(true); - - SAXParser parser = spf.newSAXParser(); - //System.out.println("parser is validating: " + parser.isValidating()); - try { - parser.parse(xmlFile, this); - } catch (org.xml.sax.SAXParseException spe) { - System.out.println("SAXParser caught SAXParseException at line: " + - spe.getLineNumber() + " column " + - spe.getColumnNumber()); - } - } - - // call at document start - public void startDocument() throws SAXException { - mDocument = new Document(); - } - - // call at element start - public void startElement(String namespaceURI, String localName, - String qualifiedName, Attributes attrs) throws SAXException { - - String eName = localName; - if ("".equals(eName)) { - eName = qualifiedName; // namespaceAware = false - } - // list the attribute(s) - if (attrs != null) { - for (int i = 0; i < attrs.getLength(); i++) { - String aName = attrs.getLocalName(i); // Attr name - if ("".equals(aName)) { aName = attrs.getQName(i); } - // perform application specific action on attribute(s) - // for now just dump out attribute name and value - System.out.println("attr " + aName+"="+attrs.getValue(i)); - } - } - elementBuffer.setLength(0); - } - - // call when cdata found - public void characters(char[] text, int start, int length) - throws SAXException { - elementBuffer.append(text, start, length); - } - - // call at element end - public void endElement(String namespaceURI, String simpleName, - String qualifiedName) throws SAXException { - - String eName = simpleName; - if ("".equals(eName)) { - eName = qualifiedName; // namespaceAware = false - } - - mDocument.add(Field.Text(eName, elementBuffer.toString())); - } - - public Document getDocument() { - return mDocument; - } -} diff --git a/sandbox/contributions/XML-Indexing-Demo/xdocs/about-LuceneIndexingDemo.xml b/sandbox/contributions/XML-Indexing-Demo/xdocs/about-LuceneIndexingDemo.xml deleted file mode 100644 index 3538ebb8175..00000000000 --- a/sandbox/contributions/XML-Indexing-Demo/xdocs/about-LuceneIndexingDemo.xml +++ /dev/null @@ -1,17 +0,0 @@ - - - -Aruna Raghavan -Otis Gospodnetic -Lucene Indexing Demo - - - - -
-

Lucene Indexing Demo illustrates how one can parse XML documents -using a SAX2 or DOM and index them with Lucene.

-
- - -
diff --git a/sandbox/contributions/build.xml b/sandbox/contributions/build.xml deleted file mode 100644 index c56a1dd1557..00000000000 --- a/sandbox/contributions/build.xml +++ /dev/null @@ -1,26 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - diff --git a/sandbox/contributions/common.xml b/sandbox/contributions/common.xml deleted file mode 100644 index 7e83e378195..00000000000 --- a/sandbox/contributions/common.xml +++ /dev/null @@ -1,241 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Unit tests failed. Check log or reports for details - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/sandbox/contributions/parsers/build.xml b/sandbox/contributions/parsers/build.xml deleted file mode 100644 index d47d11d4881..00000000000 --- a/sandbox/contributions/parsers/build.xml +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - Document parsers - - - - - - - - - - diff --git a/sandbox/contributions/parsers/lib/readme.txt b/sandbox/contributions/parsers/lib/readme.txt deleted file mode 100644 index 42c80cf3a2f..00000000000 --- a/sandbox/contributions/parsers/lib/readme.txt +++ /dev/null @@ -1 +0,0 @@ -Place pj.jar here (from http://www.etymon.com/pub/software/pj/) and log4j JAR. \ No newline at end of file diff --git a/sandbox/contributions/parsers/src/java/org/apache/lucene/parsers/pdf/PdfTextExtractor.java b/sandbox/contributions/parsers/src/java/org/apache/lucene/parsers/pdf/PdfTextExtractor.java deleted file mode 100644 index 6ef4a19ca20..00000000000 --- a/sandbox/contributions/parsers/src/java/org/apache/lucene/parsers/pdf/PdfTextExtractor.java +++ /dev/null @@ -1,172 +0,0 @@ -package org.apache.lucene.parsers.pdf; - -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" - * must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache", - * nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * . - */ - -import com.etymon.pj.Pdf; -import com.etymon.pj.exception.InvalidPdfObjectException; -import com.etymon.pj.exception.PjException; -import com.etymon.pj.object.PjArray; -import com.etymon.pj.object.PjObject; -import com.etymon.pj.object.PjPage; -import com.etymon.pj.object.PjStream; -import org.apache.log4j.Category; - -import java.io.File; -import java.io.IOException; -import java.util.Vector; - -/** - *

- * Attempts to extract text from a PDF file. - *

- *

- * - * Known limitations - *

- * - * @author Kelvin Tan - * @version $Revision$ - */ -public class PdfTextExtractor -{ - private static Category cat = Category.getInstance(PdfTextExtractor.class); - - public static void main(String[] args) - { - File f = new File("/usr/local/test.pdf"); - try - { - Pdf pdf = new Pdf(f.toString()); - int pagecount = pdf.getPageCount(); - cat.debug(f.toString() + "has " + pagecount + " pages."); - for (int i = 1; i <= pagecount; i++) - { - System.out.println(getContent(pdf, i)); - } - } - catch (IOException ioe) - { - cat.error("IOException parsing PDF file:" + f.toString(), ioe); - } - catch (PjException pje) - { - cat.error("PjException parsing PDF file:" + f.toString(), pje); - } - } - - private static String getContent(Pdf pdf, int pageNo) - { - String content = null; - PjStream stream = null; - StringBuffer strbf = new StringBuffer(); - try - { - PjPage page = (PjPage) pdf.getObject(pdf.getPage(pageNo)); - PjObject pobj = (PjObject) pdf.resolve(page.getContents()); - if (pobj instanceof PjArray) - { - PjArray array = (PjArray) pobj; - Vector vArray = array.getVector(); - int size = vArray.size(); - for (int j = 0; j < size; j++) - { - stream = (PjStream) pdf.resolve((PjObject) vArray.get(j)); - strbf.append(getStringFromPjStream(stream)); - } - content = strbf.toString(); - } - else - { - stream = (PjStream) pobj; - content = getStringFromPjStream(stream); - } - } - catch (InvalidPdfObjectException pdfe) - { - cat.error("Invalid PDF Object:" + pdfe, pdfe); - } - catch (Exception e) - { - cat.error("Exception in getContent() " + e, e); - } - return content; - } - - private static String getStringFromPjStream(PjStream stream) - { - StringBuffer strbf = new StringBuffer(); - try - { - int start,end = 0; - stream = stream.flateDecompress(); - String longString = stream.toString(); - int strlen = longString.length(); - int lastIndex = longString.lastIndexOf(')'); - while (lastIndex != -1 && end != lastIndex) - { - start = longString.indexOf('(', end); - end = longString.indexOf(')', start); - String text = longString.substring(start + 1, end); - strbf.append(text); - } - } - catch (InvalidPdfObjectException pdfe) - { - cat.error("InvalidObjectException:" + pdfe.getMessage(), pdfe); - } - return strbf.toString(); - } -} -