diff --git a/sandbox/contributions/indyo/lib/jakarta-oro-2.0.6.jar b/sandbox/contributions/indyo/lib/jakarta-oro-2.0.6.jar deleted file mode 100644 index c98f821de9f..00000000000 --- a/sandbox/contributions/indyo/lib/jakarta-oro-2.0.6.jar +++ /dev/null @@ -1,2 +0,0 @@ -AnyObjectId[346504c6d4bd7232f0776a4a0f8a32333cedd93e] was removed in git history. -Apache SVN contains full history. \ No newline at end of file diff --git a/sandbox/contributions/indyo/lib/jdom.jar b/sandbox/contributions/indyo/lib/jdom.jar deleted file mode 100644 index 9b20ebce98f..00000000000 --- a/sandbox/contributions/indyo/lib/jdom.jar +++ /dev/null @@ -1,2 +0,0 @@ -AnyObjectId[93e77a4a4476afff71a110dda1e96465cb7f25a9] was removed in git history. -Apache SVN contains full history. \ No newline at end of file diff --git a/sandbox/contributions/indyo/lib/log4j-1.2.6.jar b/sandbox/contributions/indyo/lib/log4j-1.2.6.jar deleted file mode 100644 index feed73c51ab..00000000000 --- a/sandbox/contributions/indyo/lib/log4j-1.2.6.jar +++ /dev/null @@ -1,2 +0,0 @@ -AnyObjectId[be4a9176c35a7feeecf5b70edf070ecb5d13ac5d] was removed in git history. -Apache SVN contains full history. \ No newline at end of file diff --git a/sandbox/contributions/indyo/lib/lucene-1.2.jar b/sandbox/contributions/indyo/lib/lucene-1.2.jar deleted file mode 100644 index ab9c261bc40..00000000000 --- a/sandbox/contributions/indyo/lib/lucene-1.2.jar +++ /dev/null @@ -1,2 +0,0 @@ -AnyObjectId[ff9b90061b65c32122fcdde27bfe7f1e61fbd7bd] was removed in git history. -Apache SVN contains full history. \ No newline at end of file diff --git a/sandbox/contributions/indyo/lib/tartool.jar b/sandbox/contributions/indyo/lib/tartool.jar deleted file mode 100644 index 97cc1163c90..00000000000 --- a/sandbox/contributions/indyo/lib/tartool.jar +++ /dev/null @@ -1,2 +0,0 @@ -AnyObjectId[329aef393bece9d77eef16279910f6cd73113c39] was removed in git history. -Apache SVN contains full history. \ No newline at end of file diff --git a/sandbox/contributions/indyo/lib/xmlParserAPIs-xerces-2.0.2.jar b/sandbox/contributions/indyo/lib/xmlParserAPIs-xerces-2.0.2.jar deleted file mode 100644 index 2cd620815af..00000000000 --- a/sandbox/contributions/indyo/lib/xmlParserAPIs-xerces-2.0.2.jar +++ /dev/null @@ -1,2 +0,0 @@ -AnyObjectId[c1fa1d645474eee07f085a8ee29e38422f7614cf] was removed in git history. -Apache SVN contains full history. \ No newline at end of file diff --git a/sandbox/contributions/indyo/src/conf/default.config.xml b/sandbox/contributions/indyo/src/conf/default.config.xml deleted file mode 100644 index effd3e9fc9f..00000000000 --- a/sandbox/contributions/indyo/src/conf/default.config.xml +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - - - - - - - - - - - - - diff --git a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/AbstractDataSource.java b/sandbox/contributions/indyo/src/java/com/relevanz/indyo/AbstractDataSource.java deleted file mode 100644 index ed2c3e6a26f..00000000000 --- a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/AbstractDataSource.java +++ /dev/null @@ -1,90 +0,0 @@ -package com.relevanz.indyo; - -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache POI" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache", - * "Apache Lucene", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * . - */ - -import java.util.Map; -import java.util.Set; - -/** - * Generic implementation of an index datasource. - * - * @version $Id$ - */ -public abstract class AbstractDataSource implements IndexDataSource -{ - protected AbstractDataSource() - { - } - - protected AbstractDataSource(Map map) - { - loadFields(map); - } - - /** - * Fields to index. - */ - protected String[] fields; - - /** - * Convenience method to load fields to index into a Map. - */ - protected void loadFields(Map map) - { - Set fieldSet = map.keySet(); - fields = new String[fieldSet.size()]; - fieldSet.toArray(fields); - } -} diff --git a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/DocumentHandler.java b/sandbox/contributions/indyo/src/java/com/relevanz/indyo/DocumentHandler.java deleted file mode 100644 index dfc63743519..00000000000 --- a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/DocumentHandler.java +++ /dev/null @@ -1,332 +0,0 @@ -package com.relevanz.indyo; - -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache Lucene" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache", - * "Apache Lucene", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * . - */ - -import com.relevanz.indyo.util.StringUtils; -import org.apache.log4j.Logger; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.index.IndexWriter; - -import java.io.IOException; -import java.io.Reader; -import java.util.*; - -/** - *

- * A document is the atomic unit used for indexing purposes. It consists of - * metadata as well as its file contents. File contents are handled by - * {@link ContentHandler}. - *

- *

- * DocumentHandler creates the {@link org.apache.lucene.document.Document}, - * adds fields to it, delegates to {@link ContentHandler} to handle - * file contents. - *

- * - * @version $Id$ - */ -public class DocumentHandler -{ - /** - * Field to retrieve all documents. - */ - public static final String ALL_DOCUMENTS_FIELD = "AllDocuments"; - - private static Logger log = Logger.getLogger(DocumentHandler.class); - - private static boolean isDebugEnabled = log.isDebugEnabled(); - - /** - * Should parent documents include data of its children? - */ - private static boolean parentEncapsulation = false; - /** - * Document object this DocumentHandler is handling. - */ - private Document doc; - - /** - * Map of metadata for this document. Contains the field:value pair - * to be added to the document. - */ - private Map metadata; - - /** - * Map of fields. Contains field:type_of_field pair. - */ - private Map customFields; - - /** - * IndexWriter. - */ - private IndexWriter writer; - - /** - * A collection of documents to be added to the writer. - */ - private List documents = new ArrayList(); - - /** - * Ctor. - * - * @param Map of metadata for this document. - * @param Map of fields. - * @param Writer. - */ - public DocumentHandler(Map metadata, - Map customFields, - IndexWriter writer) - { - this.metadata = metadata; - this.customFields = customFields; - this.writer = writer; - } - - /** - * Handles the actual processing of the document. - */ - public void process() throws IOException, Exception - { - String objectid = (String) metadata.get(IndexDataSource.OBJECT_IDENTIFIER); - if (objectid == null) - return; - doc = createDocument(); - addMapToDoc(metadata); - addNestedDataSource(metadata); - doc.add(Field.Text(ALL_DOCUMENTS_FIELD, ALL_DOCUMENTS_FIELD)); - //documents.add(doc); - if (writer != null) - { - addToWriter(); - } - else - { - documents.add(doc); - } - } - - private List getDocuments() - { - return documents; - } - - private Document createDocument() - { - return new Document(); - } - - /** - * Add the contents of a Map to a document. - * - * @param Map to add. - */ - private void addMapToDoc(Map map) - { - for (Iterator it = map.keySet().iterator(); it.hasNext();) - { - String field = (String) it.next(); - Object value = map.get(field); - if (value instanceof String) - { - String type = null; - if (customFields != null) - { - type = (String) customFields.get(field); - } - addFieldToDoc(type, field, (String) value); - } - else if (value instanceof Reader) - { - addFieldToDoc(field, (Reader) value); - } - } - } - - /** - * Add nested datasources. - * - * @param Map which contains the nested datasources. - */ - private void addNestedDataSource(Map map) throws Exception - { - Object o = map.get(IndexDataSource.NESTED_DATASOURCE); - if (o == null) - return; - if (o instanceof IndexDataSource) - { - IndexDataSource ds = (IndexDataSource) o; - addDataSource(ds); - } - else if (o instanceof List) - { - List nestedDataSource = (List) o; - for (int i = 0, n = nestedDataSource.size(); i < n; i++) - { - IndexDataSource ds = (IndexDataSource) nestedDataSource.get(i); - addDataSource(ds); - } - } - else if (o instanceof IndexDataSource[]) - { - IndexDataSource[] nestedDataSource = (IndexDataSource[]) o; - for (int i = 0, n = nestedDataSource.length; i < n; i++) - { - IndexDataSource ds = (IndexDataSource) nestedDataSource[i]; - addDataSource(ds); - } - } - else - { - log.warn("Unknown object found as nested datasource:" + o); - } - } - - /** - * Datasources are basically a collection of data maps to be indexed. - * addMapToDoc is invoked for each map. - * - * @param Datasource to add. - */ - private void addDataSource(IndexDataSource ds) throws Exception - { - Map[] data = ds.getData(); - for (int i = 0; i < data.length; i++) - { - Map map = data[i]; - if (map.containsKey(IndexDataSource.OBJECT_IDENTIFIER)) - { - /** - * Create a new document because child datasources may need - * to be retrieved independently of parent doc. - */ - DocumentHandler docHandler = new DocumentHandler(map, null, null); - docHandler.process(); - documents.addAll(docHandler.getDocuments()); - } - else - { - addMapToDoc(map); - /** - * Add nested datasources of this datasource's data - */ - addNestedDataSource(map); - } - } - } - - /** - * Adds a String-based field to a document. - * - * @param Type of field. - * @param Name of field. - * @param Value of field. - */ - private void addFieldToDoc(String type, String field, String value) - { - if (value == null) - value = StringUtils.EMPTY_STRING; - if (SearchConfiguration.KEYWORD_FIELD_TYPE.equalsIgnoreCase(type)) - doc.add(Field.Keyword(field, value)); - else if (SearchConfiguration.UNINDEXED_FIELD_TYPE.equalsIgnoreCase(type)) - doc.add(Field.UnIndexed(field, value)); - else if (SearchConfiguration.UNSTORED_FIELD_TYPE.equalsIgnoreCase(type)) - doc.add(Field.UnStored(field, value)); - else - doc.add(Field.Text(field, value)); - } - - /** - * Adds a Reader-based field to a document. - * - * @param Name of field. - * @param Reader. - */ - private void addFieldToDoc(String field, Reader reader) - { - doc.add(Field.Text(field, reader)); - } - - /** - * Adds documents to the IndexWriter. - */ - private void addToWriter() throws IOException - { - if (parentEncapsulation) - { - for (int i = 0, n = documents.size(); i < n; i++) - { - Document d = (Document) documents.get(i); - for (Enumeration e = d.fields(); e.hasMoreElements();) - { - Field f = (Field) e.nextElement(); - String fieldName = f.name(); - if (!fieldName.equals(IndexDataSource.CONTAINER_IDENTIFIER) - && !fieldName.equals(IndexDataSource.OBJECT_CLASS) - && !fieldName.equals(IndexDataSource.OBJECT_IDENTIFIER)) - { - doc.add(f); - } - } - } - } - writer.addDocument(doc); - - for (int i = 0, n = documents.size(); i < n; i++) - { - writer.addDocument((Document) documents.get(i)); - } - } -} \ No newline at end of file diff --git a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/FSDataSource.java b/sandbox/contributions/indyo/src/java/com/relevanz/indyo/FSDataSource.java deleted file mode 100644 index c1efba9dd88..00000000000 --- a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/FSDataSource.java +++ /dev/null @@ -1,160 +0,0 @@ -package com.relevanz.indyo; - -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache Lucene" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache", - * "Apache Lucene", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * . - */ - -import org.apache.lucene.document.DateField; -import com.relevanz.indyo.contenthandler.FileContentHandler; -import com.relevanz.indyo.contenthandler.FileContentHandlerFactory; -import com.relevanz.indyo.util.IOUtils; - -import java.io.File; -import java.io.Reader; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - * A filesystem-based datasource. - * - * @author Kelvin Tan - * @version $Id$ - */ -public class FSDataSource extends AbstractDataSource -{ - public static final String FILE_PATH_FIELD = "filePath"; - public static final String FILE_NAME_FIELD = "fileName"; - public static final String FILE_SIZE_FIELD = "fileSize"; - public static final String FILE_FORMAT_FIELD = "fileFormat"; - public static final String FILE_CONTENTS_FIELD = "fileContents"; - public static final String FILE_LAST_MODIFIED_DATE_FIELD = "fileLastModifiedDate"; - - private File targetFileOrDir; - - public FSDataSource(String targetFileOrDirStr) - { - this(new File(targetFileOrDirStr)); - } - - public FSDataSource(File targetFileOrDir) - { - setTargetDirectory(targetFileOrDir); - } - - public Map[] getData() - { - Map[] returnData = null; - List temp = new ArrayList(); - loadDataFromFiles(targetFileOrDir, temp); - returnData = new Map[temp.size()]; - returnData = (Map[]) temp.toArray(returnData); - return returnData; - } - - public void setTargetDirectory(File targetFileOrDir) - { - this.targetFileOrDir = targetFileOrDir; - } - - private void loadDataFromFiles(File f, List list) - { - if (f.isDirectory()) - { - File[] directoryTree = f.listFiles(); - for (int i = 0; i < directoryTree.length; i++) - { - loadDataFromFiles(directoryTree[i], list); - } - } - else - { - Map dataMap = new HashMap(); - dataMap.put(FILE_PATH_FIELD, f.getPath()); - dataMap.put(FILE_NAME_FIELD, f.getName()); - dataMap.put(FILE_LAST_MODIFIED_DATE_FIELD, - DateField.timeToString(f.lastModified())); - dataMap.put(FILE_SIZE_FIELD, String.valueOf(f.length())); - dataMap.put(FILE_FORMAT_FIELD, - IOUtils.getFileExtension(f)); - addFileContents(f, dataMap); - list.add(dataMap); - } - } - - private void addFileContents(File targetFile, Map dataMap) - { - FileContentHandler cHandler = - FileContentHandlerFactory.getContentHandler(targetFile); - if (cHandler != null) - { - if (cHandler.fileContentIsReadable()) - { - Reader r = cHandler.getReader(); - if (r != null) - { - dataMap.put(FILE_CONTENTS_FIELD, r); - } - } - if (cHandler.containsNestedData()) - { - dataMap.put(NESTED_DATASOURCE, cHandler.getNestedDataSource()); - } - } - else - { - //cat.warn("ContentHandler not found for " + contentFile.getName()); - } - } -} diff --git a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/IllegalConfigurationException.java b/sandbox/contributions/indyo/src/java/com/relevanz/indyo/IllegalConfigurationException.java deleted file mode 100644 index 6e157949612..00000000000 --- a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/IllegalConfigurationException.java +++ /dev/null @@ -1,69 +0,0 @@ -package com.relevanz.indyo; - -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache POI" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache", - * "Apache Lucene", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * . - */ - -/** - * Thrown when loading SearchConfiguration. - * - * @author Kelvin Tan - * @version $Id$ - */ -public class IllegalConfigurationException extends Exception -{ - public IllegalConfigurationException(String msg) - { - super(msg); - } -} diff --git a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/IndexDataSource.java b/sandbox/contributions/indyo/src/java/com/relevanz/indyo/IndexDataSource.java deleted file mode 100644 index 0a2dfc02081..00000000000 --- a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/IndexDataSource.java +++ /dev/null @@ -1,103 +0,0 @@ -package com.relevanz.indyo; -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache POI" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache", - * "Apache Lucene", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * . - */ - -import java.util.Map; - -/** - * A datasource is any source of data (filesystem, database, URL, etc) - * which is indexed by SearchIndexer. - * - * @version $Id$ - */ -public interface IndexDataSource -{ - /** - * Key in the map (located in the list returned by getData) - * to represent the class name of the object being indexed. - */ - public static final String OBJECT_CLASS = "objectClass"; - - /** - * Key in the map (located in the list returned by getData) - * to represent the uuid of the object being indexed. - */ - public static final String OBJECT_IDENTIFIER = "objectId"; - - /** - * The key in the map (located in the list returned by getData) - * to represent nested datasources. - */ - public static final String NESTED_DATASOURCE = "nestedDataSource"; - - /** - * Key in the map (located in the list returned by getData) - * to represent the id of the datasource's container. Applies to - * nested datasources. - */ - public static final String CONTAINER_IDENTIFIER = "containerId"; - - /** - * Key in the map to represent the class name of the Search Result - * object for this datasource (if any). - */ - public static final String SEARCH_RESULT_CLASSNAME = "resultClassname"; - - /** - * Retrieve a array of Maps. Each map represents the - * a document to be indexed. The key:value pair of the map - * is the metadata of the document. - */ - public Map[] getData() throws Exception; -} diff --git a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/IndyoIndexer.java b/sandbox/contributions/indyo/src/java/com/relevanz/indyo/IndyoIndexer.java deleted file mode 100644 index 56dba39ddf6..00000000000 --- a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/IndyoIndexer.java +++ /dev/null @@ -1,125 +0,0 @@ -package com.relevanz.indyo; - -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache POI" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache", - * "Apache Lucene", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * . - */ - -import com.relevanz.indyo.contenthandler.FileContentHandlerFactory; -import org.apache.log4j.Logger; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.index.IndexWriter; - -import java.io.IOException; -import java.util.Collections; -import java.util.Map; - -/** - * Entry point for search engine indexing. - *

- * SearchIndexer is responsible for creating the IndexWriter - * {@see org.apache.lucene.index.IndexWriter} and passing it to - * DocumentHandlers {@link DocumentHandler} to index individual documents. - *

- * - * @version $Id$ - */ -public class IndyoIndexer -{ - private static Logger log = Logger.getLogger(IndyoIndexer.class); - private IndexWriter fsWriter; - private SearchConfiguration config; - - public IndyoIndexer(String indexDirectory, String configFile) - throws IOException, IllegalConfigurationException - { - Analyzer a = new StandardAnalyzer(); - fsWriter = new IndexWriter(indexDirectory, a, true); - fsWriter.maxFieldLength = 1000000; - loadConfig(configFile); - } - - /** - * Indexes documents. - */ - public synchronized void index(IndexDataSource ds) throws IOException, Exception - { - log.debug("Initiating search engine indexing..."); - long start = System.currentTimeMillis(); - // temporarily use an empty map whilst custom fields get implemented - indexDataSource(ds, Collections.EMPTY_MAP); - fsWriter.optimize(); - fsWriter.close(); - long stop = System.currentTimeMillis(); - log.debug("Indexing took " + (stop - start) + " milliseconds"); - } - - private void loadConfig(String configFile) throws IllegalConfigurationException - { - config = new SearchConfiguration(configFile); - FileContentHandlerFactory.setHandlerRegistry(config.getContentHandlers()); - } - - private void indexDataSource(IndexDataSource source, Map customFields) - throws Exception - { - Map[] data = source.getData(); - // here's a good place to spawn a couple of threads for indexing - for (int i = 0; i < data.length; i++) - { - DocumentHandler docHandler = - new DocumentHandler(data[i], customFields, fsWriter); - docHandler.process(); - } - } -} diff --git a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/SearchConfiguration.java b/sandbox/contributions/indyo/src/java/com/relevanz/indyo/SearchConfiguration.java deleted file mode 100644 index 4f088a04b42..00000000000 --- a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/SearchConfiguration.java +++ /dev/null @@ -1,259 +0,0 @@ -package com.relevanz.indyo; - -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache Lucene" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache", - * "Apache Lucene", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * . - */ - -import com.relevanz.indyo.contenthandler.FileContentHandlerFactory; -import com.relevanz.indyo.util.DataUnformatFilter; -import org.apache.log4j.Category; -import org.apache.log4j.Logger; -import org.jdom.Document; -import org.jdom.Element; -import org.jdom.input.SAXBuilder; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.StringTokenizer; - -/** - * Configures the indexing process using an XML file. - * - * @author Kelvin Tan - * @version $Id$ - */ -public class SearchConfiguration -{ - public static final String TEXT_FIELD_TYPE = "text"; - public static final String KEYWORD_FIELD_TYPE = "keyword"; - public static final String UNINDEXED_FIELD_TYPE = "unindexed"; - public static final String UNSTORED_FIELD_TYPE = "unstored"; - - /** Log4j category. - */ - static Logger log = Logger.getLogger(SearchConfiguration.class.getName()); - - /** - * Key in the config file to declare content handlers. - */ - private static final String CONTENT_HANDLER_KEY = "Search.ContentHandlers"; - - /** - * Key in the config file to declare custom fields. - */ - private static final String FIELD_KEY = "Search.Fields"; - - /** - * Map of content handlers. - */ - private Map contentHandlers = new HashMap(); - - /** - * Map of (non-standard) custom fields to index. - */ - private Map customFields = new HashMap(); - - /** - * Document object which represents the xml configuration file. - */ - private Document doc; - - /** - * Creates a new SearchConfiguration. - * - * @param configFile Name of the xml configuration file. - */ - public SearchConfiguration(String configFile) throws IllegalConfigurationException - { - try - { - SAXBuilder builder = new SAXBuilder(); - DataUnformatFilter format = new DataUnformatFilter(); - builder.setXMLFilter(format); - doc = builder.build(configFile); - } - catch (Exception e) - { - log.error("Error creating XML parser:" + e.getMessage(), e); - } - loadContentHandlers(); - loadCustomFields(); - } - - public Map getContentHandlers() - { - return this.contentHandlers; - } - - public Map getCustomFields() - { - return this.customFields; - } - - /** - * Loads the content handlers. - */ - protected void loadContentHandlers() throws IllegalConfigurationException - { - String[] extensions = getChildPropertyAttributeValues(CONTENT_HANDLER_KEY, "extension"); - String[] handlers = getChildPropertyAttributeValues(CONTENT_HANDLER_KEY, "handler"); - if (extensions.length != handlers.length) - throw new IllegalConfigurationException( - "Illegal configuration of Search Content Handlers!"); - for (int i = 0; i < extensions.length; i++) - { - contentHandlers.put(extensions[i], generateObject(handlers[i])); - } - String[] defaultExtension = getChildPropertyAttributeValues(CONTENT_HANDLER_KEY, "default"); - for (int i = 0; i < defaultExtension.length; i++) - { - if (defaultExtension[i] != null && defaultExtension[i].equals("true")) - { - contentHandlers.put(FileContentHandlerFactory.DEFAULT_HANDLER_KEY - , generateObject(handlers[i])); - } - } - } - - /** - * Loads the custom fields to index. - */ - protected void loadCustomFields() throws IllegalConfigurationException - { - String[] fields = getChildPropertyAttributeValues(FIELD_KEY, "name"); - String[] fieldtypes = getChildPropertyAttributeValues(FIELD_KEY, "type"); - if (fields.length != fieldtypes.length) - throw new IllegalConfigurationException( - "Illegal configuration of custom search fields!"); - for (int i = 0; i < fields.length; i++) - { - customFields.put(fields[i], fieldtypes[i]); - } - } - - /** - * Return attribute values for all child nodes. - */ - private String[] getChildPropertyAttributeValues(String parent, - String attributeName) - { - String[] nodeName = parseNodeName(parent); - Element element = doc.getRootElement(); - for (int i = 0; i < nodeName.length; i++) - { - element = element.getChild(nodeName[i]); - if (element == null) - { - return new String[]{}; - } - } - List children = element.getChildren(); - int childCount = children.size(); - String[] childrenAttributeValue = new String[childCount]; - for (int i = 0; i < childCount; i++) - { - childrenAttributeValue[i] = - ((Element) children.get(i)).getAttributeValue(attributeName); - } - return childrenAttributeValue; - } - - /** - * Node names are in the form "x.y.z". Returns a String array - * representation of the node elements. - */ - private String[] parseNodeName(String nodeName) - { - StringTokenizer st = new StringTokenizer(nodeName, "."); - String[] nodeElements = new String[st.countTokens()]; - int i = 0; - while (st.hasMoreTokens()) - { - nodeElements[i] = st.nextToken(); - ++i; - } - return nodeElements; - } - - /** - * Utility method to return an object based on its class name. - * The object needs to have a constructor which accepts no parameters. - * - * @param className Class name of object to be generated - * @return Object - */ - private static Object generateObject(String className) - { - Object o = null; - try - { - Class c = Class.forName(className); - o = c.newInstance(); - } - catch (ClassNotFoundException cnfe) - { - log.error(cnfe.getMessage() + " No class named '" + className + "' was found.", cnfe); - } - catch (InstantiationException ie) - { - log.error(ie.getMessage() + " Class named '" + className + "' could not be instantiated.", ie); - } - catch (IllegalAccessException iae) - { - log.error(iae.getMessage() + " No access to class named '" + className + "'.", iae); - } - return o; - } - -} diff --git a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/FileContentHandler.java b/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/FileContentHandler.java deleted file mode 100644 index 183209382ed..00000000000 --- a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/FileContentHandler.java +++ /dev/null @@ -1,90 +0,0 @@ -package com.relevanz.indyo.contenthandler; - -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache Lucene" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache", - * "Apache Lucene", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * . - */ - -import java.io.Reader; -import java.util.List; - -/** - * A content handler determines how to index a file's contents. - * - * @version $Id$ - */ -public interface FileContentHandler -{ - /** - * Do the file contents of this file have any meaning? Should - * its contents be indexed? - */ - public boolean fileContentIsReadable(); - - /** - * Returns a reader for this file's contents. - */ - public Reader getReader(); - - /** - * Does this file have nested data within? - */ - public boolean containsNestedData(); - - /** - * Return the datasources contained within the parent file. - * This can be URLs contained within a HTML file, files - * within a ZIP file, basically anything represented by a - * DataSource. - */ - public List getNestedDataSource(); -} diff --git a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/FileContentHandlerAdapter.java b/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/FileContentHandlerAdapter.java deleted file mode 100644 index 4d17dae689c..00000000000 --- a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/FileContentHandlerAdapter.java +++ /dev/null @@ -1,89 +0,0 @@ -package com.relevanz.indyo.contenthandler; - -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache Lucene" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache", - * "Apache Lucene", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * . - */ - -import java.io.File; -import java.io.Reader; -import java.util.List; - -/** - * A no-op implementation to make FileContentHandler creation easier. - *

- * Classes which need to implement the FileContentHandler interface should - * extend this class or {@link NestedFileContentHandlerAdapter}. - *

- * - * @author Kelvin Tan - * @version $Id$ - */ -public abstract class FileContentHandlerAdapter implements FileContentHandler -{ - protected File file; - - protected FileContentHandlerAdapter(File file) - { - this.file = file; - } - - public Reader getReader() - { - return null; - } - - public List getNestedDataSource() - { - return null; - } -} diff --git a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/FileContentHandlerFactory.java b/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/FileContentHandlerFactory.java deleted file mode 100644 index 707d99ee550..00000000000 --- a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/FileContentHandlerFactory.java +++ /dev/null @@ -1,180 +0,0 @@ -package com.relevanz.indyo.contenthandler; - -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache Lucene" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache", - * "Apache Lucene", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * . - */ - -import org.apache.log4j.Category; - -import java.util.Map; -import java.io.File; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Constructor; - -import com.relevanz.indyo.util.IOUtils; - -/** - * Factory responsible for obtaining ContentHandlers. - * - * @author Kelvin Tan - * @version $Id$ - */ -public abstract class FileContentHandlerFactory -{ - public static final String DEFAULT_HANDLER_KEY = "DEFAULT"; - static Category cat = Category.getInstance(FileContentHandlerFactory.class.getName()); - private static Map handlerRegistry; - - public static FileContentHandler getContentHandler(File f) - { - String extension = IOUtils.getFileExtension(f); - if (handlerRegistry.containsKey(extension)) - { - String handlerClassname = (String) handlerRegistry.get(extension); - return (FileContentHandler) generateObject(handlerClassname, - new Class[]{File.class}, - new Object[]{f}); - } - else if (handlerRegistry.containsKey(DEFAULT_HANDLER_KEY)) - { - String handlerClassname = (String) handlerRegistry.get(DEFAULT_HANDLER_KEY); - return (FileContentHandler) generateObject(handlerClassname); - } - else - { - return NullHandler.getInstance(); - } - } - - public static void setHandlerRegistry(Map handlerRegistry) - { - FileContentHandlerFactory.handlerRegistry = handlerRegistry; - } - - /** - * Utility method to return an object based on its class name. - * The object needs to have a constructor which accepts no parameters. - * - * @param className Class name of object to be generated - * @return Object - */ - private static Object generateObject(String className) - { - Object o = null; - try - { - Class c = Class.forName(className); - o = c.newInstance(); - } - catch (ClassNotFoundException cnfe) - { - cat.error(cnfe.getMessage() + " No class named '" + className + "' was found.", cnfe); - } - catch (InstantiationException ie) - { - cat.error(ie.getMessage() + " Class named '" + className + "' could not be instantiated.", ie); - } - catch (IllegalAccessException iae) - { - cat.error(iae.getMessage() + " No access to class named '" + className + "'.", iae); - } - return o; - } - - /** - * Utility method to return an object based on its class name. - * - * @param type Class name of object to be generated - * @param clazz Class array of parameters. - * @param args Object array of arguments. - * @return Object - */ - private static Object generateObject(String className, - Class[] clazz, - Object[] args) - { - Object o = null; - try - { - Class c = Class.forName(className); - Constructor con = c.getConstructor(clazz); - if (con != null) - { - o = con.newInstance(args); - } - else - throw new InstantiationException("Constructor with arguments:" + clazz.toString() + " non-existent."); - } - catch (ClassNotFoundException cnfe) - { - cat.error(cnfe.getMessage() + " No class named '" + className + "' was found.", cnfe); - } - catch (InstantiationException ie) - { - cat.error(ie.getMessage() + " Class named '" + className + "' could not be instantiated.", ie); - } - catch (IllegalAccessException iae) - { - cat.error(iae.getMessage() + " No access to class named '" + className + "'.", iae); - } - catch (NoSuchMethodException nsme) - { - cat.error(nsme.getMessage() + " No method in class named '" + className + "'.", nsme); - } - catch (InvocationTargetException ite) - { - cat.error(ite.getMessage() + " in class named '" + className + "'.", ite); - } - return o; - } -} diff --git a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/GZipHandler.java b/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/GZipHandler.java deleted file mode 100644 index d51765cb2f7..00000000000 --- a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/GZipHandler.java +++ /dev/null @@ -1,131 +0,0 @@ -package com.relevanz.indyo.contenthandler; - -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache Lucene" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache", - * "Apache Lucene", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * . - */ - -import org.apache.log4j.Category; -import com.relevanz.indyo.IndexDataSource; -import com.relevanz.indyo.FSDataSource; -import com.relevanz.indyo.util.IOUtils; - -import java.io.File; -import java.io.IOException; -import java.io.Reader; -import java.util.List; - -/** - * Handles GZip content. - * - * @author Kelvin Tan - * @version $Id$ - */ -public class GZipHandler extends NestedFileContentHandlerAdapter -{ - private static Category cat = Category.getInstance(GZipHandler.class.getName()); - - public GZipHandler(File file) - { - super(file); - } - - public Reader getReader() - { - return null; - } - - public List getNestedDataSource() - { - if (!file.exists()) - return null; - try - { - File tempDir = new File(TEMP_FOLDER); - tempDir.mkdirs(); - tempDir.deleteOnExit(); - String filename = file.getName(); - File tempFile = new File(tempDir, filename.substring(0, filename.lastIndexOf("."))); - tempFile.deleteOnExit(); - IOUtils.extractGZip(file, tempFile); - indexGZipDirectory(tempDir); - } - catch (IOException ioe) - { - cat.error("IOException ungzipping " + file.toString(), ioe); - } - return nestedDataSource; - } - - public boolean fileContentIsReadable() - { - return false; - } - - // only one file, but let's just treat it like a directory anyway - private void indexGZipDirectory(File dir) - { - if (dir.isDirectory()) - { - File[] dirContents = dir.listFiles(); - for (int i = 0; i < dirContents.length; i++) - { - indexGZipDirectory(dirContents[i]); - } - } - else if (dir.isFile()) - { - IndexDataSource ds = new FSDataSource(dir); - nestedDataSource.add(nestedDataSource); - } - } -} \ No newline at end of file diff --git a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/NestedFileContentHandlerAdapter.java b/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/NestedFileContentHandlerAdapter.java deleted file mode 100644 index 21852abc608..00000000000 --- a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/NestedFileContentHandlerAdapter.java +++ /dev/null @@ -1,91 +0,0 @@ -package com.relevanz.indyo.contenthandler; - -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache Lucene" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache", - * "Apache Lucene", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * . - */ - -import org.apache.lucene.document.Document; - -import java.io.File; -import java.util.ArrayList; -import java.util.List; - -/** - * A no-op implementation to make FileContentHandler creation easier. - *

- * Classes which need to implement the FileContentHandler interface - * and need to handle nested content (example: zip, tar, rar, etc) should - * extend this class. - *

- * - * @author Kelvin Tan - * @version $Id$ - */ -public abstract class NestedFileContentHandlerAdapter - extends FileContentHandlerAdapter -{ - protected final String TEMP_FOLDER = "/usr/temp" + '/' - + Math.random() + '/'; - - protected List nestedDataSource; - - public NestedFileContentHandlerAdapter(File file) - { - super(file); - } - - public boolean containsNestedData() - { - return true; - } -} diff --git a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/NullHandler.java b/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/NullHandler.java deleted file mode 100644 index 5c3353a071c..00000000000 --- a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/NullHandler.java +++ /dev/null @@ -1,94 +0,0 @@ -package com.relevanz.indyo.contenthandler; - -import java.io.File; -import java.io.Reader; - -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache Lucene" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache", - * "Apache Lucene", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * . - */ - -/** - * Do-nothing content handler. - * - * @author Kelvin Tan - * @version $Id$ - */ -public class NullHandler extends FileContentHandlerAdapter -{ - private static NullHandler singleton = new NullHandler(null); - - public static FileContentHandler getInstance() - { - return singleton; - } - - private NullHandler(File file) - { - super(file); - } - - public boolean fileContentIsReadable() - { - return false; - } - - public Reader getReader() - { - return null; - } - - public boolean containsNestedData() - { - return false; - } -} diff --git a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/TARHandler.java b/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/TARHandler.java deleted file mode 100644 index 4d0e542d468..00000000000 --- a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/TARHandler.java +++ /dev/null @@ -1,132 +0,0 @@ -package com.relevanz.indyo.contenthandler; - -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache Lucene" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache", - * "Apache Lucene", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * . - */ - -import org.apache.log4j.Category; -import com.relevanz.indyo.IndexDataSource; -import com.relevanz.indyo.FSDataSource; -import com.relevanz.indyo.util.IOUtils; - -import java.io.File; -import java.io.IOException; -import java.io.Reader; -import java.util.ArrayList; -import java.util.List; - -/** - * Handles Tar files. - * - * @author Kelvin Tan - * @version $Id$ - */ -public class TARHandler extends NestedFileContentHandlerAdapter -{ - static Category cat = Category.getInstance(TARHandler.class.getName()); - - public TARHandler(File file) - { - super(file); - } - - public Reader getReader() - { - return null; - } - - public boolean fileContentIsReadable() - { - return false; - } - - public List getNestedDataSource() - { - if (!file.exists()) - return null; - if (nestedDataSource == null) - { - nestedDataSource = new ArrayList(); - } - try - { - File tempDir = new File(TEMP_FOLDER); - tempDir.deleteOnExit(); - IOUtils.extractTar(file, tempDir); - indexTarDirectory(tempDir); - } - catch (IOException ioe) - { - cat.error(ioe.getMessage(), ioe); - } - return nestedDataSource; - } - - private void indexTarDirectory(File dir) - { - if (dir.isDirectory()) - { - File[] dirContents = dir.listFiles(); - for (int i = 0; i < dirContents.length; i++) - { - indexTarDirectory(dirContents[i]); - } - } - else if (dir.isFile()) - { - // here create new DataMap for the tarred file - IndexDataSource ds = new FSDataSource(dir); - nestedDataSource.add(nestedDataSource); - } - } -} \ No newline at end of file diff --git a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/TextHandler.java b/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/TextHandler.java deleted file mode 100644 index 74ae53169d1..00000000000 --- a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/TextHandler.java +++ /dev/null @@ -1,117 +0,0 @@ -package com.relevanz.indyo.contenthandler; - -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache Lucene" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache", - * "Apache Lucene", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * . - */ - -import org.apache.log4j.Category; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; - -import java.io.*; - -import com.relevanz.indyo.util.StringUtils; - -/** - * Handles text-based content. - * - * @author Kelvin Tan - * @version $Id$ - */ -public class TextHandler extends FileContentHandlerAdapter -{ - static Category cat = Category.getInstance(TextHandler.class.getName()); - - public TextHandler(File file) - { - super(file); - } - - public Reader getReader() - { - if (!file.exists()) - { - cat.error(file.toString() + " doesn't exist! Failing silently..."); - return null; - } - return getReader(file); - } - - public boolean containsNestedData() - { - return false; - } - - public boolean fileContentIsReadable() - { - return true; - } - - private Reader getReader(File f) - { - Reader reader = null; - try - { - reader = new FileReader(f); - } - catch (FileNotFoundException nfe) - { - cat.error("File Not Found Exception:" + f.toString(), nfe); - } - catch (IOException ioe) - { - cat.error(ioe.getMessage(), ioe); - } - return reader; - } -} diff --git a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/ZIPHandler.java b/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/ZIPHandler.java deleted file mode 100644 index 44fa49531a3..00000000000 --- a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/ZIPHandler.java +++ /dev/null @@ -1,133 +0,0 @@ -package com.relevanz.indyo.contenthandler; - -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache Lucene" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache", - * "Apache Lucene", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * . - */ - -import org.apache.log4j.Category; -import com.relevanz.indyo.IndexDataSource; -import com.relevanz.indyo.FSDataSource; -import com.relevanz.indyo.util.IOUtils; - -import java.io.File; -import java.io.IOException; -import java.io.Reader; -import java.util.ArrayList; -import java.util.Enumeration; -import java.util.List; -import java.util.zip.ZipEntry; -import java.util.zip.ZipException; -import java.util.zip.ZipFile; - -/** - * Handles Zip files. - * - * @author Kelvin Tan - * @version $Id$ - */ -public class ZIPHandler extends NestedFileContentHandlerAdapter -{ - private static Category cat = Category.getInstance(ZIPHandler.class); - - public ZIPHandler(File file) - { - super(file); - } - - public boolean fileContentIsReadable() - { - return false; - } - - public Reader getReader() - { - return null; - } - - public List getNestedDataSource() - { - if (!file.exists()) - return null; - if (nestedDataSource == null) - { - nestedDataSource = new ArrayList(); - } - try - { - ZipFile zFile = new ZipFile(file); - for (Enumeration e = zFile.entries(); e.hasMoreElements();) - { - ZipEntry entry = (ZipEntry) e.nextElement(); - String entryName = entry.getName(); - IOUtils.writeToTempFile(zFile.getInputStream(entry), - TEMP_FOLDER + entryName); - if (!entry.isDirectory()) - { - // create a new DataMap for each zip entry - IndexDataSource ds = new FSDataSource(TEMP_FOLDER + entryName); - nestedDataSource.add(ds); - } - } - zFile.close(); - } - catch (ZipException ze) - { - cat.error("ZipException parsing zip:" + ze.getMessage(), ze); - } - catch (IOException ioe) - { - cat.error("IOException parsing zip:" + ioe.getMessage(), ioe); - } - return nestedDataSource; - } -} \ No newline at end of file diff --git a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/util/DataUnformatFilter.java b/sandbox/contributions/indyo/src/java/com/relevanz/indyo/util/DataUnformatFilter.java deleted file mode 100644 index 1d0cb3eb161..00000000000 --- a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/util/DataUnformatFilter.java +++ /dev/null @@ -1,312 +0,0 @@ -/*-- - - Copyright (C) 2000 Brett McLaughlin & Jason Hunter. - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the disclaimer that follows - these conditions in the documentation and/or other materials - provided with the distribution. - - 3. The name "JDOM" must not be used to endorse or promote products - derived from this software without prior written permission. For - written permission, please contact license@jdom.org. - - 4. Products derived from this software may not be called "JDOM", nor - may "JDOM" appear in their name, without prior written permission - from the JDOM Project Management (pm@jdom.org). - - In addition, we request (but do not require) that you include in the - end-user documentation provided with the redistribution and/or in the - software itself an acknowledgement equivalent to the following: - "This product includes software developed by the - JDOM Project (http://www.jdom.org/)." - Alternatively, the acknowledgment may be graphical using the logos - available at http://www.jdom.org/images/logos. - - THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - SUCH DAMAGE. - - This software consists of voluntary contributions made by many - individuals on behalf of the JDOM Project and was originally - created by Brett McLaughlin and - Jason Hunter . For more information on the - JDOM Project, please see . - - */ -package com.relevanz.indyo.util; - -import java.util.Stack; - -import org.xml.sax.Attributes; -import org.xml.sax.SAXException; -import org.xml.sax.XMLReader; - - -/** - * Filter for removing formatting from data- or field-oriented XML. - * - * Code and comments adapted from DataWriter-0.2, written - * by David Megginson and released into the public domain, - * without warranty. - * - *

This filter removes leading and trailing whitespace from - * field-oriented XML without mixed content. Note that this class will - * likely not yield appropriate results for document-oriented XML like - * XHTML pages, which mix character data and elements together.

- * - * @see DataFormatFilter - */ -public class DataUnformatFilter extends XMLFilterBase -{ - - //////////////////////////////////////////////////////////////////// - // Constructors. - //////////////////////////////////////////////////////////////////// - - /** - * Create a new filter. - */ - public DataUnformatFilter() - { - } - - /** - * Create a new filter. - * - *

Use the XMLReader provided as the source of events.

- * - * @param xmlreader The parent in the filter chain. - */ - public DataUnformatFilter(XMLReader xmlreader) - { - super(xmlreader); - } - - //////////////////////////////////////////////////////////////////// - // Public methods. - //////////////////////////////////////////////////////////////////// - - /** - * Reset the filter so that it can be reused. - * - *

This method is especially useful if the filter failed - * with an exception the last time through.

- */ - public void reset () - { - state = SEEN_NOTHING; - stateStack = new Stack(); - whitespace = new StringBuffer(); - } - - /** - * Filter a start document event. - * - *

Reset state and pass the event on for further processing.

- * - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see org.xml.sax.ContentHandler#startDocument - */ - public void startDocument () - throws SAXException - { - reset(); - super.startDocument(); - } - - /** - * Filter a start element event. - * - * @param uri The element's Namespace URI. - * @param localName The element's local name. - * @param qName The element's qualified (prefixed) name. - * @param atts The element's attribute list. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see org.xml.sax.ContentHandler#startElement - */ - public void startElement (String uri, String localName, - String qName, Attributes atts) - throws SAXException - { - clearWhitespace(); - stateStack.push(SEEN_ELEMENT); - state = SEEN_NOTHING; - super.startElement(uri, localName, qName, atts); - } - - /** - * Filter an end element event. - * - * @param uri The element's Namespace URI. - * @param localName The element's local name. - * @param qName The element's qualified (prefixed) name. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see org.xml.sax.ContentHandler#endElement - */ - public void endElement (String uri, String localName, String qName) - throws SAXException - { - if (state == SEEN_ELEMENT) { - clearWhitespace(); - } else { - emitWhitespace(); - } - state = stateStack.pop(); - super.endElement(uri, localName, qName); - } - - /** - * Filter a character data event. - * - * @param ch The characters to write. - * @param start The starting position in the array. - * @param length The number of characters to use. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see org.xml.sax.ContentHandler#characters - */ - public void characters (char ch[], int start, int length) - throws SAXException - { - if (state != SEEN_DATA) { - - /* Look for non-whitespace. */ - int end = start + length; - while (end-- > start) { - if (!isXMLWhitespace(ch[end])) - break; - } - - /* - * If all the characters are whitespace, save them for later. - * If we've got some data, emit any saved whitespace and update - * our state to show we've seen data. - */ - if (end < start) { - saveWhitespace(ch, start, length); - } else { - state = SEEN_DATA; - emitWhitespace(); - } - } - - /* Pass on everything inside a data field. */ - if (state == SEEN_DATA) { - super.characters(ch, start, length); - } - } - - /** - * Filter an ignorable whitespace event. - * - * @param ch The array of characters to write. - * @param start The starting position in the array. - * @param length The number of characters to write. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see org.xml.sax.ContentHandler#ignorableWhitespace - */ - public void ignorableWhitespace (char ch[], int start, int length) - throws SAXException - { - emitWhitespace(); - // ignore - } - - /** - * Filter a processing instruction event. - * - * @param target The PI target. - * @param data The PI data. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see org.xml.sax.ContentHandler#processingInstruction - */ - public void processingInstruction (String target, String data) - throws SAXException - { - emitWhitespace(); - super.processingInstruction(target, data); - } - - //////////////////////////////////////////////////////////////////// - // Internal methods. - //////////////////////////////////////////////////////////////////// - - /** - * Saves trailing whitespace. - */ - protected void saveWhitespace (char[] ch, int start, int length) { - whitespace.append(ch, start, length); - } - - /** - * Passes saved whitespace down the filter chain. - */ - protected void emitWhitespace () - throws SAXException - { - char[] data = new char[whitespace.length()]; - if (whitespace.length() > 0) { - whitespace.getChars(0, data.length, data, 0); - whitespace.setLength(0); - super.characters(data, 0, data.length); - } - } - - /** - * Discards saved whitespace. - */ - protected void clearWhitespace () { - whitespace.setLength(0); - } - - /** - * Returns true if character is XML whitespace. - */ - private boolean isXMLWhitespace (char c) - { - return c == ' ' || c == '\t' || c == '\r' || c == '\n'; - } - - //////////////////////////////////////////////////////////////////// - // Constants. - //////////////////////////////////////////////////////////////////// - - private static final Object SEEN_NOTHING = new Object(); - private static final Object SEEN_ELEMENT = new Object(); - private static final Object SEEN_DATA = new Object(); - - - //////////////////////////////////////////////////////////////////// - // Internal state. - //////////////////////////////////////////////////////////////////// - - private Object state = SEEN_NOTHING; - private Stack stateStack = new Stack(); - - private StringBuffer whitespace = new StringBuffer(); -} - -// end of DataUnformatFilter.java diff --git a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/util/IOUtils.java b/sandbox/contributions/indyo/src/java/com/relevanz/indyo/util/IOUtils.java deleted file mode 100644 index ad4952ed21e..00000000000 --- a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/util/IOUtils.java +++ /dev/null @@ -1,274 +0,0 @@ -package com.relevanz.indyo.util; - -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache Lucene" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache", - * "Apache Lucene", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * . - */ - -import com.ice.tar.TarArchive; -import org.apache.log4j.Category; - -import java.io.*; -import java.util.zip.GZIPInputStream; -import java.util.zip.ZipEntry; -import java.util.zip.ZipOutputStream; - -/** - * Utility IO-related methods. - * - * @author Kelvin Tan - * @version $Id$ - */ -public final class IOUtils -{ - /** - * Log4j category. - */ - private static Category cat = Category.getInstance(IOUtils.class.getName()); - - /** - * Writes data from the inputstream to the outputstream. - * - * @param in InputStream to read from. - * @param out OutputStream to write to. - * @throws IOException I/O error. - */ - public static void transferData(InputStream in, OutputStream out) - throws IOException - { - byte[] data = new byte[10000]; - int len; - while ((len = in.read(data)) != -1) - { - out.write(data, 0, len); - } - } - - /** - * Recursively deletes a directory. - * @param File Directory to delete. - */ - public static void deleteDirectory(File directory) - { - File[] fArray = directory.listFiles(); - for (int i = 0; i < fArray.length; i++) - { - if (fArray[i].isDirectory()) - { - deleteDirectory(fArray[i]); - } - fArray[i].delete(); - } - directory.delete(); - } - - /** - * Writes an input stream to a temporary file which is set - * to delete when the VM exits. - * @param Inputstream to read data from - * @param Temporary file to write to - */ - public static void writeToTempFile(InputStream in, String tempfile) - throws IOException - { - OutputStream out = null; - try - { - File f = new File(tempfile); - f.deleteOnExit(); - char lastChar = tempfile.charAt(tempfile.length() - 1); - // make no assumptions that java.io.File detects directories - // in a cross-platform manner - if (f.isDirectory() || lastChar == '\\' || lastChar == '/') - f.mkdirs(); - else - { - // ensure that all necessary directories are created - File parent = f.getParentFile(); - parent.deleteOnExit(); - parent.mkdirs(); - out = new FileOutputStream(tempfile); - transferData(in, out); - } - } - finally - { - if (out != null) - out.close(); - } - } - - /** - * Writes an file to a ZipOutputStream. - * @param File to read data from - * @param Path of the ZipEntry - * @param ZipOutputStream to write to - */ - public static void addToZipOutputStream(String file, - String zipPath, - ZipOutputStream out) - throws FileNotFoundException, IOException - { - File f = new File(file); - byte[] buffer = new byte[8192]; // Create a buffer for copying - int bytes_read; - FileInputStream in = null; - try - { - in = new FileInputStream(f); // Stream to read file - ZipEntry entry = new ZipEntry(zipPath); // Make a ZipEntry - out.putNextEntry(entry); // Store entry in zipfile - while ((bytes_read = in.read(buffer)) != -1) // Copy bytes to zipfile - out.write(buffer, 0, bytes_read); - } - finally - { - if (in != null) - in.close(); // Close input stream - } - } - - /** - * Extracts a tar file to a directory. - * @param Tar file to read data from - * @param Directory to write to - */ - public static void extractTar(File tarFile, File destDir) - throws IOException - { - FileInputStream fis = null; - try - { - fis = new FileInputStream(tarFile); - TarArchive ta = new TarArchive(fis); - ta.extractContents(destDir); - ta.closeArchive(); - } - finally - { - if (fis != null) - fis.close(); - } - } - - /** - * Extracts a GZip file to a file. - * @param GZip file to read data from - * @param File to write to - */ - public static void extractGZip(File f, File destFile) throws IOException - { - FileOutputStream out = null; - FileInputStream fis = null; - GZIPInputStream gzin = null; - try - { - out = new FileOutputStream(destFile); - fis = new FileInputStream(f); - gzin = new GZIPInputStream(fis); - byte[] data = new byte[10000]; - int len; - while ((len = gzin.read(data)) != -1) - { - out.write(data, 0, len); - } - out.flush(); - } - finally - { - if (gzin != null) - gzin.close(); - if (out != null) - out.close(); - if (fis != null) - fis.close(); - } - } - - /** - * reads all bytes from the given stream - * @param is the stream to read from - */ - public static final byte[] loadBytes(InputStream is) throws IOException - { - // read in the entry data - int count = 0; - byte[] buffer = new byte[0]; - byte[] chunk = new byte[4096]; - while ((count = is.read(chunk)) >= 0) - { - byte[] t = new byte[buffer.length + count]; - System.arraycopy(buffer, 0, t, 0, buffer.length); - System.arraycopy(chunk, 0, t, buffer.length, count); - buffer = t; - } - return buffer; - } - - /** Returns the file extension of a file. - * @param filename Filename to obtain the file extension. - * @return File extension (without the "."). - */ - public static String getFileExtension(String filename) - { - return filename.substring(filename.lastIndexOf(".") + 1); // + 1 to remove the "." - } - - /** Returns the file extension of a file. - * @param f File object to obtain the file extension. - * @return File extension (without the "."). - */ - public static String getFileExtension(File f) - { - return getFileExtension(f.getName()); - } -} diff --git a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/util/StringUtils.java b/sandbox/contributions/indyo/src/java/com/relevanz/indyo/util/StringUtils.java deleted file mode 100644 index 2a0a458c8c2..00000000000 --- a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/util/StringUtils.java +++ /dev/null @@ -1,93 +0,0 @@ -package com.relevanz.indyo.util; - -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache Lucene" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache", - * "Apache Lucene", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * . - */ - -import org.apache.oro.text.perl.Perl5Util; - -/** - * Utility String-related methods. - * - * @author Kelvin Tan - * @version $Id$ - */ -public final class StringUtils -{ - public static final String EMPTY_STRING = ""; - private static final char[] QUOTE_ENCODE = """.toCharArray(); - private static final char[] AMP_ENCODE = "&".toCharArray(); - private static final char[] LT_ENCODE = "<".toCharArray(); - private static final char[] GT_ENCODE = ">".toCharArray(); - private static final char[] APOS_ENCODE = "'".toCharArray(); - // Create a regular expression engine - private static Perl5Util perl5Util = new Perl5Util(); - - public static final String removeUnreadableCharacters(String s) - { - if (perl5Util.match("/\\W+/", s)) - { - // replace unreadable characters with a space - s = perl5Util.substitute("s#[^a-zA-Z0-9_@]+# #gm", s); - // remove any single/double word characters - s = perl5Util.substitute("s#\\b[a-zA-Z0-9_]{1,2}\\b##gm", s); - } - return trimWhitespace(s); - } - - public static final String trimWhitespace(String s) - { - s = perl5Util.substitute("s#[\\s]{3,}# #m", s); - return s; - } -} diff --git a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/util/XMLFilterBase.java b/sandbox/contributions/indyo/src/java/com/relevanz/indyo/util/XMLFilterBase.java deleted file mode 100644 index 11b487e6b94..00000000000 --- a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/util/XMLFilterBase.java +++ /dev/null @@ -1,404 +0,0 @@ -/*-- - - Copyright (C) 2000 Brett McLaughlin & Jason Hunter. - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions, and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions, and the disclaimer that follows - these conditions in the documentation and/or other materials - provided with the distribution. - - 3. The name "JDOM" must not be used to endorse or promote products - derived from this software without prior written permission. For - written permission, please contact license@jdom.org. - - 4. Products derived from this software may not be called "JDOM", nor - may "JDOM" appear in their name, without prior written permission - from the JDOM Project Management (pm@jdom.org). - - In addition, we request (but do not require) that you include in the - end-user documentation provided with the redistribution and/or in the - software itself an acknowledgement equivalent to the following: - "This product includes software developed by the - JDOM Project (http://www.jdom.org/)." - Alternatively, the acknowledgment may be graphical using the logos - available at http://www.jdom.org/images/logos. - - THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - SUCH DAMAGE. - - This software consists of voluntary contributions made by many - individuals on behalf of the JDOM Project and was originally - created by Brett McLaughlin and - Jason Hunter . For more information on the - JDOM Project, please see . - - */ -package com.relevanz.indyo.util; - -import java.io.IOException; - -import org.xml.sax.Attributes; -import org.xml.sax.InputSource; -import org.xml.sax.SAXException; -import org.xml.sax.SAXNotRecognizedException; -import org.xml.sax.SAXNotSupportedException; -import org.xml.sax.XMLReader; -import org.xml.sax.ext.LexicalHandler; -import org.xml.sax.helpers.AttributesImpl; -import org.xml.sax.helpers.XMLFilterImpl; - -/** - * Adds convenience methods to base SAX2 Filter implementation. - * - * Code and comments adapted from XMLWriter-0.2, written - * by David Megginson and released into the public domain, - * without warranty. - * - *

The convenience methods are provided so that clients do not have to - * create empty attribute lists or provide empty strings as parameters; - * for example, the method invocation

- * - *
- * w.startElement("foo");
- * 
- * - *

is equivalent to the regular SAX2 ContentHandler method

- * - *
- * w.startElement("", "foo", "", new AttributesImpl());
- * 
- * - *

Except that it is more efficient because it does not allocate - * a new empty attribute list each time.

- * - *

In fact, there is an even simpler convenience method, - * dataElement, designed for writing elements that - * contain only character data.

- * - *
- * w.dataElement("greeting", "Hello, world!");
- * 
- * - *

is equivalent to

- * - *
- * w.startElement("greeting");
- * w.characters("Hello, world!");
- * w.endElement("greeting");
- * 
- * - * @see org.xml.sax.helpers.XMLFilterImpl - */ -class XMLFilterBase extends XMLFilterImpl -{ - - //////////////////////////////////////////////////////////////////// - // Constructors. - //////////////////////////////////////////////////////////////////// - - /** - * Construct an XML filter with no parent. - * - *

This filter will have no parent: you must assign a parent - * before you start a parse or do any configuration with - * setFeature or setProperty.

- * - * @see org.xml.sax.XMLReader#setFeature - * @see org.xml.sax.XMLReader#setProperty - */ - public XMLFilterBase() - { - } - - /** - * Create an XML filter with the specified parent. - * - *

Use the XMLReader provided as the source of events.

- * - * @param xmlreader The parent in the filter chain. - */ - public XMLFilterBase(XMLReader parent) - { - super(parent); - } - - //////////////////////////////////////////////////////////////////// - // Convenience methods. - //////////////////////////////////////////////////////////////////// - - /** - * Start a new element without a qname or attributes. - * - *

This method will provide a default empty attribute - * list and an empty string for the qualified name. - * It invokes {@link - * #startElement(String, String, String, Attributes)} - * directly.

- * - * @param uri The element's Namespace URI. - * @param localName The element's local name. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see org.xml.sax.ContentHandler#startElement - */ - public void startElement (String uri, String localName) throws SAXException - { - startElement(uri, localName, "", EMPTY_ATTS); - } - - /** - * Start a new element without a qname, attributes or a Namespace URI. - * - *

This method will provide an empty string for the - * Namespace URI, and empty string for the qualified name, - * and a default empty attribute list. It invokes - * #startElement(String, String, String, Attributes)} - * directly.

- * - * @param localName The element's local name. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see org.xml.sax.ContentHandler#startElement - */ - public void startElement (String localName) throws SAXException - { - startElement("", localName, "", EMPTY_ATTS); - } - - /** - * End an element without a qname. - * - *

This method will supply an empty string for the qName. - * It invokes {@link #endElement(String, String, String)} - * directly.

- * - * @param uri The element's Namespace URI. - * @param localName The element's local name. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see org.xml.sax.ContentHandler#endElement - */ - public void endElement (String uri, String localName) throws SAXException - { - endElement(uri, localName, ""); - } - - /** - * End an element without a Namespace URI or qname. - * - *

This method will supply an empty string for the qName - * and an empty string for the Namespace URI. - * It invokes {@link #endElement(String, String, String)} - * directly.

- * - * @param localName The element's local name. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see org.xml.sax.ContentHandler#endElement - */ - public void endElement (String localName) throws SAXException - { - endElement("", localName, ""); - } - - /** - * Add an empty element. - * - * Both a {@link #startElement startElement} and an - * {@link #endElement endElement} event will be passed on down - * the filter chain. - * - * @param uri The element's Namespace URI, or the empty string - * if the element has no Namespace or if Namespace - * processing is not being performed. - * @param localName The element's local name (without prefix). This - * parameter must be provided. - * @param qName The element's qualified name (with prefix), or - * the empty string if none is available. This parameter - * is strictly advisory: the writer may or may not use - * the prefix attached. - * @param atts The element's attribute list. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see org.xml.sax.ContentHandler#startElement - * @see org.xml.sax.ContentHandler#endElement - */ - public void emptyElement (String uri, String localName, String qName, - Attributes atts) throws SAXException - { - startElement(uri, localName, qName, atts); - endElement(uri, localName, qName); - } - - /** - * Add an empty element without a qname or attributes. - * - *

This method will supply an empty string for the qname - * and an empty attribute list. It invokes - * {@link #emptyElement(String, String, String, Attributes)} - * directly.

- * - * @param uri The element's Namespace URI. - * @param localName The element's local name. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see #emptyElement(String, String, String, Attributes) - */ - public void emptyElement (String uri, String localName) throws SAXException - { - emptyElement(uri, localName, "", EMPTY_ATTS); - } - - /** - * Add an empty element without a Namespace URI, qname or attributes. - * - *

This method will supply an empty string for the qname, - * and empty string for the Namespace URI, and an empty - * attribute list. It invokes - * {@link #emptyElement(String, String, String, Attributes)} - * directly.

- * - * @param localName The element's local name. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see #emptyElement(String, String, String, Attributes) - */ - public void emptyElement (String localName) throws SAXException - { - emptyElement("", localName, "", EMPTY_ATTS); - } - - /** - * Add an element with character data content. - * - *

This is a convenience method to add a complete element - * with character data content, including the start tag - * and end tag.

- * - *

This method invokes - * {@link @see org.xml.sax.ContentHandler#startElement}, - * followed by - * {@link #characters(String)}, followed by - * {@link @see org.xml.sax.ContentHandler#endElement}.

- * - * @param uri The element's Namespace URI. - * @param localName The element's local name. - * @param qName The element's default qualified name. - * @param atts The element's attributes. - * @param content The character data content. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see org.xml.sax.ContentHandler#startElement - * @see #characters(String) - * @see org.xml.sax.ContentHandler#endElement - */ - public void dataElement (String uri, String localName, String qName, - Attributes atts, String content) throws SAXException - { - startElement(uri, localName, qName, atts); - characters(content); - endElement(uri, localName, qName); - } - - /** - * Add an element with character data content but no attributes. - * - *

This is a convenience method to add a complete element - * with character data content, including the start tag - * and end tag. This method provides an empty string - * for the qname and an empty attribute list.

- * - *

This method invokes - * {@link @see org.xml.sax.ContentHandler#startElement}, - * followed by - * {@link #characters(String)}, followed by - * {@link @see org.xml.sax.ContentHandler#endElement}.

- * - * @param uri The element's Namespace URI. - * @param localName The element's local name. - * @param content The character data content. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see org.xml.sax.ContentHandler#startElement - * @see #characters(String) - * @see org.xml.sax.ContentHandler#endElement - */ - public void dataElement (String uri, String localName, String content) - throws SAXException - { - dataElement(uri, localName, "", EMPTY_ATTS, content); - } - - /** - * Add an element with character data content but no attributes or - * Namespace URI. - * - *

This is a convenience method to add a complete element - * with character data content, including the start tag - * and end tag. The method provides an empty string for the - * Namespace URI, and empty string for the qualified name, - * and an empty attribute list.

- * - *

This method invokes - * {@link @see org.xml.sax.ContentHandler#startElement}, - * followed by - * {@link #characters(String)}, followed by - * {@link @see org.xml.sax.ContentHandler#endElement}.

- * - * @param localName The element's local name. - * @param content The character data content. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see org.xml.sax.ContentHandler#startElement - * @see #characters(String) - * @see org.xml.sax.ContentHandler#endElement - */ - public void dataElement (String localName, String content) - throws SAXException - { - dataElement("", localName, "", EMPTY_ATTS, content); - } - - /** - * Add a string of character data, with XML escaping. - * - *

This is a convenience method that takes an XML - * String, converts it to a character array, then invokes - * {@link @see org.xml.sax.ContentHandler#characters}.

- * - * @param data The character data. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see @see org.xml.sax.ContentHandler#characters - */ - public void characters (String data) throws SAXException - { - char ch[] = data.toCharArray(); - characters(ch, 0, ch.length); - } - - //////////////////////////////////////////////////////////////////// - // Constants. - //////////////////////////////////////////////////////////////////// - protected static final Attributes EMPTY_ATTS = new AttributesImpl(); -} - -// end of XMLFilterBase.java