diff --git a/sandbox/contributions/indyo/lib/jakarta-oro-2.0.6.jar b/sandbox/contributions/indyo/lib/jakarta-oro-2.0.6.jar
deleted file mode 100644
index c98f821de9f..00000000000
--- a/sandbox/contributions/indyo/lib/jakarta-oro-2.0.6.jar
+++ /dev/null
@@ -1,2 +0,0 @@
-AnyObjectId[346504c6d4bd7232f0776a4a0f8a32333cedd93e] was removed in git history.
-Apache SVN contains full history.
\ No newline at end of file
diff --git a/sandbox/contributions/indyo/lib/jdom.jar b/sandbox/contributions/indyo/lib/jdom.jar
deleted file mode 100644
index 9b20ebce98f..00000000000
--- a/sandbox/contributions/indyo/lib/jdom.jar
+++ /dev/null
@@ -1,2 +0,0 @@
-AnyObjectId[93e77a4a4476afff71a110dda1e96465cb7f25a9] was removed in git history.
-Apache SVN contains full history.
\ No newline at end of file
diff --git a/sandbox/contributions/indyo/lib/log4j-1.2.6.jar b/sandbox/contributions/indyo/lib/log4j-1.2.6.jar
deleted file mode 100644
index feed73c51ab..00000000000
--- a/sandbox/contributions/indyo/lib/log4j-1.2.6.jar
+++ /dev/null
@@ -1,2 +0,0 @@
-AnyObjectId[be4a9176c35a7feeecf5b70edf070ecb5d13ac5d] was removed in git history.
-Apache SVN contains full history.
\ No newline at end of file
diff --git a/sandbox/contributions/indyo/lib/lucene-1.2.jar b/sandbox/contributions/indyo/lib/lucene-1.2.jar
deleted file mode 100644
index ab9c261bc40..00000000000
--- a/sandbox/contributions/indyo/lib/lucene-1.2.jar
+++ /dev/null
@@ -1,2 +0,0 @@
-AnyObjectId[ff9b90061b65c32122fcdde27bfe7f1e61fbd7bd] was removed in git history.
-Apache SVN contains full history.
\ No newline at end of file
diff --git a/sandbox/contributions/indyo/lib/tartool.jar b/sandbox/contributions/indyo/lib/tartool.jar
deleted file mode 100644
index 97cc1163c90..00000000000
--- a/sandbox/contributions/indyo/lib/tartool.jar
+++ /dev/null
@@ -1,2 +0,0 @@
-AnyObjectId[329aef393bece9d77eef16279910f6cd73113c39] was removed in git history.
-Apache SVN contains full history.
\ No newline at end of file
diff --git a/sandbox/contributions/indyo/lib/xmlParserAPIs-xerces-2.0.2.jar b/sandbox/contributions/indyo/lib/xmlParserAPIs-xerces-2.0.2.jar
deleted file mode 100644
index 2cd620815af..00000000000
--- a/sandbox/contributions/indyo/lib/xmlParserAPIs-xerces-2.0.2.jar
+++ /dev/null
@@ -1,2 +0,0 @@
-AnyObjectId[c1fa1d645474eee07f085a8ee29e38422f7614cf] was removed in git history.
-Apache SVN contains full history.
\ No newline at end of file
diff --git a/sandbox/contributions/indyo/src/conf/default.config.xml b/sandbox/contributions/indyo/src/conf/default.config.xml
deleted file mode 100644
index effd3e9fc9f..00000000000
--- a/sandbox/contributions/indyo/src/conf/default.config.xml
+++ /dev/null
@@ -1,19 +0,0 @@
-
-
- * A document is the atomic unit used for indexing purposes. It consists of - * metadata as well as its file contents. File contents are handled by - * {@link ContentHandler}. - *
- *- * DocumentHandler creates the {@link org.apache.lucene.document.Document}, - * adds fields to it, delegates to {@link ContentHandler} to handle - * file contents. - *
- * - * @version $Id$ - */ -public class DocumentHandler -{ - /** - * Field to retrieve all documents. - */ - public static final String ALL_DOCUMENTS_FIELD = "AllDocuments"; - - private static Logger log = Logger.getLogger(DocumentHandler.class); - - private static boolean isDebugEnabled = log.isDebugEnabled(); - - /** - * Should parent documents include data of its children? - */ - private static boolean parentEncapsulation = false; - /** - * Document object this DocumentHandler is handling. - */ - private Document doc; - - /** - * Map of metadata for this document. Contains the field:value pair - * to be added to the document. - */ - private Map metadata; - - /** - * Map of fields. Contains field:type_of_field pair. - */ - private Map customFields; - - /** - * IndexWriter. - */ - private IndexWriter writer; - - /** - * A collection of documents to be added to the writer. - */ - private List documents = new ArrayList(); - - /** - * Ctor. - * - * @param Map of metadata for this document. - * @param Map of fields. - * @param Writer. - */ - public DocumentHandler(Map metadata, - Map customFields, - IndexWriter writer) - { - this.metadata = metadata; - this.customFields = customFields; - this.writer = writer; - } - - /** - * Handles the actual processing of the document. - */ - public void process() throws IOException, Exception - { - String objectid = (String) metadata.get(IndexDataSource.OBJECT_IDENTIFIER); - if (objectid == null) - return; - doc = createDocument(); - addMapToDoc(metadata); - addNestedDataSource(metadata); - doc.add(Field.Text(ALL_DOCUMENTS_FIELD, ALL_DOCUMENTS_FIELD)); - //documents.add(doc); - if (writer != null) - { - addToWriter(); - } - else - { - documents.add(doc); - } - } - - private List getDocuments() - { - return documents; - } - - private Document createDocument() - { - return new Document(); - } - - /** - * Add the contents of a Map to a document. - * - * @param Map to add. - */ - private void addMapToDoc(Map map) - { - for (Iterator it = map.keySet().iterator(); it.hasNext();) - { - String field = (String) it.next(); - Object value = map.get(field); - if (value instanceof String) - { - String type = null; - if (customFields != null) - { - type = (String) customFields.get(field); - } - addFieldToDoc(type, field, (String) value); - } - else if (value instanceof Reader) - { - addFieldToDoc(field, (Reader) value); - } - } - } - - /** - * Add nested datasources. - * - * @param Map which contains the nested datasources. - */ - private void addNestedDataSource(Map map) throws Exception - { - Object o = map.get(IndexDataSource.NESTED_DATASOURCE); - if (o == null) - return; - if (o instanceof IndexDataSource) - { - IndexDataSource ds = (IndexDataSource) o; - addDataSource(ds); - } - else if (o instanceof List) - { - List nestedDataSource = (List) o; - for (int i = 0, n = nestedDataSource.size(); i < n; i++) - { - IndexDataSource ds = (IndexDataSource) nestedDataSource.get(i); - addDataSource(ds); - } - } - else if (o instanceof IndexDataSource[]) - { - IndexDataSource[] nestedDataSource = (IndexDataSource[]) o; - for (int i = 0, n = nestedDataSource.length; i < n; i++) - { - IndexDataSource ds = (IndexDataSource) nestedDataSource[i]; - addDataSource(ds); - } - } - else - { - log.warn("Unknown object found as nested datasource:" + o); - } - } - - /** - * Datasources are basically a collection of data maps to be indexed. - * addMapToDoc is invoked for each map. - * - * @param Datasource to add. - */ - private void addDataSource(IndexDataSource ds) throws Exception - { - Map[] data = ds.getData(); - for (int i = 0; i < data.length; i++) - { - Map map = data[i]; - if (map.containsKey(IndexDataSource.OBJECT_IDENTIFIER)) - { - /** - * Create a new document because child datasources may need - * to be retrieved independently of parent doc. - */ - DocumentHandler docHandler = new DocumentHandler(map, null, null); - docHandler.process(); - documents.addAll(docHandler.getDocuments()); - } - else - { - addMapToDoc(map); - /** - * Add nested datasources of this datasource's data - */ - addNestedDataSource(map); - } - } - } - - /** - * Adds a String-based field to a document. - * - * @param Type of field. - * @param Name of field. - * @param Value of field. - */ - private void addFieldToDoc(String type, String field, String value) - { - if (value == null) - value = StringUtils.EMPTY_STRING; - if (SearchConfiguration.KEYWORD_FIELD_TYPE.equalsIgnoreCase(type)) - doc.add(Field.Keyword(field, value)); - else if (SearchConfiguration.UNINDEXED_FIELD_TYPE.equalsIgnoreCase(type)) - doc.add(Field.UnIndexed(field, value)); - else if (SearchConfiguration.UNSTORED_FIELD_TYPE.equalsIgnoreCase(type)) - doc.add(Field.UnStored(field, value)); - else - doc.add(Field.Text(field, value)); - } - - /** - * Adds a Reader-based field to a document. - * - * @param Name of field. - * @param Reader. - */ - private void addFieldToDoc(String field, Reader reader) - { - doc.add(Field.Text(field, reader)); - } - - /** - * Adds documents to the IndexWriter. - */ - private void addToWriter() throws IOException - { - if (parentEncapsulation) - { - for (int i = 0, n = documents.size(); i < n; i++) - { - Document d = (Document) documents.get(i); - for (Enumeration e = d.fields(); e.hasMoreElements();) - { - Field f = (Field) e.nextElement(); - String fieldName = f.name(); - if (!fieldName.equals(IndexDataSource.CONTAINER_IDENTIFIER) - && !fieldName.equals(IndexDataSource.OBJECT_CLASS) - && !fieldName.equals(IndexDataSource.OBJECT_IDENTIFIER)) - { - doc.add(f); - } - } - } - } - writer.addDocument(doc); - - for (int i = 0, n = documents.size(); i < n; i++) - { - writer.addDocument((Document) documents.get(i)); - } - } -} \ No newline at end of file diff --git a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/FSDataSource.java b/sandbox/contributions/indyo/src/java/com/relevanz/indyo/FSDataSource.java deleted file mode 100644 index c1efba9dd88..00000000000 --- a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/FSDataSource.java +++ /dev/null @@ -1,160 +0,0 @@ -package com.relevanz.indyo; - -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache Lucene" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache", - * "Apache Lucene", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - *- * SearchIndexer is responsible for creating the IndexWriter - * {@see org.apache.lucene.index.IndexWriter} and passing it to - * DocumentHandlers {@link DocumentHandler} to index individual documents. - *
- * - * @version $Id$ - */ -public class IndyoIndexer -{ - private static Logger log = Logger.getLogger(IndyoIndexer.class); - private IndexWriter fsWriter; - private SearchConfiguration config; - - public IndyoIndexer(String indexDirectory, String configFile) - throws IOException, IllegalConfigurationException - { - Analyzer a = new StandardAnalyzer(); - fsWriter = new IndexWriter(indexDirectory, a, true); - fsWriter.maxFieldLength = 1000000; - loadConfig(configFile); - } - - /** - * Indexes documents. - */ - public synchronized void index(IndexDataSource ds) throws IOException, Exception - { - log.debug("Initiating search engine indexing..."); - long start = System.currentTimeMillis(); - // temporarily use an empty map whilst custom fields get implemented - indexDataSource(ds, Collections.EMPTY_MAP); - fsWriter.optimize(); - fsWriter.close(); - long stop = System.currentTimeMillis(); - log.debug("Indexing took " + (stop - start) + " milliseconds"); - } - - private void loadConfig(String configFile) throws IllegalConfigurationException - { - config = new SearchConfiguration(configFile); - FileContentHandlerFactory.setHandlerRegistry(config.getContentHandlers()); - } - - private void indexDataSource(IndexDataSource source, Map customFields) - throws Exception - { - Map[] data = source.getData(); - // here's a good place to spawn a couple of threads for indexing - for (int i = 0; i < data.length; i++) - { - DocumentHandler docHandler = - new DocumentHandler(data[i], customFields, fsWriter); - docHandler.process(); - } - } -} diff --git a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/SearchConfiguration.java b/sandbox/contributions/indyo/src/java/com/relevanz/indyo/SearchConfiguration.java deleted file mode 100644 index 4f088a04b42..00000000000 --- a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/SearchConfiguration.java +++ /dev/null @@ -1,259 +0,0 @@ -package com.relevanz.indyo; - -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache Lucene" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache", - * "Apache Lucene", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - *- * Classes which need to implement the FileContentHandler interface should - * extend this class or {@link NestedFileContentHandlerAdapter}. - *
- * - * @author Kelvin Tan - * @version $Id$ - */ -public abstract class FileContentHandlerAdapter implements FileContentHandler -{ - protected File file; - - protected FileContentHandlerAdapter(File file) - { - this.file = file; - } - - public Reader getReader() - { - return null; - } - - public List getNestedDataSource() - { - return null; - } -} diff --git a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/FileContentHandlerFactory.java b/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/FileContentHandlerFactory.java deleted file mode 100644 index 707d99ee550..00000000000 --- a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/FileContentHandlerFactory.java +++ /dev/null @@ -1,180 +0,0 @@ -package com.relevanz.indyo.contenthandler; - -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache Lucene" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache", - * "Apache Lucene", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - *- * Classes which need to implement the FileContentHandler interface - * and need to handle nested content (example: zip, tar, rar, etc) should - * extend this class. - *
- * - * @author Kelvin Tan - * @version $Id$ - */ -public abstract class NestedFileContentHandlerAdapter - extends FileContentHandlerAdapter -{ - protected final String TEMP_FOLDER = "/usr/temp" + '/' - + Math.random() + '/'; - - protected List nestedDataSource; - - public NestedFileContentHandlerAdapter(File file) - { - super(file); - } - - public boolean containsNestedData() - { - return true; - } -} diff --git a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/NullHandler.java b/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/NullHandler.java deleted file mode 100644 index 5c3353a071c..00000000000 --- a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/contenthandler/NullHandler.java +++ /dev/null @@ -1,94 +0,0 @@ -package com.relevanz.indyo.contenthandler; - -import java.io.File; -import java.io.Reader; - -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache Lucene" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache", - * "Apache Lucene", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - *This filter removes leading and trailing whitespace from - * field-oriented XML without mixed content. Note that this class will - * likely not yield appropriate results for document-oriented XML like - * XHTML pages, which mix character data and elements together.
- * - * @see DataFormatFilter - */ -public class DataUnformatFilter extends XMLFilterBase -{ - - //////////////////////////////////////////////////////////////////// - // Constructors. - //////////////////////////////////////////////////////////////////// - - /** - * Create a new filter. - */ - public DataUnformatFilter() - { - } - - /** - * Create a new filter. - * - *Use the XMLReader provided as the source of events.
- * - * @param xmlreader The parent in the filter chain. - */ - public DataUnformatFilter(XMLReader xmlreader) - { - super(xmlreader); - } - - //////////////////////////////////////////////////////////////////// - // Public methods. - //////////////////////////////////////////////////////////////////// - - /** - * Reset the filter so that it can be reused. - * - *This method is especially useful if the filter failed - * with an exception the last time through.
- */ - public void reset () - { - state = SEEN_NOTHING; - stateStack = new Stack(); - whitespace = new StringBuffer(); - } - - /** - * Filter a start document event. - * - *Reset state and pass the event on for further processing.
- * - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see org.xml.sax.ContentHandler#startDocument - */ - public void startDocument () - throws SAXException - { - reset(); - super.startDocument(); - } - - /** - * Filter a start element event. - * - * @param uri The element's Namespace URI. - * @param localName The element's local name. - * @param qName The element's qualified (prefixed) name. - * @param atts The element's attribute list. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see org.xml.sax.ContentHandler#startElement - */ - public void startElement (String uri, String localName, - String qName, Attributes atts) - throws SAXException - { - clearWhitespace(); - stateStack.push(SEEN_ELEMENT); - state = SEEN_NOTHING; - super.startElement(uri, localName, qName, atts); - } - - /** - * Filter an end element event. - * - * @param uri The element's Namespace URI. - * @param localName The element's local name. - * @param qName The element's qualified (prefixed) name. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see org.xml.sax.ContentHandler#endElement - */ - public void endElement (String uri, String localName, String qName) - throws SAXException - { - if (state == SEEN_ELEMENT) { - clearWhitespace(); - } else { - emitWhitespace(); - } - state = stateStack.pop(); - super.endElement(uri, localName, qName); - } - - /** - * Filter a character data event. - * - * @param ch The characters to write. - * @param start The starting position in the array. - * @param length The number of characters to use. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see org.xml.sax.ContentHandler#characters - */ - public void characters (char ch[], int start, int length) - throws SAXException - { - if (state != SEEN_DATA) { - - /* Look for non-whitespace. */ - int end = start + length; - while (end-- > start) { - if (!isXMLWhitespace(ch[end])) - break; - } - - /* - * If all the characters are whitespace, save them for later. - * If we've got some data, emit any saved whitespace and update - * our state to show we've seen data. - */ - if (end < start) { - saveWhitespace(ch, start, length); - } else { - state = SEEN_DATA; - emitWhitespace(); - } - } - - /* Pass on everything inside a data field. */ - if (state == SEEN_DATA) { - super.characters(ch, start, length); - } - } - - /** - * Filter an ignorable whitespace event. - * - * @param ch The array of characters to write. - * @param start The starting position in the array. - * @param length The number of characters to write. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see org.xml.sax.ContentHandler#ignorableWhitespace - */ - public void ignorableWhitespace (char ch[], int start, int length) - throws SAXException - { - emitWhitespace(); - // ignore - } - - /** - * Filter a processing instruction event. - * - * @param target The PI target. - * @param data The PI data. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see org.xml.sax.ContentHandler#processingInstruction - */ - public void processingInstruction (String target, String data) - throws SAXException - { - emitWhitespace(); - super.processingInstruction(target, data); - } - - //////////////////////////////////////////////////////////////////// - // Internal methods. - //////////////////////////////////////////////////////////////////// - - /** - * Saves trailing whitespace. - */ - protected void saveWhitespace (char[] ch, int start, int length) { - whitespace.append(ch, start, length); - } - - /** - * Passes saved whitespace down the filter chain. - */ - protected void emitWhitespace () - throws SAXException - { - char[] data = new char[whitespace.length()]; - if (whitespace.length() > 0) { - whitespace.getChars(0, data.length, data, 0); - whitespace.setLength(0); - super.characters(data, 0, data.length); - } - } - - /** - * Discards saved whitespace. - */ - protected void clearWhitespace () { - whitespace.setLength(0); - } - - /** - * Returns true if character is XML whitespace. - */ - private boolean isXMLWhitespace (char c) - { - return c == ' ' || c == '\t' || c == '\r' || c == '\n'; - } - - //////////////////////////////////////////////////////////////////// - // Constants. - //////////////////////////////////////////////////////////////////// - - private static final Object SEEN_NOTHING = new Object(); - private static final Object SEEN_ELEMENT = new Object(); - private static final Object SEEN_DATA = new Object(); - - - //////////////////////////////////////////////////////////////////// - // Internal state. - //////////////////////////////////////////////////////////////////// - - private Object state = SEEN_NOTHING; - private Stack stateStack = new Stack(); - - private StringBuffer whitespace = new StringBuffer(); -} - -// end of DataUnformatFilter.java diff --git a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/util/IOUtils.java b/sandbox/contributions/indyo/src/java/com/relevanz/indyo/util/IOUtils.java deleted file mode 100644 index ad4952ed21e..00000000000 --- a/sandbox/contributions/indyo/src/java/com/relevanz/indyo/util/IOUtils.java +++ /dev/null @@ -1,274 +0,0 @@ -package com.relevanz.indyo.util; - -/* ==================================================================== - * The Apache Software License, Version 1.1 - * - * Copyright (c) 2001 The Apache Software Foundation. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: - * "This product includes software developed by the - * Apache Software Foundation (http://www.apache.org/)." - * Alternately, this acknowledgment may appear in the software itself, - * if and wherever such third-party acknowledgments normally appear. - * - * 4. The names "Apache" and "Apache Software Foundation" and - * "Apache Lucene" must not be used to endorse or promote products - * derived from this software without prior written permission. For - * written permission, please contact apache@apache.org. - * - * 5. Products derived from this software may not be called "Apache", - * "Apache Lucene", nor may "Apache" appear in their name, without - * prior written permission of the Apache Software Foundation. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - *The convenience methods are provided so that clients do not have to - * create empty attribute lists or provide empty strings as parameters; - * for example, the method invocation
- * - *- * w.startElement("foo"); - *- * - *
is equivalent to the regular SAX2 ContentHandler method
- * - *- * w.startElement("", "foo", "", new AttributesImpl()); - *- * - *
Except that it is more efficient because it does not allocate - * a new empty attribute list each time.
- * - *In fact, there is an even simpler convenience method, - * dataElement, designed for writing elements that - * contain only character data.
- * - *- * w.dataElement("greeting", "Hello, world!"); - *- * - *
is equivalent to
- * - *- * w.startElement("greeting"); - * w.characters("Hello, world!"); - * w.endElement("greeting"); - *- * - * @see org.xml.sax.helpers.XMLFilterImpl - */ -class XMLFilterBase extends XMLFilterImpl -{ - - //////////////////////////////////////////////////////////////////// - // Constructors. - //////////////////////////////////////////////////////////////////// - - /** - * Construct an XML filter with no parent. - * - *
This filter will have no parent: you must assign a parent - * before you start a parse or do any configuration with - * setFeature or setProperty.
- * - * @see org.xml.sax.XMLReader#setFeature - * @see org.xml.sax.XMLReader#setProperty - */ - public XMLFilterBase() - { - } - - /** - * Create an XML filter with the specified parent. - * - *Use the XMLReader provided as the source of events.
- * - * @param xmlreader The parent in the filter chain. - */ - public XMLFilterBase(XMLReader parent) - { - super(parent); - } - - //////////////////////////////////////////////////////////////////// - // Convenience methods. - //////////////////////////////////////////////////////////////////// - - /** - * Start a new element without a qname or attributes. - * - *This method will provide a default empty attribute - * list and an empty string for the qualified name. - * It invokes {@link - * #startElement(String, String, String, Attributes)} - * directly.
- * - * @param uri The element's Namespace URI. - * @param localName The element's local name. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see org.xml.sax.ContentHandler#startElement - */ - public void startElement (String uri, String localName) throws SAXException - { - startElement(uri, localName, "", EMPTY_ATTS); - } - - /** - * Start a new element without a qname, attributes or a Namespace URI. - * - *This method will provide an empty string for the - * Namespace URI, and empty string for the qualified name, - * and a default empty attribute list. It invokes - * #startElement(String, String, String, Attributes)} - * directly.
- * - * @param localName The element's local name. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see org.xml.sax.ContentHandler#startElement - */ - public void startElement (String localName) throws SAXException - { - startElement("", localName, "", EMPTY_ATTS); - } - - /** - * End an element without a qname. - * - *This method will supply an empty string for the qName. - * It invokes {@link #endElement(String, String, String)} - * directly.
- * - * @param uri The element's Namespace URI. - * @param localName The element's local name. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see org.xml.sax.ContentHandler#endElement - */ - public void endElement (String uri, String localName) throws SAXException - { - endElement(uri, localName, ""); - } - - /** - * End an element without a Namespace URI or qname. - * - *This method will supply an empty string for the qName - * and an empty string for the Namespace URI. - * It invokes {@link #endElement(String, String, String)} - * directly.
- * - * @param localName The element's local name. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see org.xml.sax.ContentHandler#endElement - */ - public void endElement (String localName) throws SAXException - { - endElement("", localName, ""); - } - - /** - * Add an empty element. - * - * Both a {@link #startElement startElement} and an - * {@link #endElement endElement} event will be passed on down - * the filter chain. - * - * @param uri The element's Namespace URI, or the empty string - * if the element has no Namespace or if Namespace - * processing is not being performed. - * @param localName The element's local name (without prefix). This - * parameter must be provided. - * @param qName The element's qualified name (with prefix), or - * the empty string if none is available. This parameter - * is strictly advisory: the writer may or may not use - * the prefix attached. - * @param atts The element's attribute list. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see org.xml.sax.ContentHandler#startElement - * @see org.xml.sax.ContentHandler#endElement - */ - public void emptyElement (String uri, String localName, String qName, - Attributes atts) throws SAXException - { - startElement(uri, localName, qName, atts); - endElement(uri, localName, qName); - } - - /** - * Add an empty element without a qname or attributes. - * - *This method will supply an empty string for the qname - * and an empty attribute list. It invokes - * {@link #emptyElement(String, String, String, Attributes)} - * directly.
- * - * @param uri The element's Namespace URI. - * @param localName The element's local name. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see #emptyElement(String, String, String, Attributes) - */ - public void emptyElement (String uri, String localName) throws SAXException - { - emptyElement(uri, localName, "", EMPTY_ATTS); - } - - /** - * Add an empty element without a Namespace URI, qname or attributes. - * - *This method will supply an empty string for the qname, - * and empty string for the Namespace URI, and an empty - * attribute list. It invokes - * {@link #emptyElement(String, String, String, Attributes)} - * directly.
- * - * @param localName The element's local name. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see #emptyElement(String, String, String, Attributes) - */ - public void emptyElement (String localName) throws SAXException - { - emptyElement("", localName, "", EMPTY_ATTS); - } - - /** - * Add an element with character data content. - * - *This is a convenience method to add a complete element - * with character data content, including the start tag - * and end tag.
- * - *This method invokes - * {@link @see org.xml.sax.ContentHandler#startElement}, - * followed by - * {@link #characters(String)}, followed by - * {@link @see org.xml.sax.ContentHandler#endElement}.
- * - * @param uri The element's Namespace URI. - * @param localName The element's local name. - * @param qName The element's default qualified name. - * @param atts The element's attributes. - * @param content The character data content. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see org.xml.sax.ContentHandler#startElement - * @see #characters(String) - * @see org.xml.sax.ContentHandler#endElement - */ - public void dataElement (String uri, String localName, String qName, - Attributes atts, String content) throws SAXException - { - startElement(uri, localName, qName, atts); - characters(content); - endElement(uri, localName, qName); - } - - /** - * Add an element with character data content but no attributes. - * - *This is a convenience method to add a complete element - * with character data content, including the start tag - * and end tag. This method provides an empty string - * for the qname and an empty attribute list.
- * - *This method invokes - * {@link @see org.xml.sax.ContentHandler#startElement}, - * followed by - * {@link #characters(String)}, followed by - * {@link @see org.xml.sax.ContentHandler#endElement}.
- * - * @param uri The element's Namespace URI. - * @param localName The element's local name. - * @param content The character data content. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see org.xml.sax.ContentHandler#startElement - * @see #characters(String) - * @see org.xml.sax.ContentHandler#endElement - */ - public void dataElement (String uri, String localName, String content) - throws SAXException - { - dataElement(uri, localName, "", EMPTY_ATTS, content); - } - - /** - * Add an element with character data content but no attributes or - * Namespace URI. - * - *This is a convenience method to add a complete element - * with character data content, including the start tag - * and end tag. The method provides an empty string for the - * Namespace URI, and empty string for the qualified name, - * and an empty attribute list.
- * - *This method invokes - * {@link @see org.xml.sax.ContentHandler#startElement}, - * followed by - * {@link #characters(String)}, followed by - * {@link @see org.xml.sax.ContentHandler#endElement}.
- * - * @param localName The element's local name. - * @param content The character data content. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see org.xml.sax.ContentHandler#startElement - * @see #characters(String) - * @see org.xml.sax.ContentHandler#endElement - */ - public void dataElement (String localName, String content) - throws SAXException - { - dataElement("", localName, "", EMPTY_ATTS, content); - } - - /** - * Add a string of character data, with XML escaping. - * - *This is a convenience method that takes an XML - * String, converts it to a character array, then invokes - * {@link @see org.xml.sax.ContentHandler#characters}.
- * - * @param data The character data. - * @exception org.xml.sax.SAXException If a filter - * further down the chain raises an exception. - * @see @see org.xml.sax.ContentHandler#characters - */ - public void characters (String data) throws SAXException - { - char ch[] = data.toCharArray(); - characters(ch, 0, ch.length); - } - - //////////////////////////////////////////////////////////////////// - // Constants. - //////////////////////////////////////////////////////////////////// - protected static final Attributes EMPTY_ATTS = new AttributesImpl(); -} - -// end of XMLFilterBase.java