SOLR-469 -- Added DataImportHandler as a contrib project.

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@681182 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shalin Shekhar Mangar 2008-07-30 19:35:58 +00:00
parent 842680f15d
commit 79e77502f6
66 changed files with 10257 additions and 0 deletions


@ -331,6 +331,11 @@ New Features
65. SOLR-663: Allow multiple files for stopwords, keepwords, protwords and synonyms (shalin)
66. SOLR-469: Added DataImportHandler as a contrib project which makes indexing data from Databases, XML files and HTTP
data sources into Solr quick and easy. Includes API and implementations for supporting multiple
data sources, processors and transformers for importing data. Supports full data imports as well as
incremental (delta) indexing. See http://wiki.apache.org/solr/DataImportHandler for more details.
(Noble Paul, shalin)
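
A minimal sketch (not part of this commit) of how a client might trigger these imports over HTTP, assuming DataImportHandler is registered at /dataimport in solrconfig.xml and Solr runs at the default example address; command, clean and commit are the request parameters the handler understands:

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;

public class DataImportClientSketch {
  // assumed handler location; the actual path depends on solrconfig.xml
  private static final String DIH_URL = "http://localhost:8983/solr/dataimport";

  public static void main(String[] args) throws Exception {
    // full import: clean the index first and commit when finished
    call(DIH_URL + "?command=full-import&clean=true&commit=true");
    // incremental (delta) import: index only what changed since the last run
    call(DIH_URL + "?command=delta-import&commit=true");
  }

  private static void call(String url) throws Exception {
    BufferedReader in = new BufferedReader(
        new InputStreamReader(new URL(url).openStream(), "UTF-8"));
    for (String line; (line = in.readLine()) != null;)
      System.out.println(line);
    in.close();
  }
}
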
Changes in runtime behavior
1. SOLR-559: use Lucene updateDocument, deleteDocuments methods. This


@ -0,0 +1,129 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project name="solr-dataimporthandler" default="build">
<property name="solr-path" value="../.." />
<property name="javadoc.dir" value="target/javadoc"></property>
<import file="../../common-build.xml"/>
<description>
Data Import Handler
</description>
<path id="common.classpath">
<pathelement location="${solr-path}/build/common" />
<pathelement location="${solr-path}/build/core" />
<fileset dir="${solr-path}/lib" includes="*.jar"></fileset>
</path>
<path id="test.classpath">
<path refid="common.classpath" />
<pathelement path="target/classes" />
<pathelement path="target/test-classes" />
</path>
<target name="clean">
<delete failonerror="false" dir="target"/>
</target>
<target name="init">
<mkdir dir="target/classes"/>
<ant dir="../../" inheritall="false" target="compile" />
</target>
<target name="compile" depends="init">
<solr-javac destdir="target/classes"
classpathref="common.classpath">
<src path="src/main/java" />
</solr-javac>
</target>
<target name="build" depends="compile">
<jar destfile="target/${fullnamever}.jar" basedir="target/classes" />
</target>
<target name="compileTests" depends="compile">
<solr-javac destdir="target/test-classes"
classpathref="test.classpath">
<src path="src/test/java" />
</solr-javac>
</target>
<target name="test" depends="compileTests">
<mkdir dir="target/test-results"/>
<junit printsummary="on"
haltonfailure="no"
errorProperty="tests.failed"
failureProperty="tests.failed"
dir="src/test/resources/"
>
<formatter type="brief" usefile="false" if="junit.details"/>
<classpath refid="test.classpath"/>
<formatter type="xml"/>
<batchtest fork="yes" todir="target/test-results" unless="testcase">
<fileset dir="src/test/java" includes="${junit.includes}"/>
</batchtest>
<batchtest fork="yes" todir="target/test-results" if="testcase">
<fileset dir="src/test/java" includes="**/${testcase}.java"/>
</batchtest>
</junit>
<fail if="tests.failed">Tests failed!</fail>
</target>
<target name="dist" depends="build">
<copy todir="../../build/web">
<fileset dir="src/main/webapp" includes="**" />
</copy>
<mkdir dir="../../build/web/WEB-INF/lib"/>
<copy file="target/${fullnamever}.jar" todir="${solr-path}/build/web/WEB-INF/lib"></copy>
<copy file="target/${fullnamever}.jar" todir="${solr-path}/dist"></copy>
</target>
<target name="javadoc">
<sequential>
<mkdir dir="${javadoc.dir}/contrib-${fullnamever}"/>
<javadoc
destdir="${javadoc.dir}/contrib-${fullnamever}"
author="true"
version="true"
failonerror="true"
use="true"
encoding="utf8"
access="${javadoc.access}"
windowtitle="${Name} ${version} contrib-${fullnamever} API"
doctitle="${Name} ${version} API (${specversion})"
bottom="Copyright &amp;copy; ${javadoc.years} The Apache Software Foundation"
>
<packageset dir="src/main/java"/>
<link href="${javadoc.link.java}"/>
<link href="${javadoc.link.junit}"/>
<link href="${javadoc.link.lucene}"/>
<classpath refid="common.classpath"/>
</javadoc>
<jar basedir="${javadoc.dir}/contrib-${fullnamever}" destfile="target/contrib-${fullnamever}-javadoc.jar"/>
<copy file="target/contrib-${fullnamever}-javadoc.jar" todir="${solr-path}/dist"></copy>
</sequential>
</target>
</project>


@ -0,0 +1,161 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.util.AbstractSolrTestCase;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* <p>
* Abstract base class for DataImportHandler tests
* </p>
* <p/>
* <b>This API is experimental and subject to change</b>
*
* @version $Id$
* @since solr 1.3
*/
public abstract class AbstractDataImportHandlerTest extends
AbstractSolrTestCase {
@Override
public void setUp() throws Exception {
super.setUp();
}
@Override
public void tearDown() throws Exception {
super.tearDown();
}
protected String loadDataConfig(String dataConfigFileName) {
try {
SolrCore core = h.getCore();
return SolrWriter.getResourceAsString(core.getResourceLoader()
.openResource(dataConfigFileName));
} catch (IOException e) {
e.printStackTrace();
return null;
}
}
protected void runFullImport(String dataConfig) throws Exception {
LocalSolrQueryRequest request = lrf.makeRequest("command", "full-import",
"debug", "on", "clean", "true", "commit", "true", "dataConfig",
dataConfig);
h.query("/dataimport", request);
}
protected void runDeltaImport(String dataConfig) throws Exception {
LocalSolrQueryRequest request = lrf.makeRequest("command", "delta-import",
"debug", "on", "clean", "true", "commit", "true", "dataConfig",
dataConfig);
h.query("/dataimport", request);
}
/**
* Helper for creating a Context instance. Useful for testing Transformers
*/
@SuppressWarnings("unchecked")
public static Context getContext(DataConfig.Entity parentEntity,
VariableResolverImpl resolver, DataSource parentDataSource,
int currProcess, final List<Map<String, String>> entityFields,
final Map<String, String> entityAttrs) {
final Context delegate = new ContextImpl(parentEntity, resolver,
parentDataSource, currProcess, Collections.EMPTY_MAP,
new HashMap<String, Object>(), null, null);
return new Context() {
public String getEntityAttribute(String name) {
return entityAttrs == null ? delegate.getEntityAttribute(name)
: entityAttrs.get(name);
}
public List<Map<String, String>> getAllEntityFields() {
return entityFields == null ? delegate.getAllEntityFields()
: entityFields;
}
public VariableResolver getVariableResolver() {
return delegate.getVariableResolver();
}
public DataSource getDataSource() {
return delegate.getDataSource();
}
public boolean isRootEntity() {
return false;
}
public int currentProcess() {
return delegate.currentProcess();
}
public Map<String, Object> getRequestParameters() {
return delegate.getRequestParameters();
}
public EntityProcessor getEntityProcessor() {
return null;
}
public void setSessionAttribute(String name, Object val, String scope) {
delegate.setSessionAttribute(name, val, scope);
}
public Object getSessionAttribute(String name, String scope) {
return delegate.getSessionAttribute(name, scope);
}
public Context getParentContext() {
return delegate.getParentContext();
}
public DataSource getDataSource(String name) {
return delegate.getDataSource(name);
}
public SolrCore getSolrCore() {
return delegate.getSolrCore();
}
};
}
/**
* Strings at even index are keys, odd-index strings are values in the
* returned map
*/
@SuppressWarnings("unchecked")
public static Map createMap(Object... args) {
Map result = new HashMap();
if (args == null || args.length == 0)
return result;
for (int i = 0; i < args.length - 1; i += 2)
result.put(args[i], args[i + 1]);
return result;
}
}
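
A sketch (not part of this commit) of how the getContext() and createMap() helpers above are meant to be used in a Transformer test, here against the DateFormatTransformer added later in this commit; the resource file names and the "dateTimeFormat" attribute literal are assumptions.

package org.apache.solr.handler.dataimport;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public class TestDateFormatTransformer extends AbstractDataImportHandlerTest {

  @SuppressWarnings("unchecked")
  public void testSimpleDate() throws Exception {
    // one field definition, as it would appear on a <field> tag in data-config.xml;
    // "dateTimeFormat" is assumed to be the attribute DateFormatTransformer reads
    Map<String, String> field = createMap(DataImporter.COLUMN, "lastModified",
        "dateTimeFormat", "yyyy-MM-dd");
    List<Map<String, String>> fields = new ArrayList<Map<String, String>>();
    fields.add(field);

    Context context = getContext(null, new VariableResolverImpl(), null,
        Context.FULL_DUMP, fields, null);

    // the transformer should replace the String value with a java.util.Date
    Map<String, Object> row = createMap("lastModified", "2008-07-30");
    new DateFormatTransformer().transformRow(row, context);
    assertTrue(row.get("lastModified") instanceof java.util.Date);
  }

  // resource names required by AbstractSolrTestCase; hypothetical here
  public String getSchemaFile() {
    return "dataimport-schema.xml";
  }

  public String getSolrConfigFile() {
    return "dataimport-solrconfig.xml";
  }
}
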


@ -0,0 +1,80 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
* This class enables caching of data obtained from the DB to avoid too many SQL
* queries
* <p/>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p/>
* <b>This API is experimental and subject to change</b>
*
* @version $Id$
* @since solr 1.3
*/
public class CachedSqlEntityProcessor extends SqlEntityProcessor {
private boolean isFirst;
@SuppressWarnings("unchecked")
public void init(Context context) {
super.init(context);
super.cacheInit();
isFirst = true;
}
public Map<String, Object> nextRow() {
if (rowcache != null) return getFromRowCache();
if (dataSourceRowCache != null)
return getFromRowCacheTransformed();
if (!isFirst)
return null;
String query = resolver.replaceTokens(context.getEntityAttribute("query"));
isFirst = false;
if (simpleCache != null) {
return getSimplCacheData(query);
} else {
return getIdCacheData(query);
}
}
protected List<Map<String, Object>> getAllNonCachedRows() {
List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
String q = getQuery();
initQuery(resolver.replaceTokens(q));
if (rowIterator == null)
return rows;
while (rowIterator.hasNext()) {
Map<String, Object> arow = rowIterator.next();
if (arow == null) {
break;
} else {
rows.add(arow);
}
}
return rows;
}
}


@ -0,0 +1,154 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.apache.solr.core.SolrCore;
import java.util.List;
import java.util.Map;
/**
* <p>
* This abstract class gives access to all available objects, so any component
* implemented by a user can use the full power of DataImportHandler.
* </p>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p/>
* <b>This API is experimental and subject to change</b>
*
* @version $Id$
* @since solr 1.3
*/
public abstract class Context {
public static final int FULL_DUMP = 1, DELTA_DUMP = 2, FIND_DELTA = 3;
public static final String SCOPE_ENTITY = "entity", SCOPE_GLOBAL = "global",
SCOPE_DOC = "document";
/**
* Get the value of any attribute put into this entity
*
* @param name name of the attribute eg: 'name'
* @return value of named attribute in entity
*/
public abstract String getEntityAttribute(String name);
/**
* Returns all the fields put into an entity. Each item in the list is a map
* corresponding to one field; the map contains the attribute names and values
* of that field.
*
* @return all fields in an entity
*/
public abstract List<Map<String, String>> getAllEntityFields();
/**
* Returns the VariableResolver used in this entity which can be used to
* resolve the tokens in ${<namespace.name>}
*
* @return a VariableResolver instance
* @see org.apache.solr.handler.dataimport.VariableResolver
*/
public abstract VariableResolver getVariableResolver();
/**
* Gets the datasource instance defined for this entity.
*
* @return a new DataSource instance as configured for the current entity
* @see org.apache.solr.handler.dataimport.DataSource
*/
public abstract DataSource getDataSource();
/**
* Gets a new DataSource instance with a name.
*
* @param name Name of the dataSource as defined in the dataSource tag
* @return a new DataSource instance as configured for the named entity
* @see org.apache.solr.handler.dataimport.DataSource
*/
public abstract DataSource getDataSource(String name);
/**
* Returns the instance of EntityProcessor used for this entity
*
* @return instance of EntityProcessor used for the current entity
* @see org.apache.solr.handler.dataimport.EntityProcessor
*/
public abstract EntityProcessor getEntityProcessor();
/**
* Stores a value under the given name in the given scope (entity, document, global)
*
* @param name the key
* @param val the value
* @param scope the scope in which the given key, value pair is to be stored
*/
public abstract void setSessionAttribute(String name, Object val, String scope);
/**
* Gets a value by name from the given scope (entity, document, global)
*
* @param name the key
* @param scope the scope from which the value is to be retrieved
* @return the object stored in the given scope with the given key
*/
public abstract Object getSessionAttribute(String name, String scope);
/**
* Gets the Context instance of the parent entity. This works only during a full
* dump; if the current entity is the root entity, null is returned.
*
* @return parent entity's Context
*/
public abstract Context getParentContext();
/**
* The request parameters passed over HTTP for this command. The values in the
* map are either String (for single-valued parameters) or List<String> (for
* multi-valued parameters).
*
* @return the request parameters passed in the URL to initiate this process
*/
public abstract Map<String, Object> getRequestParameters();
/**
* Returns whether the current entity is the root entity
*
* @return true if current entity is the root entity, false otherwise
*/
public abstract boolean isRootEntity();
/**
* Returns the current process: FULL_DUMP (1), DELTA_DUMP (2) or FIND_DELTA (3)
*
* @return the code of the current running process
*/
public abstract int currentProcess();
/**
* Exposes the actual SolrCore to the components
*
* @return the core
*/
public abstract SolrCore getSolrCore();
}
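
To make the contract above concrete, here is a sketch (not part of this commit) of a user-written Transformer that consumes a few of these methods, following the transformRow(Map, Context) signature used by DateFormatTransformer later in this commit; the class itself and the "trim" entity attribute are made up for illustration.

package org.apache.solr.handler.dataimport;

import java.util.Map;

public class TrimTransformer extends Transformer {

  public Object transformRow(Map<String, Object> row, Context context) {
    // attributes of the current <entity> tag are exposed through the Context
    if (!"true".equals(context.getEntityAttribute("trim")))
      return row;

    for (Map.Entry<String, Object> entry : row.entrySet()) {
      if (entry.getValue() instanceof String)
        entry.setValue(((String) entry.getValue()).trim());
    }

    // remember, for the lifetime of the current document, that trimming ran
    context.setSessionAttribute("trimmed", Boolean.TRUE, Context.SCOPE_DOC);
    return row;
  }
}
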


@ -0,0 +1,143 @@
package org.apache.solr.handler.dataimport;
import org.apache.solr.core.SolrCore;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* <p>
* An implementation for the Context
* </p>
* <p/>
* <b>This API is experimental and subject to change</b>
*
* @version $Id$
* @since solr 1.3
*/
public class ContextImpl extends Context {
private DataConfig.Entity entity;
private ContextImpl parent;
private VariableResolverImpl resolver;
private DataSource ds;
private int currProcess;
private Map<String, Object> requestParams;
private DataImporter dataImporter;
private Map<String, Object> entitySession, globalSession, docSession;
public ContextImpl(DataConfig.Entity entity, VariableResolverImpl resolver,
DataSource ds, int currProcess, Map<String, Object> requestParams,
Map<String, Object> global, ContextImpl p, DataImporter di) {
this.entity = entity;
this.resolver = resolver;
this.ds = ds;
this.currProcess = currProcess;
this.requestParams = requestParams;
globalSession = global;
parent = p;
dataImporter = di;
}
public String getEntityAttribute(String name) {
return entity == null ? null : entity.allAttributes.get(name);
}
public List<Map<String, String>> getAllEntityFields() {
return entity == null ? Collections.EMPTY_LIST : entity.allFieldsList;
}
public VariableResolver getVariableResolver() {
return resolver;
}
public DataSource getDataSource() {
return ds;
}
public DataSource getDataSource(String name) {
return dataImporter.getDataSourceInstance(entity);
}
public boolean isRootEntity() {
return entity.isDocRoot;
}
public int currentProcess() {
return currProcess;
}
public Map<String, Object> getRequestParameters() {
return requestParams;
}
public EntityProcessor getEntityProcessor() {
return entity == null ? null : entity.processor;
}
public void setSessionAttribute(String name, Object val, String scope) {
if (Context.SCOPE_ENTITY.equals(scope)) {
if (entitySession == null)
entitySession = new HashMap<String, Object>();
entitySession.put(name, val);
} else if (Context.SCOPE_GLOBAL.equals(scope)) {
if (globalSession != null) {
globalSession.put(name, val);
}
} else if (Context.SCOPE_DOC.equals(scope)) {
Map<String, Object> docsession = getDocSession();
if (docsession != null)
docsession.put(name, val);
}
}
public Object getSessionAttribute(String name, String scope) {
if (Context.SCOPE_ENTITY.equals(scope)) {
if (entitySession == null)
return null;
return entitySession.get(name);
} else if (Context.SCOPE_GLOBAL.equals(scope)) {
if (globalSession != null) {
return globalSession.get(name);
}
} else if (Context.SCOPE_DOC.equals(scope)) {
Map<String, Object> docsession = getDocSession();
if (docsession != null)
return docsession.get(name);
}
return null;
}
public Context getParentContext() {
return parent;
}
public Map<String, Object> getDocSession() {
ContextImpl c = this;
while (true) {
if (c.docSession != null)
return c.docSession;
if (c.parent != null)
c = c.parent;
else
return null;
}
}
public void setDocSession(Map<String, Object> docSession) {
this.docSession = docSession;
}
public SolrCore getSolrCore() {
return dataImporter.getCore();
}
}


@ -0,0 +1,360 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import java.util.*;
/**
* <p>
* Mapping for data-config.xml
* </p>
* <p/>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p/>
* <b>This API is experimental and subject to change</b>
*
* @version $Id$
* @since solr 1.3
*/
public class DataConfig {
public List<Document> documents;
public List<Props> properties;
private Map<String, Document> documentCache;
public Map<String, Evaluator> evaluators = new HashMap<String, Evaluator>();
public Script script;
public Map<String, Properties> dataSources = new HashMap<String, Properties>();
public Document getDocumentByName(String name) {
if (documentCache == null) {
documentCache = new HashMap<String, Document>();
for (Document document : documents)
documentCache.put(document.name, document);
}
return documentCache.get(name);
}
public static class Document {
public String name;
public String deleteQuery;
public List<Entity> entities = new ArrayList<Entity>();
public List<Field> fields;
public Document() {
}
public Document(Element element) {
this.name = getStringAttribute(element, NAME, null);
this.deleteQuery = getStringAttribute(element, "deleteQuery", null);
List<Element> l = getChildNodes(element, "entity");
for (Element e : l)
entities.add(new Entity(e));
// entities = new Entity(l.get(0));
l = getChildNodes(element, "field");
if (!l.isEmpty())
fields = new ArrayList<Field>();
for (Element e : l)
fields.add(new Field(e));
}
}
public static class Props {
public String name;
public String file;
}
public static class Entity {
public String name;
public String pk;
public String dataSource;
public Map<String, String> allAttributes;
public String proc;
public String docRoot;
public boolean isDocRoot = false;
public List<Field> fields;
public List<Map<String, String>> allFieldsList = new ArrayList<Map<String, String>>();
public List<Entity> entities;
public String[] primaryKeys;
public Entity parentEntity;
public EntityProcessor processor;
@SuppressWarnings("unchecked")
public DataSource dataSrc;
public Script script;
public List<Field> implicitFields;
public Entity() {
}
public Entity(Element element) {
name = getStringAttribute(element, NAME, null);
pk = getStringAttribute(element, "pk", null);
docRoot = getStringAttribute(element, ROOT_ENTITY, null);
proc = getStringAttribute(element, PROCESSOR, null);
dataSource = getStringAttribute(element, DataImporter.DATA_SRC, null);
allAttributes = getAllAttributes(element);
List<Element> n = getChildNodes(element, "field");
fields = new ArrayList<Field>();
for (Element elem : n)
fields.add(new Field(elem));
n = getChildNodes(element, "entity");
if (!n.isEmpty())
entities = new ArrayList<Entity>();
for (Element elem : n)
entities.add(new Entity(elem));
}
public void clearCache() {
if (entities != null) {
for (Entity entity : entities)
entity.clearCache();
}
try {
processor.destroy();
} catch (Exception e) {
/*no op*/
}
processor = null;
if (dataSrc != null)
dataSrc.close();
}
}
public static class Script {
public String language;
public String script;
public Script() {
}
public Script(Element e) {
this.language = getStringAttribute(e, "language", "JavaScript");
StringBuffer buffer = new StringBuffer();
String script = getTxt(e, buffer);
if (script != null)
this.script = script.trim();
}
}
public static class Field {
public String column;
public String name;
public Float boost = 1.0f;
public boolean toWrite = true;
public boolean multiValued = false;
public String nameOrColName;
public Map<String, String> allAttributes = new HashMap<String, String>() {
public String put(String key, String value) {
if (super.containsKey(key))
return super.get(key);
return super.put(key, value);
}
};
public Field() {
}
public Field(Element e) {
this.name = getStringAttribute(e, DataImporter.NAME, null);
this.column = getStringAttribute(e, DataImporter.COLUMN, null);
this.boost = Float.parseFloat(getStringAttribute(e, "boost", "1.0f"));
allAttributes.putAll(getAllAttributes(e));
}
public Field(String name, boolean b) {
this.name = nameOrColName = column = name;
multiValued = b;
}
public String getName() {
return name == null ? column : name;
}
public Entity entity;
}
public void readFromXml(Element e) {
List<Element> n = getChildNodes(e, "document");
if (!n.isEmpty())
documents = new ArrayList<Document>();
for (Element element : n)
documents.add(new Document(element));
n = getChildNodes(e, SCRIPT);
if (!n.isEmpty()) {
script = new Script(n.get(0));
}
// Add the provided evaluators
evaluators.put(EvaluatorBag.DATE_FORMAT_EVALUATOR, EvaluatorBag
.getDateFormatEvaluator());
evaluators.put(EvaluatorBag.SQL_ESCAPE_EVALUATOR, EvaluatorBag
.getSqlEscapingEvaluator());
evaluators.put(EvaluatorBag.URL_ENCODE_EVALUATOR, EvaluatorBag
.getUrlEvaluator());
n = getChildNodes(e, FUNCTION);
if (!n.isEmpty()) {
for (Element element : n) {
String func = getStringAttribute(element, NAME, null);
String clz = getStringAttribute(element, CLASS, null);
if (func == null || clz == null)
throw new DataImportHandlerException(
DataImportHandlerException.SEVERE,
"<function> must have a 'name' and 'class' attributes");
try {
evaluators.put(func, (Evaluator) DocBuilder.loadClass(clz)
.newInstance());
} catch (Exception exp) {
throw new DataImportHandlerException(
DataImportHandlerException.SEVERE,
"Unable to instantiate evaluator: " + clz, exp);
}
}
}
n = getChildNodes(e, DATA_SRC);
if (!n.isEmpty()) {
for (Element element : n) {
Properties p = new Properties();
HashMap<String, String> attrs = getAllAttributes(element);
for (Map.Entry<String, String> entry : attrs.entrySet()) {
p.setProperty(entry.getKey(), entry.getValue());
}
dataSources.put(p.getProperty("name"), p);
}
}
}
private static String getStringAttribute(Element e, String name, String def) {
String r = e.getAttribute(name);
if (r == null || "".equals(r.trim()))
r = def;
return r;
}
private static HashMap<String, String> getAllAttributes(Element e) {
HashMap<String, String> m = new HashMap<String, String>();
NamedNodeMap nnm = e.getAttributes();
for (int i = 0; i < nnm.getLength(); i++) {
m.put(nnm.item(i).getNodeName(), nnm.item(i).getNodeValue());
}
return m;
}
public static String getTxt(Node elem, StringBuffer buffer) {
if (elem.getNodeType() != Node.CDATA_SECTION_NODE) {
NodeList childs = elem.getChildNodes();
for (int i = 0; i < childs.getLength(); i++) {
Node child = childs.item(i);
short childType = child.getNodeType();
if (childType != Node.COMMENT_NODE
&& childType != Node.PROCESSING_INSTRUCTION_NODE) {
getTxt(child, buffer);
}
}
} else {
buffer.append(elem.getNodeValue());
}
return buffer.toString();
}
public static List<Element> getChildNodes(Element e, String byName) {
List<Element> result = new ArrayList<Element>();
NodeList l = e.getChildNodes();
for (int i = 0; i < l.getLength(); i++) {
if (e.equals(l.item(i).getParentNode())
&& byName.equals(l.item(i).getNodeName()))
result.add((Element) l.item(i));
}
return result;
}
public void clearCaches() {
for (Document document : documents)
for (Entity entity : document.entities)
entity.clearCache();
}
public static final String SCRIPT = "script";
public static final String NAME = "name";
public static final String SCRIPT_LANG = "scriptlanguage";
public static final String SCRIPT_NAME = "scriptname";
public static final String PROCESSOR = "processor";
public static final String IMPORTER_NS = "dataimporter";
public static final String ROOT_ENTITY = "rootEntity";
public static final String FUNCTION = "function";
public static final String CLASS = "class";
public static final String DATA_SRC = "dataSource";
}
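
Since this class only models the contents of data-config.xml, a short sketch (not part of this commit) shows how it gets populated; the configuration snippet, driver, table and field names are hypothetical, and the parsing pattern mirrors DataImporter.loadDataConfig further down.

import java.io.StringReader;
import javax.xml.parsers.DocumentBuilderFactory;
import org.apache.solr.handler.dataimport.DataConfig;
import org.w3c.dom.Element;
import org.xml.sax.InputSource;

public class DataConfigSketch {
  public static void main(String[] args) throws Exception {
    // a minimal, hypothetical data-config.xml
    String xml =
        "<dataConfig>"
      + "  <dataSource driver='org.hsqldb.jdbcDriver' url='jdbc:hsqldb:./ex' user='sa'/>"
      + "  <document name='products'>"
      + "    <entity name='item' query='select * from item'>"
      + "      <field column='NAME' name='name'/>"
      + "    </entity>"
      + "  </document>"
      + "</dataConfig>";

    org.w3c.dom.Document dom = DocumentBuilderFactory.newInstance()
        .newDocumentBuilder().parse(new InputSource(new StringReader(xml)));
    DataConfig config = new DataConfig();
    config.readFromXml((Element) dom.getElementsByTagName("dataConfig").item(0));

    // the mapping is now navigable as plain objects
    DataConfig.Document doc = config.getDocumentByName("products");
    System.out.println(doc.entities.get(0).name);              // item
    // a <dataSource> without a name attribute is stored under a null key,
    // which DataImporter later treats as the default data source
    System.out.println(config.dataSources.containsKey(null));  // true
  }
}
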


@ -0,0 +1,394 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.apache.lucene.document.Document;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ContentStreamBase;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.handler.RequestHandlerBase;
import org.apache.solr.handler.RequestHandlerUtils;
import org.apache.solr.request.RawResponseWriter;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryResponse;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.update.DocumentBuilder;
import org.apache.solr.update.UpdateHandler;
import org.apache.solr.util.plugin.SolrCoreAware;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
* <p>
* Solr Request Handler for data import from databases and REST data sources.
* </p>
* <p>
* It is configured in solrconfig.xml
* </p>
* <p/>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p/>
* <b>This API is experimental and subject to change</b>
*
* @version $Id$
* @since solr 1.3
*/
public class DataImportHandler extends RequestHandlerBase implements
SolrCoreAware {
private static final Logger LOG = Logger.getLogger(DataImportHandler.class
.getName());
private DataImporter importer;
private Map<String, String> variables = new HashMap<String, String>();
@SuppressWarnings("unchecked")
private NamedList initArgs;
private Map<String, Properties> dataSources = new HashMap<String, Properties>();
private DataImporter.RequestParams requestParams;
private List<Document> debugDocuments;
private DebugLogger debugLogger;
private boolean debugEnabled = true;
@Override
@SuppressWarnings("unchecked")
public void init(NamedList args) {
super.init(args);
initArgs = args;
}
@SuppressWarnings("unchecked")
public void inform(SolrCore core) {
try {
String debug = (String) initArgs.get(ENABLE_DEBUG);
if (debug != null && "no".equals(debug))
debugEnabled = false;
NamedList defaults = (NamedList) initArgs.get("defaults");
if (defaults != null) {
String configLoc = (String) defaults.get("config");
if (configLoc != null && configLoc.length() != 0) {
processConfiguration(defaults);
importer = new DataImporter(SolrWriter.getResourceAsString(core
.getResourceLoader().openResource(configLoc)), core,
dataSources);
}
}
} catch (Throwable e) {
SolrConfig.severeErrors.add(e);
LOG.log(Level.SEVERE, DataImporter.MSG.LOAD_EXP, e);
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
DataImporter.MSG.INVALID_CONFIG, e);
}
}
@Override
@SuppressWarnings("unchecked")
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp)
throws Exception {
rsp.setHttpCaching(false);
SolrParams params = req.getParams();
requestParams = new DataImporter.RequestParams(getParamsMap(params));
String command = requestParams.command;
if (DataImporter.SHOW_CONF_CMD.equals(command)) {
// Modify incoming request params to add wt=raw
ModifiableSolrParams rawParams = new ModifiableSolrParams(req.getParams());
rawParams.set(CommonParams.WT, "raw");
req.setParams(rawParams);
// the data-config file name comes from the handler's "defaults" section
NamedList defaults = (NamedList) initArgs.get("defaults");
String dataConfigFile = (String) defaults.get("config");
ContentStreamBase content = new ContentStreamBase.StringStream(SolrWriter
.getResourceAsString(req.getCore().getResourceLoader().openResource(
dataConfigFile)));
rsp.add(RawResponseWriter.CONTENT, content);
return;
}
rsp.add("initArgs", initArgs);
String message = "";
if (command != null)
rsp.add("command", command);
if (requestParams.debug) {
// Reload the data-config.xml
importer = null;
if (requestParams.dataConfig != null) {
try {
processConfiguration((NamedList) initArgs.get("defaults"));
importer = new DataImporter(requestParams.dataConfig, req.getCore()
, dataSources);
} catch (RuntimeException e) {
rsp.add("exception", DebugLogger.getStacktraceString(e));
importer = null;
return;
}
} else {
inform(req.getCore());
}
message = DataImporter.MSG.CONFIG_RELOADED;
}
// If importer is still null
if (importer == null) {
rsp.add("status", DataImporter.MSG.NO_INIT);
return;
}
if (command != null && DataImporter.ABORT_CMD.equals(command)) {
importer.rumCmd(requestParams, null, null);
} else if (importer.getStatus() != DataImporter.Status.IDLE) {
message = DataImporter.MSG.CMD_RUNNING;
} else if (command != null) {
if (DataImporter.FULL_IMPORT_CMD.equals(command)
|| DataImporter.DELTA_IMPORT_CMD.equals(command)) {
UpdateHandler updater = req.getCore().getUpdateHandler();
SolrResourceLoader loader = req.getCore().getResourceLoader();
SolrWriter sw = getSolrWriter(updater, loader, req
.getSchema());
if (requestParams.debug) {
if (debugEnabled) {
// Synchronous request for the debug mode
importer.rumCmd(requestParams, sw, variables);
rsp.add("mode", "debug");
rsp.add("documents", debugDocuments);
if (debugLogger != null)
rsp.add("verbose-output", debugLogger.output);
debugLogger = null;
debugDocuments = null;
} else {
message = DataImporter.MSG.DEBUG_NOT_ENABLED;
}
} else {
// Asynchronous request for normal mode
importer.runAsync(requestParams, sw, variables);
}
} else if (DataImporter.RELOAD_CONF_CMD.equals(command)) {
importer = null;
inform(req.getCore());
message = DataImporter.MSG.CONFIG_RELOADED;
}
}
rsp.add("status", importer.getStatus() == DataImporter.Status.IDLE ? "idle"
: "busy");
rsp.add("importResponse", message);
rsp.add("statusMessages", importer.getStatusMessages());
RequestHandlerUtils.addExperimentalFormatWarning(rsp);
}
private Map<String, Object> getParamsMap(SolrParams params) {
Iterator<String> names = params.getParameterNamesIterator();
Map<String, Object> result = new HashMap<String, Object>();
while (names.hasNext()) {
String s = names.next();
String[] val = params.getParams(s);
if (val == null || val.length < 1)
continue;
if (val.length == 1)
result.put(s, val[0]);
else
result.put(s, Arrays.asList(val));
}
return result;
}
@SuppressWarnings("unchecked")
private void processConfiguration(NamedList defaults) {
if (defaults == null) {
LOG
.info("No configuration specified in solrconfig.xml for DataImportHandler");
return;
}
LOG.info("Processing configuration from solrconfig.xml: " + defaults);
dataSources = new HashMap<String, Properties>();
variables = new HashMap<String, String>();
int position = 0;
while (position < defaults.size()) {
if (defaults.getName(position) == null)
break;
String name = defaults.getName(position);
if (name.equals("datasource")) {
NamedList dsConfig = (NamedList) defaults.getVal(position);
Properties props = new Properties();
for (int i = 0; i < dsConfig.size(); i++)
props.put(dsConfig.getName(i), dsConfig.getVal(i));
LOG.info("Adding properties to datasource: " + props);
dataSources.put((String) dsConfig.get("name"), props);
} else if (!name.equals("config")) {
String value = (String) defaults.getVal(position);
variables.put(name, value);
}
position++;
}
}
private SolrWriter getSolrWriter(final UpdateHandler updater,
final SolrResourceLoader loader, final IndexSchema schema) {
return new SolrWriter(updater, loader.getConfigDir()) {
@Override
public boolean upload(SolrDoc d) {
try {
Document document = DocumentBuilder.toDocument(
((SolrDocumentWrapper) d).doc, schema);
if (requestParams.debug) {
if (debugDocuments == null)
debugDocuments = new ArrayList<Document>();
debugDocuments.add(document);
if (debugDocuments.size() >= requestParams.rows) {
// Abort this operation now
importer.getDocBuilder().abort();
}
}
return super.upload(document);
} catch (RuntimeException e) {
LOG.log(Level.SEVERE, "Exception while adding: " + d, e);
return false;
}
}
public void log(int event, String name, Object row) {
if (debugLogger == null) {
debugLogger = new DebugLogger();
}
debugLogger.log(event, name, row);
}
public Class loadClass(String name) throws ClassNotFoundException {
return loader.findClass(name);
}
public SolrDoc getSolrDocInstance() {
return new SolrDocumentWrapper();
}
};
}
static class SolrDocumentWrapper implements SolrWriter.SolrDoc {
SolrInputDocument doc;
public SolrDocumentWrapper() {
doc = new SolrInputDocument();
}
public void setDocumentBoost(float boost) {
doc.setDocumentBoost(boost);
}
public Object getField(String field) {
return doc.getField(field);
}
public void addField(String name, Object value, float boost) {
doc.addField(name, value, boost);
}
public String toString() {
return doc.toString();
}
}
@Override
@SuppressWarnings("unchecked")
public NamedList getStatistics() {
if (importer == null)
return super.getStatistics();
DocBuilder.Statistics cumulative = importer.cumulativeStatistics;
NamedList result = new NamedList();
result.add("Status", importer.getStatus().toString());
if (importer.docBuilder != null) {
DocBuilder.Statistics running = importer.docBuilder.importStatistics;
result.add("Documents Processed", running.docCount);
result.add("Requests made to DataSource", running.queryCount);
result.add("Rows Fetched", running.rowsCount);
result.add("Documents Deleted", running.deletedDocCount);
result.add("Documents Skipped", running.skipDocCount);
}
result.add(DataImporter.MSG.TOTAL_DOC_PROCESSED, cumulative.docCount);
result.add(DataImporter.MSG.TOTAL_QUERIES_EXECUTED, cumulative.queryCount);
result.add(DataImporter.MSG.TOTAL_ROWS_EXECUTED, cumulative.rowsCount);
result.add(DataImporter.MSG.TOTAL_DOCS_DELETED, cumulative.deletedDocCount);
result.add(DataImporter.MSG.TOTAL_DOCS_SKIPPED, cumulative.skipDocCount);
NamedList requestStatistics = super.getStatistics();
if (requestStatistics != null) {
for (int i = 0; i < requestStatistics.size(); i++) {
result.add(requestStatistics.getName(i), requestStatistics.getVal(i));
}
}
return result;
}
// //////////////////////SolrInfoMBeans methods //////////////////////
@Override
public String getDescription() {
return DataImporter.MSG.JMX_DESC;
}
@Override
public String getSourceId() {
return "$Id$";
}
@Override
public String getVersion() {
return "1.0";
}
@Override
public String getSource() {
return "$URL$";
}
public static final String ENABLE_DEBUG = "enableDebug";
}


@ -0,0 +1,63 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
/**
* <p>
* Exception class for all DataImportHandler exceptions
* </p>
* <p/>
* <b>This API is experimental and subject to change</b>
* <p/>
* $Id$
*
* @since solr 1.3
*/
public class DataImportHandlerException extends RuntimeException {
private int errCode;
public boolean debugged = false;
public static final int SEVERE = 500, WARN = 400, SKIP = 300;
public DataImportHandlerException(int err) {
super();
errCode = err;
}
public DataImportHandlerException(int err, String message) {
super(message + MSG + SolrWriter.getDocCount());
errCode = err;
}
public DataImportHandlerException(int err, String message, Throwable cause) {
super(message + MSG + SolrWriter.getDocCount(), cause);
errCode = err;
}
public DataImportHandlerException(int err, Throwable cause) {
super(cause);
errCode = err;
}
public int getErrCode() {
return errCode;
}
public static final String MSG = " Processing Document # ";
}


@ -0,0 +1,544 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.InputSource;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.StringReader;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.concurrent.atomic.AtomicLong;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
* <p>
* Stores all configuration information for pulling and indexing data.
* </p>
* <p/>
* <b>This API is experimental and subject to change</b>
*
* @version $Id$
* @since solr 1.3
*/
public class DataImporter {
public enum Status {
IDLE, RUNNING_FULL_DUMP, RUNNING_DELTA_DUMP, JOB_FAILED
}
private static final Logger LOG = Logger.getLogger(DataImporter.class
.getName());
private Status status = Status.IDLE;
private DataConfig config;
private Date lastIndexTime;
private Date indexStartTime;
private Properties store = new Properties();
private Map<String, Properties> dataSourceProps;
private IndexSchema schema;
public DocBuilder docBuilder;
public DocBuilder.Statistics cumulativeStatistics = new DocBuilder.Statistics();
public Map<String, Evaluator> evaluators;
private SolrCore core;
/**
* Only for testing purposes
*/
DataImporter() {
}
public DataImporter(String dataConfig, SolrCore core,
Map<String, Properties> ds) {
if (dataConfig == null)
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Configuration not found");
this.core = core;
this.schema = core.getSchema();
dataSourceProps = ds;
loadDataConfig(dataConfig);
for (DataConfig.Document document : config.documents) {
for (DataConfig.Entity e : document.entities) {
Map<String, DataConfig.Field> fields = new HashMap<String, DataConfig.Field>();
initEntity(e, fields, false);
e.implicitFields = new ArrayList<DataConfig.Field>();
String errs = verifyWithSchema(fields, e.implicitFields);
if (e.implicitFields.isEmpty())
e.implicitFields = null;
if (errs != null) {
throw new DataImportHandlerException(
DataImportHandlerException.SEVERE, errs);
}
}
}
}
private String verifyWithSchema(Map<String, DataConfig.Field> fields,
List<DataConfig.Field> autoFields) {
List<String> errors = new ArrayList<String>();
Map<String, SchemaField> schemaFields = schema.getFields();
for (Map.Entry<String, SchemaField> entry : schemaFields.entrySet()) {
SchemaField sf = entry.getValue();
if (!fields.containsKey(sf.getName())) {
if (sf.isRequired()) {
LOG
.info(sf.getName()
+ " is a required field in SolrSchema . But not found in DataConfig");
}
autoFields.add(new DataConfig.Field(sf.getName(), sf.multiValued()));
}
}
for (Map.Entry<String, DataConfig.Field> entry : fields.entrySet()) {
DataConfig.Field fld = entry.getValue();
FieldType fieldType = null;
try {
fieldType = schema.getDynamicFieldType(fld.name);
} catch (RuntimeException e) {
// Ignore because it may not be a dynamic field
}
if (fld.name != null) {
if (schema.getFields().get(fld.name) == null && fieldType == null) {
errors
.add("The field :"
+ fld.name
+ " present in DataConfig does not have a counterpart in Solr Schema");
}
} else if (schema.getFields().get(fld.column) == null
&& fieldType == null) {
LOG.info("Column : " + fld.column + " is not a schema field");
}
}
if (!errors.isEmpty()) {
StringBuffer sb = new StringBuffer("There are errors in the Schema\n");
for (String error : errors) {
sb.append(error).append("\n");
}
return sb.toString();
}
return null;
}
void loadDataConfig(String configFile) {
try {
DocumentBuilder builder = DocumentBuilderFactory.newInstance()
.newDocumentBuilder();
Document document = builder.parse(new InputSource(new StringReader(
configFile)));
config = new DataConfig();
config.readFromXml((Element) document.getElementsByTagName("dataConfig")
.item(0));
LOG.info("Data Configuration loaded successfully");
} catch (Exception e) {
SolrConfig.severeErrors.add(e);
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Exception occurred while initializing context", e);
}
}
private void initEntity(DataConfig.Entity e,
Map<String, DataConfig.Field> fields, boolean docRootFound) {
if (e.pk != null)
e.primaryKeys = e.pk.split(",");
e.allAttributes.put(DATA_SRC, e.dataSource);
if (!docRootFound && !"false".equals(e.docRoot)) {
// if no document root has been found yet in this chain
e.isDocRoot = true;
}
if (e.fields != null) {
for (DataConfig.Field f : e.fields) {
f.nameOrColName = f.getName();
SchemaField schemaField = schema.getFields().get(f.getName());
if (schemaField != null) {
f.multiValued = schemaField.multiValued();
f.allAttributes.put(MULTI_VALUED, Boolean.toString(schemaField
.multiValued()));
f.allAttributes.put(TYPE, schemaField.getType().getTypeName());
f.allAttributes.put("indexed", Boolean
.toString(schemaField.indexed()));
f.allAttributes.put("stored", Boolean.toString(schemaField.stored()));
f.allAttributes.put("defaultValue", schemaField.getDefaultValue());
} else {
try {
f.allAttributes.put(TYPE, schema.getDynamicFieldType(f.getName())
.getTypeName());
f.allAttributes.put(MULTI_VALUED, "true");
f.multiValued = true;
} catch (RuntimeException e2) {
LOG.info("Field in data-config.xml - " + f.getName()
+ " not found in schema.xml");
f.toWrite = false;
}
}
fields.put(f.getName(), f);
f.entity = e;
f.allAttributes.put("boost", f.boost.toString());
f.allAttributes.put("toWrite", Boolean.toString(f.toWrite));
e.allFieldsList.add(Collections.unmodifiableMap(f.allAttributes));
}
}
e.allFieldsList = Collections.unmodifiableList(e.allFieldsList);
e.allAttributes = Collections.unmodifiableMap(e.allAttributes);
addDataSource(e);
if (e.entities == null)
return;
for (DataConfig.Entity e1 : e.entities) {
e1.parentEntity = e;
initEntity(e1, fields, e.isDocRoot || docRootFound);
}
}
public DataConfig getConfig() {
return config;
}
public Date getIndexStartTime() {
return indexStartTime;
}
public void setIndexStartTime(Date indexStartTime) {
this.indexStartTime = indexStartTime;
}
public Date getLastIndexTime() {
return lastIndexTime;
}
public void setLastIndexTime(Date lastIndexTime) {
this.lastIndexTime = lastIndexTime;
}
public void store(Object key, Object value) {
store.put(key, value);
}
public Object retrieve(Object key) {
return store.get(key);
}
@SuppressWarnings("unchecked")
public void addDataSource(DataConfig.Entity key) {
if ("null".equals(key.dataSource)) {
key.dataSrc = new MockDataSource();
return;
}
key.dataSrc = getDataSourceInstance(key);
}
DataSource getDataSourceInstance(DataConfig.Entity key) {
Properties p = dataSourceProps.get(key.dataSource);
if (p == null)
p = config.dataSources.get(key.dataSource);
if (p == null)
p = dataSourceProps.get(null);// for default data source
if (p == null)
p = config.dataSources.get(null);
if (p == null)
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"No dataSource :" + key.dataSource + " available for entity :"
+ key.name);
String impl = p.getProperty(TYPE);
DataSource dataSrc = null;
if (impl == null) {
dataSrc = new JdbcDataSource();
} else {
try {
dataSrc = (DataSource) DocBuilder.loadClass(impl).newInstance();
} catch (Exception e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Invalid type for data source: " + impl, e);
}
}
try {
Properties copyProps = new Properties();
copyProps.putAll(p);
dataSrc.init(new ContextImpl(key, null, dataSrc, 0,
Collections.EMPTY_MAP, new HashMap(), null, this), copyProps);
} catch (Exception e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Failed to initialize DataSource: " + key.dataSource, e);
}
return dataSrc;
}
public Status getStatus() {
return status;
}
public void setStatus(Status status) {
this.status = status;
}
public void doFullImport(SolrWriter writer, RequestParams requestParams,
Map<String, String> variables) {
LOG.info("Starting Full Import");
setStatus(Status.RUNNING_FULL_DUMP);
if (requestParams.commit)
setIndexStartTime(new Date());
try {
if (requestParams.clean)
writer.doDeleteAll();
docBuilder = new DocBuilder(this, writer, requestParams, variables);
docBuilder.execute(getConfig().documents.get(0).name);
if (!requestParams.debug)
cumulativeStatistics.add(docBuilder.importStatistics);
} catch (RuntimeException e) {
LOG.log(Level.SEVERE, "Full Import failed", e);
} finally {
setStatus(Status.IDLE);
config.clearCaches();
DocBuilder.INSTANCE.set(null);
}
}
public void doDeltaImport(SolrWriter writer, RequestParams requestParams,
Map<String, String> variables) {
LOG.info("Starting Delta Import");
setStatus(Status.RUNNING_DELTA_DUMP);
try {
if (requestParams.commit) {
Date lastModified = writer.loadIndexStartTime();
setIndexStartTime(new Date());
setLastIndexTime(lastModified);
}
docBuilder = new DocBuilder(this, writer, requestParams, variables);
docBuilder.execute(config.documents.get(0).name);
if (!requestParams.debug)
cumulativeStatistics.add(docBuilder.importStatistics);
} catch (RuntimeException e) {
LOG.log(Level.SEVERE, "Delta Import Failed", e);
} finally {
setStatus(Status.IDLE);
config.clearCaches();
DocBuilder.INSTANCE.set(null);
}
}
public void runAsync(final RequestParams reqParams, final SolrWriter sw,
final Map<String, String> variables) {
new Thread() {
@Override
public void run() {
rumCmd(reqParams, sw, variables);
}
}.start();
}
void rumCmd(RequestParams reqParams, SolrWriter sw,
Map<String, String> variables) {
String command = reqParams.command;
if (command.equals("full-import")) {
doFullImport(sw, reqParams, variables);
} else if (command.equals(DELTA_IMPORT_CMD)) {
doDeltaImport(sw, reqParams, variables);
} else if (command.equals(ABORT_CMD)) {
if (docBuilder != null)
docBuilder.abort();
}
}
@SuppressWarnings("unchecked")
Map<String, String> getStatusMessages() {
Map statusMessages = (Map) retrieve(STATUS_MSGS);
Map<String, String> result = new LinkedHashMap<String, String>();
if (statusMessages != null) {
for (Object o : statusMessages.entrySet()) {
Map.Entry e = (Map.Entry) o;
result.put((String) e.getKey(), e.getValue().toString());
}
}
return result;
}
public DocBuilder getDocBuilder() {
return docBuilder;
}
public static final ThreadLocal<AtomicLong> QUERY_COUNT = new ThreadLocal<AtomicLong>() {
protected AtomicLong initialValue() {
return new AtomicLong();
}
};
static final SimpleDateFormat DATE_TIME_FORMAT = new SimpleDateFormat(
"yyyy-MM-dd HH:mm:ss");
static final class MSG {
public static final String NO_CONFIG_FOUND = "Configuration not found";
public static final String NO_INIT = "DataImportHandler started. Not Initialized. No commands can be run";
public static final String INVALID_CONFIG = "FATAL: Could not create importer. DataImporter config invalid";
public static final String LOAD_EXP = "Exception while loading DataImporter";
public static final String JMX_DESC = "Manage data import from databases to Solr";
public static final String CMD_RUNNING = "A command is still running...";
public static final String DEBUG_NOT_ENABLED = "Debug not enabled. Add a tag <str name=\"enableDebug\">true</str> in solrconfig.xml";
public static final String CONFIG_RELOADED = "Configuration re-loaded successfully";
public static final String TOTAL_DOC_PROCESSED = "Total Documents Processed";
public static final String TOTAL_QUERIES_EXECUTED = "Total Requests made to DataSource";
public static final String TOTAL_ROWS_EXECUTED = "Total Rows Fetched";
public static final String TOTAL_DOCS_DELETED = "Total Documents Deleted";
public static final String TOTAL_DOCS_SKIPPED = "Total Documents Skipped";
}
static final class RequestParams {
public String command = null;
public boolean debug = false;
public boolean verbose = false;
public boolean commit = true;
public boolean optimize = true;
public int start = 0;
public int rows = 10;
public boolean clean = true;
public List<String> entities;
public Map<String, Object> requestParams;
public String dataConfig;
public RequestParams() {
}
public RequestParams(Map<String, Object> requestParams) {
if (requestParams.containsKey("command"))
command = (String) requestParams.get("command");
if ("on".equals(requestParams.get("debug"))) {
debug = true;
// Set default values suitable for debug mode
commit = false;
clean = false;
verbose = "true".equals(requestParams.get("verbose"))
|| "on".equals(requestParams.get("verbose"));
}
if (requestParams.containsKey("commit"))
commit = Boolean.parseBoolean((String) requestParams.get("commit"));
if (requestParams.containsKey("start"))
start = Integer.parseInt((String) requestParams.get("start"));
if (requestParams.containsKey("rows"))
rows = Integer.parseInt((String) requestParams.get("rows"));
if (requestParams.containsKey("clean"))
clean = Boolean.parseBoolean((String) requestParams.get("clean"));
if (requestParams.containsKey("optimize"))
optimize = Boolean.parseBoolean((String) requestParams.get("optimize"));
Object o = requestParams.get("entity");
if (o != null && o instanceof String) {
entities = new ArrayList<String>();
entities.add((String) o);
} else if (o != null && o instanceof List) {
entities = (List<String>) requestParams.get("entity");
}
dataConfig = (String) requestParams.get("dataConfig");
if (dataConfig != null && dataConfig.trim().length() == 0) {
// Empty data-config param is not valid, change it to null
dataConfig = null;
}
this.requestParams = requestParams;
}
}
public SolrCore getCore() {
return core;
}
public static final String COLUMN = "column";
public static final String TYPE = "type";
public static final String DATA_SRC = "dataSource";
public static final String MULTI_VALUED = "multiValued";
public static final String NAME = "name";
public static final String STATUS_MSGS = "status-messages";
public static final String FULL_IMPORT_CMD = "full-import";
public static final String DELTA_IMPORT_CMD = "delta-import";
public static final String ABORT_CMD = "abort";
public static final String DEBUG_MODE = "debug";
public static final String RELOAD_CONF_CMD = "reload-config";
public static final String SHOW_CONF_CMD = "show-config";
}


@ -0,0 +1,72 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.util.Properties;
/**
* <p>
* Provides data from a source with a given query.
* </p>
* <p/>
* <p>
* Implementations of this class must provide a default no-arg constructor
* </p>
* <p/>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p/>
* <b>This API is experimental and may change in the future.</b>
*
* @version $Id$
* @since solr 1.3
*/
public abstract class DataSource<T> {
/**
* Initializes the DataSource with the <code>Context</code> and
* initialization properties.
* <p/>
* This is invoked by the <code>DataImporter</code> after creating an
* instance of this class.
*
* @param context
* @param initProps
*/
public abstract void init(Context context, Properties initProps);
/**
* Get records for the given query. The return type depends on the
* implementation.
*
* @param query The query string. It can be a SQL query for JdbcDataSource, a URL
* for HttpDataSource, a file location for FileDataSource or a custom
* format for your own custom DataSource.
* @return Depends on the implementation. For instance, JdbcDataSource returns
* an Iterator&lt;Map&lt;String, Object&gt;&gt;
*/
public abstract T getData(String query);
/**
* Cleans up resources of this DataSource after use.
*/
public abstract void close();
}
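
For illustration only, a hypothetical DataSource implementation sketching the init/getData/close contract above; the class name and behaviour are assumptions, not part of this commit.

package org.apache.solr.handler.dataimport;

import java.util.Properties;

// Hypothetical example: serves an in-memory string for any query.
public class InMemoryDataSource extends DataSource<String> {
  private Properties props;

  public void init(Context context, Properties initProps) {
    // keep the <dataSource> element's attributes for later use
    this.props = initProps;
  }

  public String getData(String query) {
    // a real implementation would query a database, URL or file here
    return props.getProperty("prefix", "") + query;
  }

  public void close() {
    // nothing to release in this sketch
  }
}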

View File

@ -0,0 +1,87 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
* <p>
* Transformer instance which creates Date instances out of Strings.
* </p>
* <p/>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p/>
* <b>This API is experimental and subject to change</b>
*
* @version $Id$
* @since solr 1.3
*/
public class DateFormatTransformer extends Transformer {
private static final Logger LOG = Logger
.getLogger(DateFormatTransformer.class.getName());
@SuppressWarnings("unchecked")
public Object transformRow(Map<String, Object> aRow, Context context) {
for (Map<String, String> map : context.getAllEntityFields()) {
String fmt = map.get(DATE_TIME_FMT);
if (fmt == null)
continue;
String column = map.get(DataImporter.COLUMN);
String srcCol = map.get(RegexTransformer.SRC_COL_NAME);
if (srcCol == null)
srcCol = column;
try {
Object o = aRow.get(srcCol);
if (o instanceof List) {
List<String> inputs = (List<String>) o;
List<Date> results = new ArrayList<Date>();
for (String input : inputs) {
results.add(process(input, fmt));
}
aRow.put(column, results);
} else {
String value = (String) o;
aRow.put(column, process(value, fmt));
}
} catch (ParseException e) {
LOG.log(Level.WARNING, "Could not parse a Date field ", e);
}
}
return aRow;
}
private Date process(String value, String format) throws ParseException {
if (value == null || value.trim().length() == 0)
return null;
return new SimpleDateFormat(format).parse(value);
}
public static final String DATE_TIME_FMT = "dateTimeFormat";
}
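
A hedged illustration of the transformation above; the column names, date format and sample value are hypothetical and not part of this commit.

// Given a field declared (hypothetically) in data-config.xml as
//   <field column="created" sourceColName="created_raw" dateTimeFormat="yyyy-MM-dd"/>
Map<String, Object> row = new HashMap<String, Object>();
row.put("created_raw", "2008-07-30");
// After transformRow(row, context) the source value is left in place and a parsed
// java.util.Date is added under the target column:
//   row.get("created")     -> new SimpleDateFormat("yyyy-MM-dd").parse("2008-07-30")
//   row.get("created_raw") -> still the original String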

View File

@ -0,0 +1,274 @@
package org.apache.solr.handler.dataimport;
import org.apache.solr.common.util.NamedList;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.text.MessageFormat;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Stack;
/**
* <p>
* Implements most of the interactive development functionality
* </p>
* <p/>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p/>
* <b>This API is experimental and subject to change</b>
*
* @version $Id$
* @since solr 1.3
*/
public class DebugLogger {
private Stack<DebugInfo> debugStack;
NamedList output;
private static final String LINE = "---------------------------------------------";
private MessageFormat fmt = new MessageFormat(
"----------- row #{0}-------------");
boolean enabled = true;
public DebugLogger() {
output = new NamedList();
debugStack = new Stack<DebugInfo>() {
public DebugInfo pop() {
if (size() == 1)
throw new DataImportHandlerException(
DataImportHandlerException.SEVERE, "Stack is becoming empty");
return super.pop();
}
};
debugStack.push(new DebugInfo(null, -1, null));
output = debugStack.peek().lst;
}
private DebugInfo peekStack() {
return debugStack.isEmpty() ? null : debugStack.peek();
}
public void log(int event, String name, Object row) {
if (event == SolrWriter.DISABLE_LOGGING) {
enabled = false;
return;
} else if (event == SolrWriter.ENABLE_LOGGING) {
enabled = true;
return;
}
if (!enabled && event != SolrWriter.START_ENTITY
&& event != SolrWriter.END_ENTITY) {
return;
}
if (event == SolrWriter.START_DOC) {
debugStack.push(new DebugInfo(null, SolrWriter.START_DOC, peekStack()));
} else if (SolrWriter.START_ENTITY == event) {
debugStack
.push(new DebugInfo(name, SolrWriter.START_ENTITY, peekStack()));
} else if (SolrWriter.ENTITY_OUT == event
|| SolrWriter.PRE_TRANSFORMER_ROW == event) {
if (debugStack.peek().type == SolrWriter.START_ENTITY
|| debugStack.peek().type == SolrWriter.START_DOC) {
debugStack.peek().lst.add(null, fmt.format(new Object[]{++debugStack
.peek().rowCount}));
addToNamedList(debugStack.peek().lst, row);
debugStack.peek().lst.add(null, LINE);
}
} else if (event == SolrWriter.ROW_END) {
popAllTransformers();
} else if (SolrWriter.END_ENTITY == event) {
while (debugStack.pop().type != SolrWriter.START_ENTITY)
;
} else if (SolrWriter.END_DOC == event) {
while (debugStack.pop().type != SolrWriter.START_DOC)
;
} else if (event == SolrWriter.TRANSFORMER_EXCEPTION) {
debugStack.push(new DebugInfo(name, event, peekStack()));
debugStack.peek().lst.add("EXCEPTION",
getStacktraceString((Exception) row));
} else if (SolrWriter.TRANSFORMED_ROW == event) {
debugStack.push(new DebugInfo(name, event, peekStack()));
debugStack.peek().lst.add(null, LINE);
addToNamedList(debugStack.peek().lst, row);
debugStack.peek().lst.add(null, LINE);
if (row instanceof DataImportHandlerException) {
DataImportHandlerException dataImportHandlerException = (DataImportHandlerException) row;
dataImportHandlerException.debugged = true;
}
} else if (SolrWriter.ENTITY_META == event) {
popAllTransformers();
debugStack.peek().lst.add(name, row);
} else if (SolrWriter.ENTITY_EXCEPTION == event) {
if (row instanceof DataImportHandlerException) {
DataImportHandlerException dihe = (DataImportHandlerException) row;
if (dihe.debugged)
return;
dihe.debugged = true;
}
popAllTransformers();
debugStack.peek().lst.add("EXCEPTION",
getStacktraceString((Exception) row));
}
}
private void popAllTransformers() {
while (true) {
int type = debugStack.peek().type;
if (type == SolrWriter.START_DOC || type == SolrWriter.START_ENTITY)
break;
debugStack.pop();
}
}
private void addToNamedList(NamedList nl, Object row) {
if (row instanceof List) {
List list = (List) row;
NamedList l = new NamedList();
nl.add(null, l);
for (Object o : list) {
Map<String, Object> map = (Map<String, Object>) o;
for (Map.Entry<String, Object> entry : map.entrySet())
nl.add(entry.getKey(), entry.getValue());
}
} else if (row instanceof Map) {
Map<String, Object> map = (Map<String, Object>) row;
for (Map.Entry<String, Object> entry : map.entrySet())
nl.add(entry.getKey(), entry.getValue());
}
}
static DataSource wrapDs(final DataSource ds) {
final SolrWriter writer = DocBuilder.INSTANCE.get().writer;
return new DataSource() {
public void init(Context context, Properties initProps) {
ds.init(context, initProps);
}
public void close() {
ds.close();
}
public Object getData(String query) {
writer.log(SolrWriter.ENTITY_META, "query", query);
long start = System.currentTimeMillis();
try {
return ds.getData(query);
} catch (DataImportHandlerException de) {
DocBuilder.INSTANCE.get().writer.log(SolrWriter.ENTITY_EXCEPTION,
null, de);
throw de;
} catch (Exception e) {
DocBuilder.INSTANCE.get().writer.log(SolrWriter.ENTITY_EXCEPTION,
null, e);
DataImportHandlerException de = new DataImportHandlerException(
DataImportHandlerException.SEVERE, "", e);
de.debugged = true;
throw de;
} finally {
writer.log(SolrWriter.ENTITY_META, "time-taken", DocBuilder
.getTimeElapsedSince(start));
}
}
};
}
static Transformer wrapTransformer(final Transformer t) {
if (DocBuilder.INSTANCE.get() != null
&& DocBuilder.INSTANCE.get().verboseDebug) {
return new Transformer() {
public Object transformRow(Map<String, Object> row, Context context) {
DocBuilder.INSTANCE.get().writer.log(SolrWriter.PRE_TRANSFORMER_ROW,
null, row);
String tName = getTransformerName(t);
Object result = null;
try {
result = t.transformRow(row, context);
DocBuilder.INSTANCE.get().writer.log(SolrWriter.TRANSFORMED_ROW,
tName, result);
} catch (DataImportHandlerException de) {
DocBuilder.INSTANCE.get().writer.log(
SolrWriter.TRANSFORMER_EXCEPTION, tName, de);
de.debugged = true;
throw de;
} catch (Exception e) {
DocBuilder.INSTANCE.get().writer.log(
SolrWriter.TRANSFORMER_EXCEPTION, tName, e);
DataImportHandlerException de = new DataImportHandlerException(
DataImportHandlerException.SEVERE, "", e);
de.debugged = true;
throw de;
}
return result;
}
};
} else {
return t;
}
}
public static String getStacktraceString(Exception e) {
StringWriter sw = new StringWriter();
e.printStackTrace(new PrintWriter(sw));
return sw.toString();
}
static String getTransformerName(Transformer t) {
Class transClass = t.getClass();
if (t instanceof EntityProcessorBase.ReflectionTransformer) {
return ((EntityProcessorBase.ReflectionTransformer) t).trans;
}
if (t instanceof ScriptTransformer) {
ScriptTransformer scriptTransformer = (ScriptTransformer) t;
return "script:" + scriptTransformer.getFunctionName();
}
if (transClass.getPackage().equals(DebugLogger.class.getPackage())) {
return transClass.getSimpleName();
} else {
return transClass.getName();
}
}
private static class DebugInfo {
String name;
int tCount, rowCount;
NamedList lst;
int type;
DebugInfo parent;
public DebugInfo(String name, int type, DebugInfo parent) {
this.name = name;
this.type = type;
this.parent = parent;
lst = new NamedList();
if (parent != null) {
String displayName = null;
if (type == SolrWriter.START_ENTITY) {
displayName = "entity:" + name;
} else if (type == SolrWriter.TRANSFORMED_ROW
|| type == SolrWriter.TRANSFORMER_EXCEPTION) {
displayName = "transformer:" + name;
} else if (type == SolrWriter.START_DOC) {
name = displayName = "document#" + SolrWriter.getDocCount();
}
parent.lst.add(displayName, lst);
}
}
}
}

View File

@ -0,0 +1,614 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.util.*;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
* <p>
* DocBuilder is responsible for creating Solr documents out of the given
* configuration. It also maintains statistics information. It depends on the
* EntityProcessor implementations to fetch data.
* </p>
* <p/>
* <b>This API is experimental and subject to change</b>
*
* @version $Id$
* @since solr 1.3
*/
public class DocBuilder {
public static final String DOC_BOOST = "$docBoost";
private static final Logger LOG = Logger
.getLogger(DocBuilder.class.getName());
private DataImporter dataImporter;
private DataConfig.Document document;
private DataConfig.Entity root;
@SuppressWarnings("unchecked")
private Map statusMessages = new LinkedHashMap();
public Statistics importStatistics = new Statistics();
SolrWriter writer;
DataImporter.RequestParams requestParameters;
boolean verboseDebug = false;
private Map<String, String> defaultVariables;
private Map<String, Object> session = new HashMap<String, Object>();
static final ThreadLocal<DocBuilder> INSTANCE = new ThreadLocal<DocBuilder>();
public DocBuilder(DataImporter context, SolrWriter writer,
DataImporter.RequestParams reqParams, Map<String, String> variables) {
INSTANCE.set(this);
this.dataImporter = context;
this.writer = writer;
DataImporter.QUERY_COUNT.set(importStatistics.queryCount);
requestParameters = reqParams;
verboseDebug = requestParameters.debug && requestParameters.verbose;
defaultVariables = Collections.unmodifiableMap(variables);
}
public VariableResolverImpl getVariableResolver(DataImporter context) {
VariableResolverImpl resolver = new VariableResolverImpl();
Map<String, Object> indexerNamespace = new HashMap<String, Object>();
if (context.getLastIndexTime() != null)
indexerNamespace.put(LAST_INDEX_TIME, DataImporter.DATE_TIME_FORMAT
.format(context.getLastIndexTime()));
indexerNamespace.put(INDEX_START_TIME, context.getIndexStartTime());
indexerNamespace.put("request", requestParameters);
indexerNamespace.put("defaults", defaultVariables);
indexerNamespace.put("functions", EvaluatorBag.getFunctionsNamespace(resolver,
dataImporter.getConfig().evaluators));
if (context.getConfig().script != null) {
indexerNamespace
.put(DataConfig.SCRIPT, context.getConfig().script.script);
indexerNamespace.put(DataConfig.SCRIPT_LANG,
context.getConfig().script.language);
}
resolver.addNamespace(DataConfig.IMPORTER_NS, indexerNamespace);
return resolver;
}
@SuppressWarnings("unchecked")
public void execute(String docName) {
dataImporter.store(DataImporter.STATUS_MSGS, statusMessages);
document = dataImporter.getConfig().getDocumentByName(docName);
if (document == null)
return;
final AtomicLong startTime = new AtomicLong(System.currentTimeMillis());
statusMessages.put(TIME_ELAPSED, new Object() {
public String toString() {
return getTimeElapsedSince(startTime.get());
}
});
statusMessages.put(DataImporter.MSG.TOTAL_QUERIES_EXECUTED,
importStatistics.queryCount);
statusMessages.put(DataImporter.MSG.TOTAL_ROWS_EXECUTED,
importStatistics.rowsCount);
statusMessages.put(DataImporter.MSG.TOTAL_DOC_PROCESSED,
importStatistics.docCount);
statusMessages.put(DataImporter.MSG.TOTAL_DOCS_SKIPPED,
importStatistics.skipDocCount);
List<String> entities = requestParameters.entities;
for (DataConfig.Entity e : document.entities) {
if (entities != null && !entities.contains(e.name))
continue;
root = e;
if (dataImporter.getStatus() == DataImporter.Status.RUNNING_DELTA_DUMP
&& dataImporter.getLastIndexTime() != null) {
doDelta();
} else {
doFullDump();
}
statusMessages.remove(DataImporter.MSG.TOTAL_DOC_PROCESSED);
}
if (stop.get()) {
if (DataImporter.ABORT_CMD.equals(requestParameters.command)) {
// Don't commit if aborted using command=abort
statusMessages.put("Aborted", DataImporter.DATE_TIME_FORMAT
.format(new Date()));
} else if (requestParameters.commit) {
// Debug mode, commit if commit=true was specified
commit();
}
} else {
// Finished operation normally, commit now
commit();
}
statusMessages.remove(TIME_ELAPSED);
statusMessages.put("Time taken ", getTimeElapsedSince(startTime.get()));
LOG.info("Time taken = " + getTimeElapsedSince(startTime.get()));
}
@SuppressWarnings("unchecked")
private void commit() {
if (requestParameters.commit)
writer.persistIndexStartTime(dataImporter.getIndexStartTime());
LOG.info("Full Import completed successfully");
statusMessages.put("", "Indexing completed. Added/Updated: "
+ importStatistics.docCount + " documents. Deleted "
+ importStatistics.deletedDocCount + " documents.");
writer.commit(requestParameters.optimize);
addStatusMessage("Committed");
if (requestParameters.optimize)
addStatusMessage("Optimized");
}
@SuppressWarnings("unchecked")
private void doFullDump() {
addStatusMessage("Full Dump Started");
buildDocument(getVariableResolver(dataImporter), null, null, root, true,
null);
}
@SuppressWarnings("unchecked")
private void doDelta() {
addStatusMessage("Delta Dump started");
VariableResolverImpl resolver = getVariableResolver(dataImporter);
if (document.deleteQuery != null) {
writer.deleteByQuery(document.deleteQuery);
}
addStatusMessage("Identifying Delta");
LOG.info("Starting delta collection.");
Set<Map<String, Object>> deletedKeys = new HashSet<Map<String, Object>>();
Set<Map<String, Object>> allPks = collectDelta(root, null, resolver,
dataImporter, deletedKeys);
if (stop.get())
return;
addStatusMessage("Deltas Obtained");
addStatusMessage("Building documents");
if (!deletedKeys.isEmpty()) {
deleteAll(deletedKeys);
importStatistics.deletedDocCount.addAndGet(deletedKeys.size());
// Make sure that documents are not re-created
allPks.removeAll(deletedKeys);
}
statusMessages.put("Total Changed Documents", allPks.size());
for (Map<String, Object> pk : allPks) {
VariableResolverImpl vri = getVariableResolver(dataImporter);
vri.addNamespace(DataConfig.IMPORTER_NS + ".delta", pk);
buildDocument(vri, null, pk, root, true, null);
}
if (!stop.get()) {
writer.persistIndexStartTime(dataImporter.getIndexStartTime());
LOG.info("Delta Import completed successfully");
}
}
private void deleteAll(Set<Map<String, Object>> deletedKeys) {
LOG.info("Deleting stale documents ");
for (Map<String, Object> deletedKey : deletedKeys) {
writer.deleteDoc(deletedKey.get(root.pk));
}
}
@SuppressWarnings("unchecked")
public void addStatusMessage(String msg) {
statusMessages.put(msg, DataImporter.DATE_TIME_FORMAT.format(new Date()));
}
@SuppressWarnings("unchecked")
private void buildDocument(VariableResolverImpl vr, SolrWriter.SolrDoc doc,
Map<String, Object> pk, DataConfig.Entity entity, boolean isRoot,
ContextImpl parentCtx) {
EntityProcessor entityProcessor = getEntityProcessor(entity);
DataSource ds = entity.dataSrc;
if (verboseDebug) {
ds = DebugLogger.wrapDs(ds);
}
ContextImpl ctx = new ContextImpl(entity, vr, ds,
pk == null ? Context.FULL_DUMP : Context.DELTA_DUMP,
requestParameters.requestParams, session, parentCtx, dataImporter);
entityProcessor.init(ctx);
if (requestParameters.start > 0) {
writer.log(SolrWriter.DISABLE_LOGGING, null, null);
}
if (verboseDebug) {
writer.log(SolrWriter.START_ENTITY, entity.name, null);
}
int seenDocCount = 0;
try {
while (true) {
if (stop.get())
return;
try {
seenDocCount++;
if (seenDocCount > requestParameters.start) {
writer.log(SolrWriter.ENABLE_LOGGING, null, null);
}
if (verboseDebug && entity.isDocRoot) {
writer.log(SolrWriter.START_DOC, entity.name, null);
}
if (doc == null && entity.isDocRoot) {
if (ctx.getDocSession() != null)
ctx.getDocSession().clear();
else
ctx.setDocSession(new HashMap<String, Object>());
doc = writer.getSolrDocInstance();
DataConfig.Entity e = entity;
while (e.parentEntity != null) {
addFields(e.parentEntity, doc, (Map<String, Object>) vr
.resolve(e.parentEntity.name));
e = e.parentEntity;
}
}
Map<String, Object> arow = entityProcessor.nextRow();
if (arow == null)
break;
if (arow.containsKey(DOC_BOOST)) {
setDocumentBoost(doc, arow);
}
// Support for start parameter in debug mode
if (entity.isDocRoot) {
if (seenDocCount <= requestParameters.start)
continue;
}
if (verboseDebug) {
writer.log(SolrWriter.ENTITY_OUT, entity.name, arow);
}
importStatistics.rowsCount.incrementAndGet();
if (entity.fields != null && doc != null) {
addFields(entity, doc, arow);
}
if (isRoot)
vr.removeNamespace(null);
if (entity.entities != null) {
vr.addNamespace(entity.name, arow);
for (DataConfig.Entity child : entity.entities) {
buildDocument(vr, doc, null, child, false, ctx);
}
vr.removeNamespace(entity.name);
}
if (entity.isDocRoot) {
if (stop.get())
return;
boolean result = writer.upload(doc);
doc = null;
if (result)
importStatistics.docCount.incrementAndGet();
}
} catch (DataImportHandlerException e) {
if (verboseDebug) {
writer.log(SolrWriter.ENTITY_EXCEPTION, entity.name, e);
}
if (isRoot) {
if (e.getErrCode() == DataImportHandlerException.SKIP) {
importStatistics.skipDocCount.getAndIncrement();
} else {
LOG.log(Level.SEVERE, "Exception while processing: "
+ entity.name + " document : " + doc, e);
}
if (e.getErrCode() == DataImportHandlerException.SEVERE)
throw e;
} else
throw e;
} finally {
if (verboseDebug) {
writer.log(SolrWriter.ROW_END, entity.name, null);
if (entity.isDocRoot)
writer.log(SolrWriter.END_DOC, null, null);
}
}
}
} finally {
if (verboseDebug) {
writer.log(SolrWriter.END_ENTITY, null, null);
}
}
}
private void setDocumentBoost(SolrWriter.SolrDoc doc, Map<String, Object> arow) {
Object v = arow.get(DOC_BOOST);
float value = 1.0f;
if (v instanceof Number) {
value = ((Number) v).floatValue();
} else {
value = Float.parseFloat(v.toString());
}
doc.setDocumentBoost(value);
}
@SuppressWarnings("unchecked")
private void addFields(DataConfig.Entity entity, SolrWriter.SolrDoc doc,
Map<String, Object> arow) {
DataConfig.Entity parentMost = entity;
while (parentMost.parentEntity != null)
parentMost = parentMost.parentEntity;
for (DataConfig.Field field : entity.fields) {
addFieldValue(field, arow, null, doc);
}
if (parentMost.implicitFields != null) {
Map<String, Object> lowerCaseMap = new HashMap<String, Object>();
for (Map.Entry<String, Object> entry : arow.entrySet())
lowerCaseMap.put(entry.getKey().toLowerCase(), entry.getValue());
for (DataConfig.Field automaticField : parentMost.implicitFields) {
addFieldValue(automaticField, arow, lowerCaseMap, doc);
}
}
}
private void addFieldValue(DataConfig.Field field, Map<String, Object> arow,
Map<String, Object> lowerCaseMap, SolrWriter.SolrDoc doc) {
if (!field.toWrite)
return;
Object value = arow.get(field.column);
if (value == null) {
if (lowerCaseMap != null) {
value = lowerCaseMap.get(field.column.toLowerCase());
}
if (value == null)
return;
}
if (value instanceof Collection) {
Collection collection = (Collection) value;
if (field.multiValued) {
for (Object o : collection) {
doc.addField(field.nameOrColName, o, field.boost);
}
} else {
if (doc.getField(field.nameOrColName) == null)
for (Object o : collection) {
doc.addField(field.nameOrColName, o, field.boost);
break;
}
}
} else if (field.multiValued) {
doc.addField(field.nameOrColName, value, field.boost);
} else {
if (doc.getField(field.nameOrColName) == null)
doc.addField(field.nameOrColName, value, field.boost);
}
}
public static EntityProcessor getEntityProcessor(DataConfig.Entity entity) {
if (entity.processor != null)
return entity.processor;
EntityProcessor entityProcessor;
if (entity.proc == null) {
entityProcessor = new SqlEntityProcessor();
} else {
try {
entityProcessor = (EntityProcessor) loadClass(entity.proc)
.newInstance();
} catch (Exception e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Unable to load EntityProcessor implementation for entity:"
+ entity.name, e);
}
}
return entity.processor = entityProcessor;
}
/**
* <p>
* Collects unique keys of all Solr documents for which one or more source
* tables have been changed since the last indexed time.
* </p>
* <p>
* Note: In our definition, the unique key of a Solr document is the primary key of
* the top-level entity (unless skipped using docRoot=false) in
* data-config.xml
* </p>
*
* @return the set of keys for which Solr documents should be
* updated.
*/
@SuppressWarnings("unchecked")
public Set<Map<String, Object>> collectDelta(DataConfig.Entity entity,
DataConfig.Entity parentEntity, VariableResolverImpl resolver,
DataImporter context, Set<Map<String, Object>> deletedRows) {
if (stop.get())
return new HashSet();
Set<Map<String, Object>> myModifiedPks = new HashSet<Map<String, Object>>();
if (entity.entities != null) {
for (DataConfig.Entity entity1 : entity.entities) {
myModifiedPks.addAll(collectDelta(entity1, entity, resolver, context,
deletedRows));
}
}
// identifying the modified rows for this entity
Set<Map<String, Object>> deltaSet = new HashSet<Map<String, Object>>();
resolver.addNamespace(null, (Map) entity.allAttributes);
EntityProcessor entityProcessor = getEntityProcessor(entity);
entityProcessor.init(new ContextImpl(entity, resolver, entity.dataSrc,
Context.FIND_DELTA, requestParameters.requestParams, session, null,
dataImporter));
LOG.info("Running ModifiedRowKey() for Entity: " + entity.name);
int count = 0;
while (true) {
Map<String, Object> row = entityProcessor.nextModifiedRowKey();
if (row == null)
break;
deltaSet.add(row);
count++;
importStatistics.rowsCount.incrementAndGet();
}
LOG.info("Completed ModifiedRowKey for Entity: " + entity.name
+ " rows obtained : " + count);
count = 0;
// identifying the deleted rows for this entity
LOG.info("Running DeletedRowKey() for Entity: " + entity.name);
Set<Map<String, Object>> deletedSet = new HashSet<Map<String, Object>>();
while (true) {
Map<String, Object> row = entityProcessor.nextDeletedRowKey();
if (row == null)
break;
deletedSet.add(row);
count++;
importStatistics.rowsCount.incrementAndGet();
}
LOG.info("Completed DeletedRowKey for Entity: " + entity.name
+ " rows obtained : " + count);
myModifiedPks.addAll(deltaSet);
Set<Map<String, Object>> parentKeyList = new HashSet<Map<String, Object>>();
if (parentEntity != null && parentEntity.isDocRoot) {
EntityProcessor parentEntityProcessor = getEntityProcessor(parentEntity);
parentEntityProcessor.init(new ContextImpl(parentEntity, resolver,
parentEntity.dataSrc, Context.FIND_DELTA,
requestParameters.requestParams, session, null, dataImporter));
// identifying parent rows changed due to the modified rows
for (Map<String, Object> row : myModifiedPks)
getModifiedParentRows(resolver.addNamespace(entity.name, row),
entity.name, parentEntityProcessor, parentKeyList);
// running the same for deleted rows
for (Map<String, Object> row : deletedSet) {
getModifiedParentRows(resolver.addNamespace(entity.name, row),
entity.name, parentEntityProcessor, parentKeyList);
}
}
LOG.info("Completed parentDeltaQuery for Entity: " + entity.name);
if (entity.isDocRoot)
deletedRows.addAll(deletedSet);
return entity.isDocRoot ? myModifiedPks : new HashSet<Map<String, Object>>(
parentKeyList);
}
private void getModifiedParentRows(VariableResolverImpl resolver,
String entity, EntityProcessor entityProcessor,
Set<Map<String, Object>> parentKeyList) {
try {
while (true) {
Map<String, Object> parentRow = entityProcessor
.nextModifiedParentRowKey();
if (parentRow == null)
break;
parentKeyList.add(parentRow);
importStatistics.rowsCount.incrementAndGet();
}
} finally {
resolver.removeNamespace(entity);
}
}
public void abort() {
stop.set(true);
}
private AtomicBoolean stop = new AtomicBoolean(false);
public static final String TIME_ELAPSED = "Time Elapsed";
public static void main(String[] args) throws InterruptedException {
long l = System.currentTimeMillis();
Thread.sleep(1050);
System.out.println(getTimeElapsedSince(l));
}
static String getTimeElapsedSince(long l) {
l = System.currentTimeMillis() - l;
return (l / (60000 * 60)) % 60 + ":" + (l / 60000) % 60 + ":" + (l / 1000)
% 60 + "." + l % 1000;
}
@SuppressWarnings("unchecked")
static Class loadClass(String name) throws ClassNotFoundException {
DocBuilder inst = INSTANCE.get();
try {
return inst != null ?
inst.writer.loadClass(name) :
Class.forName(name);
} catch (ClassNotFoundException e) {
try {
String n = DocBuilder.class.getPackage().getName() + "." + name;
return inst != null ?
inst.writer.loadClass(n) :
Class.forName(n);
} catch (ClassNotFoundException e1) {
throw e;
}
}
}
public static class Statistics {
public AtomicInteger docCount = new AtomicInteger();
public AtomicInteger deletedDocCount = new AtomicInteger();
public AtomicLong rowsCount = new AtomicLong();
public AtomicLong queryCount = new AtomicLong();
public AtomicLong skipDocCount = new AtomicLong();
public Statistics add(Statistics stats) {
this.docCount.addAndGet(stats.docCount.get());
this.deletedDocCount.addAndGet(stats.deletedDocCount.get());
this.rowsCount.addAndGet(stats.rowsCount.get());
this.queryCount.addAndGet(stats.queryCount.get());
return this;
}
}
public static final String LAST_INDEX_TIME = "last_index_time";
public static final String INDEX_START_TIME = "index_start_time";
}
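
A small usage sketch (assumed, not from this commit) of the additive Statistics holder defined above:

DocBuilder.Statistics total = new DocBuilder.Statistics();
DocBuilder.Statistics batch = new DocBuilder.Statistics();
batch.docCount.incrementAndGet();
batch.rowsCount.addAndGet(5);
total.add(batch);
// total.docCount.get() == 1, total.rowsCount.get() == 5
// note: skipDocCount is not carried over by add() in this version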

View File

@ -0,0 +1,96 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.util.Map;
/**
* <p>
* An instance of entity processor serves an entity. It is reused throughout the
* import process.
* </p>
* <p/>
* <p>
* Implementations of this class must provide a public no-args constructor.
* </p>
* <p/>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p/>
* <b>This API is experimental and may change in the future.</b>
*
* @version $Id$
* @since solr 1.3
*/
public abstract class EntityProcessor {
/**
* This method is called when the processor starts processing an entity. When processing
* comes back to the entity it is called again, so it can reset any state at that point.
* For a root-most entity this is called only once per ingestion. For sub-entities, this
* is called once for each row from the parent entity.
*
* @param context The current context
*/
public abstract void init(Context context);
/**
* This method streams the data, one row at a time. The implementation
* fetches as many rows as needed and returns one 'row' at a time. Only this
* method is used during a full import.
*
* @return A 'row'. The 'key' of the map is the column name and the 'value'
* is the value of that column. If there are no more rows to be
* returned, return 'null'
*/
public abstract Map<String, Object> nextRow();
/**
* This is used for delta-import. It gives the pks of the changed rows in this
* entity
*
* @return the pk vs value of all changed rows
*/
public abstract Map<String, Object> nextModifiedRowKey();
/**
* This is used during delta-import. It gives the primary keys of the rows
* that are deleted from this entity. If this entity is the root entity, the Solr
* document is deleted. If this is a sub-entity, the Solr document is
* considered 'changed' and will be recreated
*
* @return the pk vs value of all deleted rows
*/
public abstract Map<String, Object> nextDeletedRowKey();
/**
* This is used during delta-import. This gives the primary keys and their
* values of all the rows changed in a parent entity due to changes in this
* entity.
*
* @return the pk vs value of all changed rows in the parent entity
*/
public abstract Map<String, Object> nextModifiedParentRowKey();
/**
* Invoked when the entity processor is destroyed, towards the end of ingestion. Called only once
*/
public abstract void destroy();
}
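
A minimal, hypothetical EntityProcessor that emits a fixed number of rows, sketching the contract above; the class name and row contents are assumptions, not part of this commit.

package org.apache.solr.handler.dataimport;

import java.util.HashMap;
import java.util.Map;

// Hypothetical example: emits three rows per init() and does not support delta-import.
public class CountingEntityProcessor extends EntityProcessor {
  private int remaining;

  public void init(Context context) {
    remaining = 3;            // reset every time the entity is (re)visited
  }

  public Map<String, Object> nextRow() {
    if (remaining-- <= 0)
      return null;            // null signals the end of rows
    Map<String, Object> row = new HashMap<String, Object>();
    row.put("id", remaining);
    return row;
  }

  public Map<String, Object> nextModifiedRowKey() { return null; }   // no delta support
  public Map<String, Object> nextDeletedRowKey() { return null; }
  public Map<String, Object> nextModifiedParentRowKey() { return null; }

  public void destroy() { }
}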

View File

@ -0,0 +1,423 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.lang.reflect.Method;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
* <p>
* Base class for all implementations of EntityProcessor
* </p>
* <p/>
* <p>
* Most implementations of EntityProcessor extend this base class which provides
* common functionality.
* </p>
* <p/>
* <b>This API is experimental and subject to change</b>
*
* @version $Id$
* @since solr 1.3
*/
public class EntityProcessorBase extends EntityProcessor {
private static final Logger LOG = Logger.getLogger(EntityProcessorBase.class
.getName());
protected String entityName;
protected Context context;
protected VariableResolverImpl resolver;
protected Iterator<Map<String, Object>> rowIterator;
protected List<Transformer> transformers;
protected List<Map<String, Object>> rowcache;
protected String query;
@SuppressWarnings("unchecked")
private Map session;
public void init(Context context) {
rowIterator = null;
rowcache = null;
this.context = context;
entityName = context.getEntityAttribute("name");
resolver = (VariableResolverImpl) context.getVariableResolver();
query = null;
session = null;
}
@SuppressWarnings("unchecked")
void loadTransformers() {
String transClasses = context.getEntityAttribute(TRANSFORMER);
if (transClasses == null) {
transformers = Collections.EMPTY_LIST;
return;
}
String[] transArr = transClasses.split(",");
transformers = new ArrayList<Transformer>() {
public boolean add(Transformer transformer) {
return super.add(DebugLogger.wrapTransformer(transformer));
}
};
for (String aTransArr : transArr) {
String trans = aTransArr.trim();
if (trans.startsWith("script:")) {
String functionName = trans.substring("script:".length());
ScriptTransformer scriptTransformer = new ScriptTransformer();
scriptTransformer.setFunctionName(functionName);
transformers.add(scriptTransformer);
continue;
}
try {
Class clazz = DocBuilder.loadClass(trans);
if (clazz.newInstance() instanceof Transformer) {
transformers.add((Transformer) clazz.newInstance());
} else {
final Method meth = clazz.getMethod(TRANSFORM_ROW, Map.class);
if (meth == null) {
String msg = "Transformer :"
+ trans
+ "does not implement Transformer interface or does not have a transformRow(Map m)method";
LOG.log(Level.SEVERE, msg);
throw new DataImportHandlerException(
DataImportHandlerException.SEVERE, msg);
}
transformers.add(new ReflectionTransformer(meth, clazz, trans));
}
} catch (Exception e) {
LOG.log(Level.SEVERE, "Unable to load Transformer: " + aTransArr, e);
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
e);
}
}
}
@SuppressWarnings("unchecked")
static class ReflectionTransformer extends Transformer {
final Method meth;
final Class clazz;
final String trans;
final Object o;
public ReflectionTransformer(Method meth, Class clazz, String trans)
throws Exception {
this.meth = meth;
this.clazz = clazz;
this.trans = trans;
o = clazz.newInstance();
}
public Object transformRow(Map<String, Object> aRow, Context context) {
try {
return meth.invoke(o, aRow);
} catch (Exception e) {
LOG.log(Level.WARNING, "method invocation failed on transformer : "
+ trans, e);
throw new DataImportHandlerException(DataImportHandlerException.WARN, e);
}
}
}
protected Map<String, Object> getFromRowCache() {
Map<String, Object> r = rowcache.remove(0);
if (rowcache.isEmpty())
rowcache = null;
return r;
}
@SuppressWarnings("unchecked")
protected Map<String, Object> applyTransformer(Map<String, Object> row) {
if (transformers == null)
loadTransformers();
if (transformers == Collections.EMPTY_LIST)
return row;
Map<String, Object> transformedRow = row;
List<Map<String, Object>> rows = null;
for (Transformer t : transformers) {
try {
if (rows != null) {
List<Map<String, Object>> tmpRows = new ArrayList<Map<String, Object>>();
for (Map<String, Object> map : rows) {
Object o = t.transformRow(map, context);
if (o == null)
continue;
if (o instanceof Map) {
Map oMap = (Map) o;
checkSkipDoc(oMap, t);
tmpRows.add((Map) o);
} else if (o instanceof List) {
tmpRows.addAll((List) o);
} else {
LOG
.log(Level.SEVERE,
"Transformer must return Map<String, Object> or a List<Map<String, Object>>");
}
}
rows = tmpRows;
} else {
Object o = t.transformRow(transformedRow, context);
if (o == null)
return null;
if (o instanceof Map) {
Map oMap = (Map) o;
checkSkipDoc(oMap, t);
transformedRow = (Map) o;
} else if (o instanceof List) {
rows = (List) o;
} else {
LOG
.log(Level.SEVERE,
"Transformer must return Map<String, Object> or a List<Map<String, Object>>");
}
}
} catch (DataImportHandlerException e) {
throw e;
} catch (Exception e) {
LOG.log(Level.WARNING, "transformer threw error", e);
throw new DataImportHandlerException(DataImportHandlerException.WARN, e);
}
}
if (rows == null) {
return transformedRow;
} else {
rowcache = rows;
return getFromRowCache();
}
}
private void checkSkipDoc(Map oMap, Transformer t) {
if (oMap.get(SKIP_DOC) != null
&& Boolean.parseBoolean(oMap.get(SKIP_DOC).toString()))
throw new DataImportHandlerException(DataImportHandlerException.SKIP,
"Document skipped by: " + DebugLogger.getTransformerName(t));
}
protected Map<String, Object> getNext() {
try {
if (rowIterator == null)
return null;
if (rowIterator.hasNext())
return rowIterator.next();
rowIterator = null;
query = null;
return null;
} catch (Exception e) {
LOG.log(Level.SEVERE, "getNext() failed for query '" + query + "'", e);
rowIterator = null;
query = null;
throw new DataImportHandlerException(DataImportHandlerException.WARN, e);
}
}
public Map<String, Object> nextModifiedRowKey() {
return null;
}
public Map<String, Object> nextDeletedRowKey() {
return null;
}
public Map<String, Object> nextModifiedParentRowKey() {
return null;
}
public void setSessionAttribute(Object key, Object val) {
if (session == null) {
session = new HashMap();
}
session.put(key, val);
}
public Object getSessionAttribute(Object key) {
if (session == null)
return null;
return session.get(key);
}
/**
* For a simple implementation, this is the only method that the sub-class
* should implement. This is intended to stream rows one-by-one. Return null
* to signal end of rows
*
* @return a row where the key is the name of the field and value can be any
* Object or a Collection of objects. Return null to signal end of
* rows
*/
public Map<String, Object> nextRow() {
return null;// do not do anything
}
public void destroy() {
/*no op*/
}
/**
* Clears the internal session maintained by this EntityProcessor
*/
public void clearSession() {
if (session != null)
session.clear();
}
/**
* Only used by cache implementations
*/
protected String cachePk;
/**
* Only used by cache implementations
*/
protected String cacheVariableName;
/**
* Only used by cache implementations
*/
protected Map<String, List<Map<String, Object>>> simpleCache;
/**
* Only used by cache implementations
*/
protected Map<String, Map<Object, List<Map<String, Object>>>> cacheWithWhereClause;
protected List<Map<String, Object>> dataSourceRowCache;
/**
* Only used by cache implementations
*/
protected void cacheInit() {
if (simpleCache != null || cacheWithWhereClause != null)
return;
String where = context.getEntityAttribute("where");
if (where == null) {
simpleCache = new HashMap<String, List<Map<String, Object>>>();
} else {
String[] splits = where.split("=");
cachePk = splits[0];
cacheVariableName = splits[1].trim();
cacheWithWhereClause = new HashMap<String, Map<Object, List<Map<String, Object>>>>();
}
}
/**
* If the where clause is present, the cache maps each query to a Map of key vs
* List of rows. Only used by cache implementations.
*
* @param query
* @return
*/
protected Map<String, Object> getIdCacheData(String query) {
Map<Object, List<Map<String, Object>>> rowIdVsRows = cacheWithWhereClause
.get(query);
List<Map<String, Object>> rows = null;
Object key = resolver.resolve(cacheVariableName);
if (rowIdVsRows != null) {
rows = rowIdVsRows.get(key);
if (rows == null)
return null;
dataSourceRowCache = new ArrayList<Map<String, Object>>(rows);
return getFromRowCacheTransformed();
} else {
rows = getAllNonCachedRows();
if (rows.isEmpty()) {
return null;
} else {
rowIdVsRows = new HashMap<Object, List<Map<String, Object>>>();
for (Map<String, Object> row : rows) {
Object k = row.get(cachePk);
if (rowIdVsRows.get(k) == null)
rowIdVsRows.put(k, new ArrayList<Map<String, Object>>());
rowIdVsRows.get(k).add(row);
}
cacheWithWhereClause.put(query, rowIdVsRows);
if (!rowIdVsRows.containsKey(key))
return null;
dataSourceRowCache = new ArrayList<Map<String, Object>>(rowIdVsRows.get(key));
if (dataSourceRowCache.isEmpty()) {
dataSourceRowCache = null;
return null;
}
return getFromRowCacheTransformed();
}
}
}
/**
* Get all the rows from the datasource for the given query. Only used by
* cache implementations.
* <p/>
* This <b>must</b> be implemented by sub-classes which intend to provide a
* cached implementation
*
* @return
*/
protected List<Map<String, Object>> getAllNonCachedRows() {
return Collections.EMPTY_LIST;
}
/**
* If the where clause is not present, the cache is a Map of query vs List of rows.
* Only used by cache implementations.
*
* @return
*/
protected Map<String, Object> getSimplCacheData(String query) {
List<Map<String, Object>> rows = simpleCache.get(query);
if (rows != null) {
dataSourceRowCache = new ArrayList<Map<String, Object>>(rows);
return getFromRowCacheTransformed();
} else {
rows = getAllNonCachedRows();
if (rows.isEmpty()) {
return null;
} else {
dataSourceRowCache = new ArrayList<Map<String, Object>>(rows);
simpleCache.put(query, rows);
return getFromRowCacheTransformed();
}
}
}
protected Map<String, Object> getFromRowCacheTransformed() {
Map<String, Object> r = dataSourceRowCache.remove(0);
if (dataSourceRowCache.isEmpty())
dataSourceRowCache = null;
return r == null ? null : applyTransformer(r);
}
public static final String TRANSFORMER = "transformer";
public static final String TRANSFORM_ROW = "transformRow";
public static final String SKIP_DOC = "$skipDoc";
}
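
As the nextRow() comment above notes, a simple subclass only needs to override that one method; below is a hypothetical sketch (not part of this commit) that also routes its row through applyTransformer().

package org.apache.solr.handler.dataimport;

import java.util.HashMap;
import java.util.Map;

// Hypothetical example: a single-row processor built on EntityProcessorBase.
public class SingleRowEntityProcessor extends EntityProcessorBase {
  private boolean done;

  public void init(Context context) {
    super.init(context);      // sets up entityName, resolver, session, etc.
    done = false;
  }

  public Map<String, Object> nextRow() {
    if (done)
      return null;            // null signals the end of rows
    done = true;
    Map<String, Object> row = new HashMap<String, Object>();
    row.put("name", context.getEntityAttribute("name"));
    return applyTransformer(row);   // honours the entity's transformer="..." attribute
  }
}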

View File

@ -0,0 +1,49 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
/**
* <p>
* Pluggable functions for resolving variables
* </p>
* <p/>
* <p>
* Implementations of this class must provide a public no-arg constructor.
* </p>
* <p/>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p/>
* <b>This API is experimental and may change in the future.</b>
*
* @version $Id$
* @since solr 1.3
*/
public abstract class Evaluator {
/**
* Return a String after processing an expression and a VariableResolver
*
* @param resolver
* @param expression
* @return
*/
public abstract String evaluate(VariableResolver resolver, String expression);
}
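
For illustration, a hypothetical Evaluator that upper-cases the resolved value of an expression; the class and function names are assumptions, not part of this commit.

package org.apache.solr.handler.dataimport;

// Hypothetical example: would back a ${dataimporter.functions.toUpper(entity.field)} style call
// once registered as an evaluator in data-config.xml.
public class UpperCaseEvaluator extends Evaluator {
  public String evaluate(VariableResolver resolver, String expression) {
    Object o = resolver.resolve(expression);
    return o == null ? null : o.toString().toUpperCase();
  }
}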

View File

@ -0,0 +1,212 @@
package org.apache.solr.handler.dataimport;
import org.apache.solr.util.DateMathParser;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* <p>
* Holds definitions for evaluators provided by DataImportHandler
* </p>
* <p/>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p/>
* <b>This API is experimental and may change in the future.</b>
*
* @version $Id$
* @since solr 1.3
*/
public class EvaluatorBag {
public static final String DATE_FORMAT_EVALUATOR = "formatDate";
public static final String URL_ENCODE_EVALUATOR = "encodeUrl";
public static final String SQL_ESCAPE_EVALUATOR = "escapeSql";
static final Pattern FORMAT_METHOD = Pattern
.compile("^(\\w*?)\\((.*?)\\)$");
/**
* <p>
* Returns an <code>Evaluator</code> instance meant to be used for escaping
* values in SQL queries.
* </p>
* <p>
* It escapes the value of the given expression by replacing all occurrences
* of single quotes with two single quotes, and similarly for double quotes
* </p>
*
* @return
*/
public static Evaluator getSqlEscapingEvaluator() {
return new Evaluator() {
public String evaluate(VariableResolver resolver, String expression) {
Object o = resolver.resolve(expression);
if (o == null)
return null;
return o.toString().replaceAll("'", "''").replaceAll("\"", "\"\"");
}
};
}
/**
* <p>
* Returns an <code>Evaluator</code> instance capable of URL-encoding
* expressions. The expressions are evaluated using a
* <code>VariableResolver</code>
* </p>
*
* @return an <code>Evaluator</code> instance capable of URL-encoding
* expressions.
*/
public static Evaluator getUrlEvaluator() {
return new Evaluator() {
public String evaluate(VariableResolver resolver, String expression) {
Object value = null;
try {
value = resolver.resolve(expression);
if (value == null)
return null;
return URLEncoder.encode(value.toString(), "UTF-8");
} catch (UnsupportedEncodingException e) {
throw new DataImportHandlerException(
DataImportHandlerException.SEVERE,
"Unable to encode expression: " + expression + " with value: "
+ value, e);
}
}
};
}
/**
* <p>
* Returns an <code>Evaluator</code> instance capable of formatting values
* using a given date format.
* </p>
* <p>
* The value to be formatted can be an entity.field or a date expression parsed
* by the <code>DateMathParser</code> class. If the value is in single quotes,
* it is assumed to be a date math expression, otherwise it is resolved using
* a <code>VariableResolver</code> instance
* </p>
*
* @return an Evaluator instance capable of formatting values to a given date
* format
* @see DateMathParser
*/
public static Evaluator getDateFormatEvaluator() {
return new Evaluator() {
public String evaluate(VariableResolver resolver, String expression) {
CacheEntry e = getCachedData(expression);
String expr = e.key;
SimpleDateFormat fmt = e.format;
Matcher m = IN_SINGLE_QUOTES.matcher(expr);
if (m.find()) {
String datemathExpr = m.group(1);
try {
Date date = dateMathParser.parseMath(datemathExpr);
return fmt.format(date);
} catch (ParseException exp) {
throw new DataImportHandlerException(
DataImportHandlerException.SEVERE,
"Invalid expression for date", exp);
}
} else {
Object o = resolver.resolve(expr);
if (o == null)
return "";
Date date = null;
if (o instanceof Date) {
date = (Date) o;
} else {
String s = o.toString();
try {
date = DataImporter.DATE_TIME_FORMAT.parse(s);
} catch (ParseException exp) {
throw new DataImportHandlerException(
DataImportHandlerException.SEVERE,
"Invalid expression for date", exp);
}
}
return fmt.format(date);
}
}
private CacheEntry getCachedData(String str) {
CacheEntry result = cache.get(str);
if (result != null)
return result;
Matcher m = FORMAT_METHOD.matcher(str);
String expr, pattern;
if (m.find()) {
expr = m.group(1).trim();
if (IN_SINGLE_QUOTES.matcher(expr).find()) {
expr = expr.replaceAll("NOW", "");
}
pattern = m.group(2).trim();
cache.put(str, new CacheEntry(expr, new SimpleDateFormat(pattern)));
return cache.get(str);
} else {
throw new DataImportHandlerException(
DataImportHandlerException.SEVERE, "Invalid format String : "
+ "${dataimporter.functions." + str + "}");
}
}
Map<String, CacheEntry> cache = new HashMap<String, CacheEntry>();
Pattern FORMAT_METHOD = Pattern.compile("^(.*?),(.*?)$");
};
}
static Map<String, Object> getFunctionsNamespace(
final VariableResolver resolver, final Map<String, Evaluator> evaluators) {
return new HashMap<String, Object>() {
@Override
public String get(Object key) {
if (key == null)
return null;
Matcher m = FORMAT_METHOD.matcher((String) key);
if (!m.find())
return null;
String fname = m.group(1);
Evaluator evaluator = evaluators.get(fname);
if (evaluator == null)
return null;
return evaluator.evaluate(resolver, m.group(2));
}
};
}
static class CacheEntry {
public String key;
public SimpleDateFormat format;
public CacheEntry(String key, SimpleDateFormat format) {
this.key = key;
this.format = format;
}
}
static Pattern IN_SINGLE_QUOTES = Pattern.compile("^'(.*?)'$");
static DateMathParser dateMathParser = new DateMathParser(TimeZone
.getDefault(), Locale.getDefault());
}
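
A small sketch of the escaping rule applied by the escapeSql evaluator above (the sample value is hypothetical):

String raw = "O'Reilly \"Media\"";
String escaped = raw.replaceAll("'", "''").replaceAll("\"", "\"\"");
// escaped contains: O''Reilly ""Media""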

View File

@ -0,0 +1,103 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.io.*;
import java.util.Properties;
/**
* <p>
* A DataSource which reads from local files
* </p>
* <p>
* The file is read with the default platform encoding. It can be overridden by
* specifying the encoding in solrconfig.xml
* </p>
* <p/>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p/>
* <b>This API is experimental and may change in the future.</b>
*
* @version $Id$
* @since solr 1.3
*/
public class FileDataSource extends DataSource<Reader> {
public static final String BASE_PATH = "basePath";
private String basePath;
private String encoding = null;
public void init(Context context, Properties initProps) {
basePath = initProps.getProperty(BASE_PATH);
if (initProps.get(HttpDataSource.ENCODING) != null)
encoding = initProps.getProperty(HttpDataSource.ENCODING);
}
/**
* <p>
* Returns a reader for the given file.
* </p>
* <p>
* If the given file path is not absolute, we try to construct an absolute path
* using the basePath configuration. If that fails, the relative path is
* tried. If the file is not found, a RuntimeException is thrown.
* </p>
* <p>
* <b>It is the responsibility of the calling method to properly close the
* returned Reader</b>
* </p>
*/
public Reader getData(String query) {
try {
File file0 = new File(query);
File file = file0;
if (!file.isAbsolute())
file = new File(basePath + query);
if (file.isFile() && file.canRead()) {
return openStream(file);
} else if (file != file0)
if (file0.isFile() && file0.canRead())
return openStream(file0);
throw new FileNotFoundException("Could not find file: " + query);
} catch (UnsupportedEncodingException e) {
throw new RuntimeException(e);
} catch (FileNotFoundException e) {
throw new RuntimeException(e);
}
}
private InputStreamReader openStream(File file) throws FileNotFoundException,
UnsupportedEncodingException {
if (encoding == null) {
return new InputStreamReader(new FileInputStream(file));
} else {
return new InputStreamReader(new FileInputStream(file), encoding);
}
}
public void close() {
}
}
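
A hedged usage sketch of FileDataSource (paths are hypothetical, not part of this commit); init() only reads its properties in this version, and the caller is responsible for closing the returned Reader.

FileDataSource ds = new FileDataSource();
Properties p = new Properties();
p.setProperty(FileDataSource.BASE_PATH, "/var/data/feeds/");  // trailing slash matters:
                                                              // basePath and query are simply concatenated
ds.init(null, p);
Reader reader = ds.getData("products.xml");   // resolves to /var/data/feeds/products.xml
// ... consume, then close the Reader yourself; close() on the data source is a no-op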

View File

@ -0,0 +1,225 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.io.File;
import java.io.FilenameFilter;
import java.text.ParseException;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* <p>
* An EntityProcessor instance which can stream file names found in a given base
* directory matching patterns and return rows containing file information.
* </p>
* <p/>
* <p>
* It supports querying a given base directory by matching:
* <ul>
* <li>file names against a regular expression</li>
* <li>excluding certain files based on a regular expression</li>
* <li>last modification date (newer or older than a given date or time)</li>
* <li>size (bigger or smaller than size given in bytes)</li>
* <li>recursively iterating through sub-directories</li>
* </ul>
* Its output can be used along with FileDataSource to read from files in file
* systems.
* </p>
* <p/>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p/>
* <b>This API is experimental and may change in the future.</b>
*
* @version $Id$
* @since solr 1.3
*/
public class FileListEntityProcessor extends EntityProcessorBase {
private String fileName, baseDir, excludes;
private Date newerThan, olderThan;
private long biggerThan = -1, smallerThan = -1;
private boolean recursive = false;
private Pattern fileNamePattern, excludesPattern;
public void init(Context context) {
super.init(context);
fileName = context.getEntityAttribute(FILE_NAME);
if (fileName != null) {
fileName = resolver.replaceTokens(fileName);
fileNamePattern = Pattern.compile(fileName);
}
baseDir = context.getEntityAttribute(BASE_DIR);
if (baseDir == null)
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"'baseDir' is a required attribute");
baseDir = resolver.replaceTokens(baseDir);
File dir = new File(baseDir);
if (!dir.isDirectory())
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"'baseDir' should point to a directory");
String r = context.getEntityAttribute(RECURSIVE);
if (r != null)
recursive = Boolean.parseBoolean(r);
excludes = context.getEntityAttribute(EXCLUDES);
if (excludes != null)
excludes = resolver.replaceTokens(excludes);
if (excludes != null)
excludesPattern = Pattern.compile(excludes);
}
private Date getDate(String dateStr) {
if (dateStr == null)
return null;
Matcher m = PLACE_HOLDER_PATTERN.matcher(dateStr);
if (m.find()) {
return (Date) resolver.resolve(dateStr);
}
m = EvaluatorBag.IN_SINGLE_QUOTES.matcher(dateStr);
if (m.find()) {
String expr = null;
expr = m.group(1).replaceAll("NOW", "");
try {
return EvaluatorBag.dateMathParser.parseMath(expr);
} catch (ParseException exp) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Invalid expression for date", exp);
}
}
try {
return DataImporter.DATE_TIME_FORMAT.parse(dateStr);
} catch (ParseException exp) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Invalid expression for date", exp);
}
}
public Map<String, Object> nextRow() {
if (rowIterator != null)
return getAndApplyTrans();
List<Map<String, Object>> fileDetails = new ArrayList<Map<String, Object>>();
File dir = new File(baseDir);
String dateStr = context.getEntityAttribute(NEWER_THAN);
newerThan = getDate(dateStr);
dateStr = context.getEntityAttribute(OLDER_THAN);
olderThan = getDate(dateStr);
getFolderFiles(dir, fileDetails);
rowIterator = fileDetails.iterator();
return getAndApplyTrans();
}
private Map<String, Object> getAndApplyTrans() {
if (rowcache != null)
return getFromRowCache();
while (true) {
Map<String, Object> r = getNext();
if (r == null)
return null;
r = applyTransformer(r);
if (r != null)
return r;
}
}
private void getFolderFiles(File dir,
final List<Map<String, Object>> fileDetails) {
dir.list(new FilenameFilter() {
public boolean accept(File dir, String name) {
if (fileNamePattern == null) {
addDetails(fileDetails, dir, name);
return false;
}
if (fileNamePattern.matcher(name).find()) {
if (excludesPattern != null && excludesPattern.matcher(name).find())
return false;
addDetails(fileDetails, dir, name);
}
return false;
}
});
}
private void addDetails(List<Map<String, Object>> files, File dir, String name) {
Map<String, Object> details = new HashMap<String, Object>();
File aFile = new File(dir, name);
if (aFile.isDirectory()) {
if (!recursive)
return;
getFolderFiles(aFile, files);
return;
}
long sz = aFile.length();
Date lastModified = new Date(aFile.lastModified());
if (biggerThan != -1 && sz <= biggerThan)
return;
if (smallerThan != -1 && sz >= smallerThan)
return;
if (olderThan != null && lastModified.after(olderThan))
return;
if (newerThan != null && lastModified.before(newerThan))
return;
details.put(DIR, dir.getAbsolutePath());
details.put(FILE, name);
details.put(ABSOLUTE_FILE, aFile.getAbsolutePath());
details.put(SIZE, sz);
details.put(LAST_MODIFIED, lastModified);
files.add(details);
}
public static final Pattern PLACE_HOLDER_PATTERN = Pattern
.compile("\\$\\{.*?\\}");
public static final String DIR = "fileDir";
public static final String FILE = "file";
public static final String ABSOLUTE_FILE = "fileAbsolutePath";
public static final String SIZE = "fileSize";
public static final String LAST_MODIFIED = "fileLastModified";
public static final String FILE_NAME = "fileName";
public static final String BASE_DIR = "baseDir";
public static final String EXCLUDES = "excludes";
public static final String NEWER_THAN = "newerThan";
public static final String OLDER_THAN = "olderThan";
public static final String BIGGER_THAN = "biggerThan";
public static final String SMALLER_THAN = "smallerThan";
public static final String RECURSIVE = "recursive";
}
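
A minimal data-config.xml sketch (not part of this commit) of how FileListEntityProcessor might be wired up. The attribute names (baseDir, fileName, excludes, newerThan, recursive) and the emitted fileAbsolutePath variable come from the constants defined in the class above; the paths, the nested XPathEntityProcessor entity and the FileDataSource type are illustrative assumptions.

<dataConfig>
  <dataSource type="FileDataSource" />
  <document>
    <entity name="files" processor="FileListEntityProcessor"
            baseDir="/data/feeds" fileName=".*\.xml" excludes=".*bak.*"
            newerThan="'NOW-7DAYS'" recursive="true" rootEntity="false">
      <!-- ${files.fileAbsolutePath} is the path emitted for each matched file -->
      <entity name="record" processor="XPathEntityProcessor" forEach="/record"
              url="${files.fileAbsolutePath}">
        <field column="id" xpath="/record/id" />
      </entity>
    </entity>
  </document>
</dataConfig>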

View File

@@ -0,0 +1,139 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.net.URLConnection;
import java.util.Properties;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* <p>
* A data source implementation which can be used to read character data (text)
* over HTTP.
* </p>
* <p/>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p/>
* <b>This API is experimental and may change in the future.</b>
*
* @version $Id$
* @since solr 1.3
*/
public class HttpDataSource extends DataSource<Reader> {
Logger LOG = Logger.getLogger(HttpDataSource.class.getName());
private String baseUrl;
private String encoding;
private int connectionTimeout = CONNECTION_TIMEOUT;
private int readTimeout = READ_TIMEOUT;
public HttpDataSource() {
}
public void init(Context context, Properties initProps) {
baseUrl = initProps.getProperty(BASE_URL);
if (initProps.get(ENCODING) != null)
encoding = initProps.getProperty(ENCODING);
String cTimeout = initProps.getProperty(CONNECTION_TIMEOUT_FIELD_NAME);
String rTimeout = initProps.getProperty(READ_TIMEOUT_FIELD_NAME);
if (cTimeout != null) {
try {
connectionTimeout = Integer.parseInt(cTimeout);
} catch (NumberFormatException e) {
LOG.log(Level.WARNING, "Invalid connection timeout: " + cTimeout);
}
}
if (rTimeout != null) {
try {
readTimeout = Integer.parseInt(rTimeout);
} catch (NumberFormatException e) {
LOG.log(Level.WARNING, "Invalid read timeout: " + rTimeout);
}
}
}
public Reader getData(String query) {
URL url = null;
try {
if (query.startsWith("http:")) {
url = new URL(query);
} else {
url = new URL(baseUrl + query);
}
LOG.info("Created URL to: " + url.toString());
URLConnection conn = url.openConnection();
conn.setConnectTimeout(connectionTimeout);
conn.setReadTimeout(readTimeout);
InputStream in = conn.getInputStream();
String enc = encoding;
if (enc == null) {
String cType = conn.getContentType();
if (cType != null) {
Matcher m = CHARSET_PATTERN.matcher(cType);
if (m.find()) {
enc = m.group(1);
}
}
}
if (enc == null)
enc = UTF_8;
DataImporter.QUERY_COUNT.get().incrementAndGet();
return new InputStreamReader(in, enc);
} catch (Exception e) {
LOG.log(Level.SEVERE, "Exception thrown while getting data", e);
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Exception in invoking url " + url, e);
}
}
public void close() {
}
private static final Pattern CHARSET_PATTERN = Pattern.compile(
".*?charset=(.*)$", Pattern.CASE_INSENSITIVE);
public static final String ENCODING = "encoding";
public static final String BASE_URL = "baseUrl";
public static final String UTF_8 = "UTF-8";
public static final String CONNECTION_TIMEOUT_FIELD_NAME = "connectionTimeout";
public static final String READ_TIMEOUT_FIELD_NAME = "readTimeout";
public static final int CONNECTION_TIMEOUT = 5000;
public static final int READ_TIMEOUT = 10000;
}
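
A sketch (not in this commit) of declaring HttpDataSource in data-config.xml; the feed URL is made up, and the property names (baseUrl, encoding, connectionTimeout, readTimeout) are the ones read in init() above. The XPathEntityProcessor used here appears later in this commit.

<dataConfig>
  <dataSource type="HttpDataSource" baseUrl="http://example.com/feeds/"
              encoding="UTF-8" connectionTimeout="5000" readTimeout="10000" />
  <document>
    <!-- the url below is resolved against baseUrl because it does not start with "http:" -->
    <entity name="feed" processor="XPathEntityProcessor"
            url="rss.xml" forEach="/rss/channel/item">
      <field column="title" xpath="/rss/channel/item/title" />
    </entity>
  </document>
</dataConfig>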

View File

@@ -0,0 +1,325 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.sql.*;
import java.util.*;
import java.util.concurrent.Callable;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
* <p>
* A DataSource implementation which can fetch data using JDBC.
* </p>
* <p/>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p/>
* <b>This API is experimental and may change in the future.</b>
*
* @version $Id$
* @since solr 1.3
*/
public class JdbcDataSource extends
DataSource<Iterator<Map<String, Object>>> {
private static final Logger LOG = Logger.getLogger(JdbcDataSource.class
.getName());
private Callable<Connection> factory;
private long connLastUsed = System.currentTimeMillis();
private Connection conn;
private Map<String, Integer> fieldNameVsType = new HashMap<String, Integer>();
private boolean convertType = false;
private int batchSize = FETCH_SIZE;
public void init(Context context, Properties initProps) {
Object o = initProps.get(CONVERT_TYPE);
if (o != null)
convertType = Boolean.parseBoolean(o.toString());
createConnectionFactory(context, initProps);
try {
conn = factory.call();
} catch (Exception e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Unable to create database connection", e);
}
String bsz = initProps.getProperty("batchSize");
if (bsz != null) {
try {
batchSize = Integer.parseInt(bsz);
if (batchSize == -1)
batchSize = Integer.MIN_VALUE;
} catch (NumberFormatException e) {
LOG.log(Level.WARNING, "Invalid batch size: " + bsz);
}
}
for (Map<String, String> map : context.getAllEntityFields()) {
String n = map.get(DataImporter.COLUMN);
String t = map.get(DataImporter.TYPE);
if ("sint".equals(t) || "integer".equals(t))
fieldNameVsType.put(n, Types.INTEGER);
else if ("slong".equals(t) || "long".equals(t))
fieldNameVsType.put(n, Types.BIGINT);
else if ("float".equals(t) || "sfloat".equals(t))
fieldNameVsType.put(n, Types.FLOAT);
else if ("double".equals(t) || "sdouble".equals(t))
fieldNameVsType.put(n, Types.DOUBLE);
else if ("date".equals(t))
fieldNameVsType.put(n, Types.DATE);
else if ("boolean".equals(t))
fieldNameVsType.put(n, Types.BOOLEAN);
else
fieldNameVsType.put(n, Types.VARCHAR);
}
}
private void createConnectionFactory(final Context context,
final Properties initProps) {
final String url = initProps.getProperty(URL);
String driver = initProps.getProperty(DRIVER);
if (url == null)
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"JDBC URL cannot be null");
try {
if (driver != null)
Class.forName(driver);
} catch (ClassNotFoundException e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"driver could not be loaded");
}
factory = new Callable<Connection>() {
public Connection call() throws Exception {
LOG.info("Creating a connection for entity "
+ context.getEntityAttribute(DataImporter.NAME) + " with URL: "
+ url);
long start = System.currentTimeMillis();
Connection c = DriverManager.getConnection(url, initProps);
LOG.info("Time taken for getConnection(): "
+ (System.currentTimeMillis() - start));
return c;
}
};
}
public Iterator<Map<String, Object>> getData(String query) {
ResultSetIterator r = new ResultSetIterator(query);
return r.getIterator();
}
private void logError(String msg, Exception e) {
LOG.log(Level.WARNING, msg, e);
}
private List<String> readFieldNames(ResultSetMetaData metaData)
throws SQLException {
List<String> colNames = new ArrayList<String>();
int count = metaData.getColumnCount();
for (int i = 0; i < count; i++) {
colNames.add(metaData.getColumnLabel(i + 1));
}
return colNames;
}
private class ResultSetIterator {
ResultSet resultSet;
Statement stmt = null;
List<String> colNames;
Iterator<Map<String, Object>> rSetIterator;
public ResultSetIterator(String query) {
try {
Connection c = getConnection();
stmt = c.createStatement(ResultSet.TYPE_FORWARD_ONLY,
ResultSet.CONCUR_READ_ONLY);
stmt.setFetchSize(batchSize);
LOG.finer("Executing SQL: " + query);
long start = System.currentTimeMillis();
if (stmt.execute(query)) {
resultSet = stmt.getResultSet();
}
LOG.finest("Time taken for sql :"
+ (System.currentTimeMillis() - start));
colNames = readFieldNames(resultSet.getMetaData());
} catch (Exception e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Unable to execute query: " + query, e);
}
if (resultSet == null) {
rSetIterator = new ArrayList<Map<String, Object>>().iterator();
return;
}
rSetIterator = new Iterator<Map<String, Object>>() {
public boolean hasNext() {
return hasnext();
}
public Map<String, Object> next() {
return getARow();
}
public void remove() {/* do nothing */
}
};
}
private Iterator<Map<String, Object>> getIterator() {
return rSetIterator;
}
private Map<String, Object> getARow() {
if (resultSet == null)
return null;
Map<String, Object> result = new HashMap<String, Object>();
for (String colName : colNames) {
try {
if (!convertType) {
// Use underlying database's type information
result.put(colName, resultSet.getObject(colName));
continue;
}
Integer type = fieldNameVsType.get(colName);
if (type == null)
type = 12;
switch (type) {
case Types.INTEGER:
result.put(colName, resultSet.getInt(colName));
break;
case Types.FLOAT:
result.put(colName, resultSet.getFloat(colName));
break;
case Types.BIGINT:
result.put(colName, resultSet.getLong(colName));
break;
case Types.DOUBLE:
result.put(colName, resultSet.getDouble(colName));
break;
case Types.DATE:
result.put(colName, resultSet.getDate(colName));
break;
case Types.BOOLEAN:
result
.put(colName, resultSet.getBoolean(colName));
break;
default:
result.put(colName, resultSet.getString(colName));
break;
}
} catch (SQLException e) {
logError("Error reading data ", e);
throw new DataImportHandlerException(
DataImportHandlerException.SEVERE,
"Error reading data from database", e);
}
}
return result;
}
private boolean hasnext() {
if (resultSet == null)
return false;
try {
if (resultSet.next()) {
return true;
} else {
close();
return false;
}
} catch (SQLException e) {
logError("Error reading data ", e);
close();
return false;
}
}
private void close() {
try {
if (resultSet != null)
resultSet.close();
if (stmt != null)
stmt.close();
} catch (Exception e) {
logError("Exception while closing result set", e);
} finally {
resultSet = null;
stmt = null;
}
}
}
private Connection getConnection() throws Exception {
long currTime = System.currentTimeMillis();
if (currTime - connLastUsed > CONN_TIME_OUT) {
synchronized (this) {
Connection tmpConn = factory.call();
finalize();
connLastUsed = System.currentTimeMillis();
return conn = tmpConn;
}
} else {
connLastUsed = currTime;
return conn;
}
}
protected void finalize() {
try {
conn.close();
} catch (Exception e) {
}
}
public void close() {
try {
conn.close();
} catch (Exception e) {
}
}
private static final long CONN_TIME_OUT = 10 * 1000; // 10 seconds
private static final int FETCH_SIZE = 500;
public static final String URL = "url";
public static final String DRIVER = "driver";
public static final String CONVERT_TYPE = "convertType";
}
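
A data-config.xml sketch (not part of this commit; driver, URL and credentials are placeholders) showing the properties JdbcDataSource reads above: url is required by createConnectionFactory() (driver is optional if the JDBC driver registers itself), user/password are passed through to DriverManager as connection properties, batchSize sets the JDBC fetch size (-1 maps to Integer.MIN_VALUE for streaming drivers such as MySQL), and convertType enables the per-column type conversion.

<dataConfig>
  <dataSource type="JdbcDataSource" driver="com.mysql.jdbc.Driver"
              url="jdbc:mysql://localhost/dbname"
              user="db_user" password="db_pass"
              batchSize="-1" convertType="true" />
  <document>
    <entity name="item" query="select id, name from item">
      <field column="id" name="id" />
      <field column="name" name="name" />
    </entity>
  </document>
</dataConfig>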

View File

@@ -0,0 +1,59 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Properties;
/**
* <p>
* A mock DataSource implementation which can be used for testing.
* </p>
* <p/>
* <b>This API is experimental and may change in the future.</b>
*
* @version $Id$
* @since solr 1.3
*/
public class MockDataSource extends
DataSource<Iterator<Map<String, Object>>> {
private static Map<String, Iterator<Map<String, Object>>> cache = new HashMap<String, Iterator<Map<String, Object>>>();
public static void setIterator(String query,
Iterator<Map<String, Object>> iter) {
cache.put(query, iter);
}
public static void clearCache() {
cache.clear();
}
public void init(Context context, Properties initProps) {
}
public Iterator<Map<String, Object>> getData(String query) {
return cache.get(query);
}
public void close() {
cache.clear();
}
}
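
A minimal usage sketch (test code, not part of this commit; the query string and row values are made up) of how MockDataSource is meant to be used: register canned rows against a query and get them back when that query is requested.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.solr.handler.dataimport.MockDataSource;

public class MockDataSourceUsage {
  public static void main(String[] args) {
    Map<String, Object> row = new HashMap<String, Object>();
    row.put("id", "1");
    List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
    rows.add(row);
    // register canned rows for a query
    MockDataSource.setIterator("select * from x", rows.iterator());
    MockDataSource ds = new MockDataSource();
    ds.init(null, new Properties());   // init is a no-op
    Iterator<Map<String, Object>> it = ds.getData("select * from x");
    System.out.println(it.next());     // prints {id=1}
    MockDataSource.clearCache();
  }
}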

View File

@@ -0,0 +1,97 @@
package org.apache.solr.handler.dataimport;
import java.text.NumberFormat;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
* <p>
* A Transformer instance which can extract numbers out of strings. It uses the
* <code>java.text.NumberFormat</code> class to parse strings and supports the
* Number, Integer, Currency and Percent styles provided by
* <code>java.text.NumberFormat</code>.
* </p>
* <p/>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p/>
* <b>This API is experimental and may change in the future.</b>
*
* @version $Id$
* @since solr 1.3
*/
public class NumberFormatTransformer extends Transformer {
@SuppressWarnings("unchecked")
public Object transformRow(Map<String, Object> row, Context context) {
for (Map<String, String> fld : context.getAllEntityFields()) {
String style = fld.get(FORMAT_STYLE);
if (style != null) {
String column = fld.get(DataImporter.COLUMN);
String srcCol = fld.get(RegexTransformer.SRC_COL_NAME);
if (srcCol == null)
srcCol = column;
Object val = row.get(srcCol);
String styleSmall = style.toLowerCase();
if (val instanceof List) {
List<String> inputs = (List) val;
List results = new ArrayList();
for (String input : inputs) {
try {
results.add(process(input, styleSmall));
} catch (ParseException e) {
throw new DataImportHandlerException(
DataImportHandlerException.SEVERE,
"Failed to apply NumberFormat on column: " + column, e);
}
}
row.put(column, results);
} else {
if (val == null || val.toString().trim().equals(""))
continue;
try {
row.put(column, process(val.toString(), styleSmall));
} catch (ParseException e) {
throw new DataImportHandlerException(
DataImportHandlerException.SEVERE,
"Failed to apply NumberFormat on column: " + column, e);
}
}
}
}
return row;
}
private Number process(String val, String style) throws ParseException {
if (INTEGER.equals(style)) {
return NumberFormat.getIntegerInstance().parse(val);
} else if (NUMBER.equals(style)) {
return NumberFormat.getNumberInstance().parse(val);
} else if (CURRENCY.equals(style)) {
return NumberFormat.getCurrencyInstance().parse(val);
} else if (PERCENT.equals(style)) {
return NumberFormat.getPercentInstance().parse(val);
}
return null;
}
public static final String FORMAT_STYLE = "formatStyle";
public static final String LOCALE = "locale";
public static final String NUMBER = "number";
public static final String PERCENT = "percent";
public static final String INTEGER = "integer";
public static final String CURRENCY = "currency";
}
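
A field-level sketch (not in this commit; the columns and the short transformer class name are illustrative) of the formatStyle attribute this transformer looks for. Parsing uses the JVM default locale, since process() above calls the locale-less NumberFormat factory methods.

<entity name="item" transformer="NumberFormatTransformer"
        query="select price, discount from item">
  <!-- "1,234.50" becomes 1234.5 using NumberFormat's "number" style -->
  <field column="price"    formatStyle="number" />
  <!-- "15%" becomes 0.15 using the "percent" style -->
  <field column="discount" formatStyle="percent" />
</entity>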

View File

@@ -0,0 +1,146 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* <p>
* A Transformer implementation which uses Regular Expressions to extract, split
* and replace data in fields.
* </p>
* <p/>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p/>
* <b>This API is experimental and may change in the future.</b>
*
* @version $Id$
* @since solr 1.3
*/
public class RegexTransformer extends Transformer {
private static final Logger LOG = Logger.getLogger(RegexTransformer.class
.getName());
@SuppressWarnings("unchecked")
public Map<String, Object> transformRow(Map<String, Object> row,
Context context) {
List<Map<String, String>> fields = context.getAllEntityFields();
for (Map<String, String> field : fields) {
String col = field.get(DataImporter.COLUMN);
String reStr = field.get(REGEX);
String splitBy = field.get(SPLIT_BY);
String replaceWith = field.get(REPLACE_WITH);
if (reStr != null || splitBy != null) {
String srcColName = field.get(SRC_COL_NAME);
if (srcColName == null) {
srcColName = col;
}
Object tmpVal = row.get(srcColName);
if (tmpVal == null)
continue;
if (tmpVal instanceof List) {
List<String> inputs = (List<String>) tmpVal;
List results = new ArrayList();
for (String input : inputs) {
Object o = process(col, reStr, splitBy, replaceWith, input);
if (o != null)
results.add(o);
}
row.put(col, results);
} else {
String value = tmpVal.toString();
Object o = process(col, reStr, splitBy, replaceWith, value);
if (o != null)
row.put(col, o);
}
}
}
return row;
}
private Object process(String col, String reStr, String splitBy,
String replaceWith, String value) {
if (splitBy != null) {
return readBySplit(splitBy, value);
} else if (replaceWith != null) {
Pattern p = getPattern(reStr);
return p.matcher(value).replaceAll(replaceWith);
} else {
return readfromRegExp(reStr, value, col);
}
}
@SuppressWarnings("unchecked")
private List<String> readBySplit(String splitBy, String value) {
String[] vals = value.split(splitBy);
List<String> l = new ArrayList<String>();
l.addAll(Arrays.asList(vals));
return l;
}
@SuppressWarnings("unchecked")
private Object readfromRegExp(String reStr, String value, String columnName) {
Pattern regexp = getPattern(reStr);
Matcher m = regexp.matcher(value);
if (m.find() && m.groupCount() > 0) {
if (m.groupCount() > 1) {
List l = new ArrayList();
for (int i = 1; i <= m.groupCount(); i++) {
try {
l.add(m.group(i));
} catch (Exception e) {
LOG.log(Level.WARNING, "Parsing failed for field : " + columnName,
e);
}
}
return l;
} else {
return m.group(1);
}
}
return null;
}
private Pattern getPattern(String reStr) {
Pattern result = PATTERN_CACHE.get(reStr);
if (result == null) {
PATTERN_CACHE.put(reStr, result = Pattern.compile(reStr));
}
return result;
}
private HashMap<String, Pattern> PATTERN_CACHE = new HashMap<String, Pattern>();
public static final String REGEX = "regex";
public static final String REPLACE_WITH = "replaceWith";
public static final String SPLIT_BY = "splitBy";
public static final String SRC_COL_NAME = "sourceColName";
}
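
A sketch (not part of this commit; columns are illustrative) of the three modes RegexTransformer supports via the attributes defined above: splitBy on its own, regex together with replaceWith, and regex alone with sourceColName to extract a group into a different column.

<entity name="person" transformer="RegexTransformer"
        query="select full_name, emails from person">
  <!-- split a comma-separated string into a multi-valued field -->
  <field column="emails"    splitBy="," />
  <!-- rewrite the value in place using a regex replacement -->
  <field column="full_name" regex="\s+" replaceWith=" " />
  <!-- pull the first regex group out of full_name into a new column -->
  <field column="firstName" sourceColName="full_name" regex="(\S+)\s+.*" />
</entity>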

View File

@@ -0,0 +1,102 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.Map;
/**
* <p>
* A Transformer instance capable of executing transformation functions written
* in scripting languages.
* </p>
* <p/>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p/>
* <b>This API is experimental and may change in the future.</b>
*
* @version $Id$
* @since solr 1.3
*/
public class ScriptTransformer extends Transformer {
private Object engine;
private Method invokeFunctionMethod;
private String functionName;
public Object transformRow(Map<String, Object> row, Context context) {
try {
if (engine == null)
initEngine(context);
if (engine == null)
return row;
return invokeFunctionMethod.invoke(engine, functionName, new Object[]{
row, context});
} catch (DataImportHandlerException e) {
throw e;
} catch (InvocationTargetException e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Could not invoke method :"
+ functionName
+ "\n <script>\n"
+ context.getVariableResolver().resolve(
DataConfig.IMPORTER_NS + "." + DataConfig.SCRIPT)
+ "</script>", e);
} catch (Exception e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Error invoking script for entity "
+ context.getEntityAttribute("name"), e);
}
}
private void initEngine(Context context) {
try {
String scriptText = (String) context.getVariableResolver().resolve(
DataConfig.IMPORTER_NS + "." + DataConfig.SCRIPT);
String scriptLang = (String) context.getVariableResolver().resolve(
DataConfig.IMPORTER_NS + "." + DataConfig.SCRIPT_LANG);
Object scriptEngineMgr = Class
.forName("javax.script.ScriptEngineManager").newInstance();
// create a Script engine
Method getEngineMethod = scriptEngineMgr.getClass().getMethod(
"getEngineByName", String.class);
engine = getEngineMethod.invoke(scriptEngineMgr, scriptLang);
Method evalMethod = engine.getClass().getMethod("eval", String.class);
invokeFunctionMethod = engine.getClass().getMethod("invokeFunction",
String.class, Object[].class);
evalMethod.invoke(engine, scriptText);
} catch (Exception e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"<script> can be used only in java 6 or above", e);
}
}
public void setFunctionName(String methodName) {
this.functionName = methodName;
}
public String getFunctionName() {
return functionName;
}
}
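
A data-config.xml sketch (not in this commit) of how a script function might be attached to an entity. The <script> element and the "script:functionName" form of the transformer attribute are assumptions based on the wiki's description; as noted above, this requires Java 6's javax.script support.

<dataConfig>
  <script><![CDATA[
    function addCategory(row) {
      row.put('category', 'misc');   // invoked once per row
      return row;
    }
  ]]></script>
  <document>
    <entity name="item" query="select id from item"
            transformer="script:addCategory" />
  </document>
</dataConfig>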

View File

@@ -0,0 +1,289 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.apache.lucene.document.Document;
import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.update.CommitUpdateCommand;
import org.apache.solr.update.DeleteUpdateCommand;
import org.apache.solr.update.UpdateHandler;
import java.io.*;
import java.text.ParseException;
import java.util.Date;
import java.util.Properties;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
* <p>
* Writes documents to Solr and provides methods for loading and
* persisting the last index time.
* </p>
* <p/>
* <b>This API is experimental and may change in the future.</b>
*
* @version $Id$
* @since solr 1.3
*/
public abstract class SolrWriter {
private static final Logger LOG = Logger
.getLogger(SolrWriter.class.getName());
static final String IMPORTER_PROPERTIES = "dataimport.properties";
static final String LAST_INDEX_KEY = "last_index_time";
private final UpdateHandler updater;
private final String configDir;
public SolrWriter(UpdateHandler updater, String confDir) {
this.updater = updater;
configDir = confDir;
}
public boolean upload(Document d) {
try {
AddUpdateCommand command = new AddUpdateCommand();
command.doc = d;
command.allowDups = false;
command.overwritePending = true;
command.overwriteCommitted = true;
updater.addDoc(command);
} catch (IOException e) {
LOG.log(Level.SEVERE, "Exception while adding: " + d, e);
return false;
} catch (Exception e) {
LOG.log(Level.WARNING, "Error creating document : " + d);
return false;
}
return true;
}
public void deleteDoc(Object id) {
try {
LOG.info("deleted from document to Solr: " + id);
DeleteUpdateCommand delCmd = new DeleteUpdateCommand();
delCmd.id = id.toString();
delCmd.fromPending = true;
delCmd.fromCommitted = true;
updater.delete(delCmd);
} catch (IOException e) {
LOG.log(Level.SEVERE, "Exception while deleteing: " + id, e);
}
}
Date getStartTime() {
Properties props = readIndexerProperties();
String result = props.getProperty(SolrWriter.LAST_INDEX_KEY);
try {
if (result != null)
return DataImporter.DATE_TIME_FORMAT.parse(result);
} catch (ParseException e) {
throw new DataImportHandlerException(DataImportHandlerException.WARN,
"Unable to read last indexed time from: "
+ SolrWriter.IMPORTER_PROPERTIES, e);
}
return null;
}
private void persistStartTime(Date date) {
OutputStream propOutput = null;
Properties props = readIndexerProperties();
try {
props.put(SolrWriter.LAST_INDEX_KEY, DataImporter.DATE_TIME_FORMAT
.format(date));
String filePath = configDir;
if (configDir != null && !configDir.endsWith(File.separator))
filePath += File.separator;
filePath += SolrWriter.IMPORTER_PROPERTIES;
propOutput = new FileOutputStream(filePath);
props.store(propOutput, null);
LOG.info("Wrote last indexed time to " + SolrWriter.IMPORTER_PROPERTIES);
} catch (FileNotFoundException e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Unable to persist Index Start Time", e);
} catch (IOException e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Unable to persist Index Start Time", e);
} finally {
try {
if (propOutput != null)
propOutput.close();
} catch (IOException e) {
propOutput = null;
}
}
}
private Properties readIndexerProperties() {
Properties props = new Properties();
InputStream propInput = null;
try {
propInput = new FileInputStream(configDir
+ SolrWriter.IMPORTER_PROPERTIES);
props.load(propInput);
LOG.info("Read " + SolrWriter.IMPORTER_PROPERTIES);
} catch (Exception e) {
LOG.log(Level.WARNING, "Unable to read: "
+ SolrWriter.IMPORTER_PROPERTIES);
} finally {
try {
if (propInput != null)
propInput.close();
} catch (IOException e) {
propInput = null;
}
}
return props;
}
public void deleteByQuery(String query) {
try {
LOG.info("Deleting documents from Solr with query: " + query);
DeleteUpdateCommand delCmd = new DeleteUpdateCommand();
delCmd.query = query;
delCmd.fromCommitted = true;
delCmd.fromPending = true;
updater.deleteByQuery(delCmd);
} catch (IOException e) {
LOG.log(Level.SEVERE, "Exception while deleting by query: " + query, e);
}
}
public void commit(boolean optimize) {
try {
CommitUpdateCommand commit = new CommitUpdateCommand(optimize);
updater.commit(commit);
} catch (Exception e) {
LOG.log(Level.SEVERE, "Exception while solr commit.", e);
}
}
public void doDeleteAll() {
try {
DeleteUpdateCommand deleteCommand = new DeleteUpdateCommand();
deleteCommand.query = "*:*";
deleteCommand.fromCommitted = true;
deleteCommand.fromPending = true;
updater.deleteByQuery(deleteCommand);
} catch (IOException e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Exception in full dump while deleting all documents.", e);
}
}
static String getResourceAsString(InputStream in) throws IOException {
ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
byte[] buf = new byte[1024];
int sz = 0;
try {
while (true) {
sz = in.read(buf);
baos.write(buf, 0, sz);
if (sz < buf.length)
break;
}
} finally {
try {
in.close();
} catch (Exception e) {
}
}
return new String(baos.toByteArray());
}
static String getDocCount() {
if (DocBuilder.INSTANCE.get() != null) {
return ""
+ (DocBuilder.INSTANCE.get().importStatistics.docCount.get() + 1);
} else {
return "";
}
}
public Date loadIndexStartTime() {
return this.getStartTime();
}
public Class loadClass(String name) throws ClassNotFoundException {
return Class.forName(name);
}
/**
* <p>
* Stores the last indexed time into the <code>IMPORTER_PROPERTIES</code>
* file. If any properties are already defined in the file, then they are
* preserved.
* </p>
*
* @param date
*/
public void persistIndexStartTime(Date date) {
this.persistStartTime(date);
}
public abstract SolrDoc getSolrDocInstance();
/**
* <p>
* Write the document to the index
* </p>
*
* @param d The SolrDoc wrapper object to write to the index
* @return true if the document was uploaded successfully
*/
public abstract boolean upload(SolrDoc d);
/**
* This method is used for verbose debugging
*
* @param event The event name: start.entity, end.entity, transformer.row
* @param name  Name of the entity/transformer
* @param row   The actual data. Can be a Map<String, Object> or a List<Map<String, Object>>
*/
public abstract void log(int event, String name, Object row);
/**
* The purpose of this interface is to provide pluggable implementations for Solr
* 1.2 & 1.3. The implementation can choose to wrap appropriate objects based
* on the version.
*/
public static interface SolrDoc {
public void addField(String name, Object value, float boost);
public Object getField(String field);
public void setDocumentBoost(float boost);
}
public static final int START_ENTITY = 1, END_ENTITY = 2,
TRANSFORMED_ROW = 3, ENTITY_META = 4, PRE_TRANSFORMER_ROW = 5,
START_DOC = 6, END_DOC = 7, ENTITY_OUT = 8, ROW_END = 9,
TRANSFORMER_EXCEPTION = 10, ENTITY_EXCEPTION = 11, DISABLE_LOGGING = 12,
ENABLE_LOGGING = 13;
}

View File

@@ -0,0 +1,171 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.util.Iterator;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* <p>
* An EntityProcessor instance which provides support for reading from
* databases. It is used in conjunction with JdbcDataSource. This is the default
* EntityProcessor if none is specified explicitly in data-config.xml.
* </p>
* <p/>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p/>
* <b>This API is experimental and may change in the future.</b>
*
* @version $Id$
* @since solr 1.3
*/
public class SqlEntityProcessor extends EntityProcessorBase {
private static final Logger LOG = Logger.getLogger(SqlEntityProcessor.class
.getName());
protected DataSource<Iterator<Map<String, Object>>> dataSource;
@SuppressWarnings("unchecked")
public void init(Context context) {
super.init(context);
dataSource = context.getDataSource();
}
protected void initQuery(String q) {
try {
DataImporter.QUERY_COUNT.get().incrementAndGet();
rowIterator = dataSource.getData(q);
this.query = q;
} catch (DataImportHandlerException e) {
throw e;
} catch (Exception e) {
LOG.log(Level.SEVERE, "The query failed '" + q + "'", e);
throw new DataImportHandlerException(DataImportHandlerException.SEVERE, e);
}
}
public Map<String, Object> nextRow() {
if (rowcache != null)
return getFromRowCache();
if (rowIterator == null) {
String q = getQuery();
initQuery(resolver.replaceTokens(q));
}
while (true) {
Map<String, Object> r = getNext();
if (r == null)
return null;
r = applyTransformer(r);
if (r != null)
return r;
}
}
public Map<String, Object> nextModifiedRowKey() {
if (rowIterator == null) {
String deltaQuery = context.getEntityAttribute(DELTA_QUERY);
if (deltaQuery == null)
return null;
initQuery(resolver.replaceTokens(deltaQuery));
}
return getNext();
}
public Map<String, Object> nextDeletedRowKey() {
if (rowIterator == null) {
String deletedPkQuery = context.getEntityAttribute(DEL_PK_QUERY);
if (deletedPkQuery == null)
return null;
initQuery(resolver.replaceTokens(deletedPkQuery));
}
return getNext();
}
public Map<String, Object> nextModifiedParentRowKey() {
if (rowIterator == null) {
String parentDeltaQuery = context.getEntityAttribute(PARENT_DELTA_QUERY);
if (parentDeltaQuery == null)
return null;
LOG.info("Running parentDeltaQuery for Entity: "
+ context.getEntityAttribute("name"));
initQuery(resolver.replaceTokens(parentDeltaQuery));
}
return getNext();
}
public String getQuery() {
String queryString = context.getEntityAttribute(QUERY);
if (context.currentProcess() == Context.FULL_DUMP
|| !context.isRootEntity()) {
return queryString;
}
return getDeltaImportQuery(queryString);
}
public String getDeltaImportQuery(String queryString) {
StringBuffer sb = new StringBuffer(queryString);
if (SELECT_WHERE_PATTERN.matcher(queryString).find()) {
sb.append(" and ");
} else {
sb.append(" where ");
}
boolean first = true;
String[] primaryKeys = context.getEntityAttribute("pk").split(",");
for (String primaryKey : primaryKeys) {
if (!first) {
sb.append(" and ");
}
first = false;
Object val = resolver.resolve("dataimporter.delta." + primaryKey);
if (val == null) {
Matcher m = DOT_PATTERN.matcher(primaryKey);
if (m.find()) {
val = resolver.resolve("dataimporter.delta." + m.group(1));
}
}
sb.append(primaryKey).append(" = ");
if (val instanceof Number) {
sb.append(val.toString());
} else {
sb.append("'").append(val.toString()).append("'");
}
}
return sb.toString();
}
private static Pattern SELECT_WHERE_PATTERN = Pattern.compile(
"^\\s*(select\\b.*?\\b)(where).*", Pattern.CASE_INSENSITIVE);
public static final String QUERY = "query";
public static final String DELTA_QUERY = "deltaQuery";
public static final String PARENT_DELTA_QUERY = "parentDeltaQuery";
public static final String DEL_PK_QUERY = "deletedPkQuery";
public static final Pattern DOT_PATTERN = Pattern.compile(".*?\\.(.*)$");
}
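
A sketch (not part of this commit; table and column names are made up) of a full-import query paired with the delta attributes handled above. On a delta import, getDeltaImportQuery() appends "where id = <value>" to the main query for each key returned by deltaQuery; ${dataimporter.last_index_time} is assumed here to be the variable under which the timestamp persisted by SolrWriter is exposed.

<entity name="item" pk="id" processor="SqlEntityProcessor"
        query="select id, name from item"
        deltaQuery="select id from item
                    where last_modified &gt; '${dataimporter.last_index_time}'"
        deletedPkQuery="select id from deleted_items
                        where deleted_at &gt; '${dataimporter.last_index_time}'">
  <field column="id"   name="id" />
  <field column="name" name="name" />
</entity>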

View File

@@ -0,0 +1,115 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* <p>
* Provides functionality for replacing variables in a templatized string. It
* can also be used to get the place-holders (variables) in a templatized
* string.
* </p>
* <p/>
* <b>This API is experimental and may change in the future.</b>
*
* @version $Id$
* @since solr 1.3
*/
public class TemplateString {
private List<String> variables = new ArrayList<String>();
private List<String> pcs = new ArrayList<String>();
private Map<String, TemplateString> cache;
public TemplateString() {
cache = new HashMap<String, TemplateString>();
}
private TemplateString(String s) {
Matcher m = WORD_PATTERN.matcher(s);
int idx = 0;
while (m.find()) {
String aparam = s.substring(m.start() + 2, m.end() - 1);
variables.add(aparam);
pcs.add(s.substring(idx, m.start()));
idx = m.end();
}
pcs.add(s.substring(idx));
}
/**
* Returns a string with all variables replaced by the known values. An
* unknown variable is replaced by an empty string.
*
* @param string   the templatized string
* @param resolver the resolver used to look up variable values
* @return the string with all variables replaced by their resolved values
*/
public String replaceTokens(String string, VariableResolver resolver) {
TemplateString ts = cache.get(string);
if (ts == null) {
ts = new TemplateString(string);
cache.put(string, ts);
}
return ts.fillTokens(resolver);
}
private String fillTokens(VariableResolver resolver) {
String[] s = new String[variables.size()];
for (int i = 0; i < variables.size(); i++) {
Object val = resolver.resolve(variables.get(i));
s[i] = val == null ? "" : getObjectAsString(val);
}
StringBuffer sb = new StringBuffer();
for (int i = 0; i < pcs.size(); i++) {
sb.append(pcs.get(i));
if (i < s.length) {
sb.append(s[i]);
}
}
return sb.toString();
}
private String getObjectAsString(Object val) {
if (val instanceof java.sql.Date) {
java.sql.Date d = (java.sql.Date) val;
return DataImporter.DATE_TIME_FORMAT.format(d);
}
return val.toString();
}
/**
* Returns the variables in the given string.
*
* @param s the templatized string
* @return the list of variables (strings) in the given templatized string.
*/
public static List<String> getVariables(String s) {
return new TemplateString(s).variables;
}
static final Pattern WORD_PATTERN = Pattern.compile("(\\$\\{.*?\\})");
}
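
A small usage sketch (not part of this commit; the namespace and values are made up) exercising TemplateString together with VariableResolverImpl, which is defined later in this commit: variables of the form ${ns.name} are listed by getVariables() and substituted by replaceTokens().

import java.util.HashMap;
import java.util.Map;
import org.apache.solr.handler.dataimport.TemplateString;
import org.apache.solr.handler.dataimport.VariableResolverImpl;

public class TemplateStringUsage {
  public static void main(String[] args) {
    Map<String, Object> ns = new HashMap<String, Object>();
    ns.put("lastName", "Doe");
    ns.put("firstName", "John");
    VariableResolverImpl resolver = new VariableResolverImpl();
    resolver.addNamespace("e", ns);

    String template = "${e.lastName}, ${e.firstName}";
    // prints [e.lastName, e.firstName]
    System.out.println(TemplateString.getVariables(template));
    // prints "Doe, John"; an unknown variable would become ""
    System.out.println(new TemplateString().replaceTokens(template, resolver));
  }
}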

View File

@@ -0,0 +1,108 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;
/**
* <p>
* A Transformer which can put values into a column by resolving an expression
* containing other columns
* </p>
* <p/>
* <p>
* For example:<br />
* &lt;field column="name" template="${e.lastName}, ${e.firstName}
* ${e.middleName}" /&gt; will produce the name by combining values from
* lastName, firstName and middleName fields as given in the template attribute.
* </p>
* <p/>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p/>
* <b>This API is experimental and may change in the future.</b>
*
* @version $Id$
* @since solr 1.3
*/
public class TemplateTransformer extends Transformer {
private static final Logger LOG = Logger.getLogger(TemplateTransformer.class
.getName());
@SuppressWarnings("unchecked")
public Object transformRow(Map<String, Object> row, Context context) {
String entityName = context.getEntityAttribute(DataImporter.NAME);
VariableResolverImpl resolver = (VariableResolverImpl) context
.getVariableResolver();
Map<String, Object> resolverMap = (Map<String, Object>) resolver
.resolve(entityName);
// Clone the resolver map because it contains common fields or any others
// that the entity processor chooses to keep.
Map<String, Object> resolverMapCopy = new HashMap<String, Object>();
if (resolverMap != null) {
for (Map.Entry<String, Object> entry : resolverMap.entrySet())
resolverMapCopy.put(entry.getKey(), entry.getValue());
}
// Add current row to the copy of resolver map
for (Map.Entry<String, Object> entry : row.entrySet())
resolverMapCopy.put(entry.getKey(), entry.getValue());
// Add this copy to the namespace of the current entity in the resolver
resolver.addNamespace(entityName, resolverMapCopy);
for (Map<String, String> map : context.getAllEntityFields()) {
String expr = map.get(TEMPLATE);
if (expr == null)
continue;
String column = map.get(DataImporter.COLUMN);
// Verify if all variables can be resolved or not
boolean resolvable = true;
List<String> variables = TemplateString.getVariables(expr);
for (String v : variables) {
if (resolver.resolve(v) == null) {
LOG.warning("Unable to resolve variable: " + v
+ " while parsing expression: " + expr);
resolvable = false;
}
}
if (!resolvable)
continue;
row.put(column, resolver.replaceTokens(expr));
}
// Restore the original resolver map
resolver.addNamespace(entityName, resolverMap);
return row;
}
public static final String TEMPLATE = "template";
}

View File

@@ -0,0 +1,52 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.util.Map;
/**
* <p>
* Use this API to implement a custom transformer for any given entity
* </p>
* <p/>
* <p>
* Implementations of this interface must provide a public no-args constructor.
* </p>
* <p/>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p/>
* <b>This API is experimental and may change in the future.</b>
*
* @version $Id$
* @since solr 1.3
*/
public abstract class Transformer {
/**
* The input is a row of data and the output has to be a new row.
*
* @param context The current context
* @param row A row of data
* @return The changed data. It must be a Map<String, Object> if only one row
*         is returned, or a List<Map<String, Object>> if multiple rows are
*         to be returned.
*/
public abstract Object transformRow(Map<String, Object> row, Context context);
}
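
A minimal custom transformer sketch (not part of this commit) following the contract above: a public no-args constructor and a transformRow method that returns the (possibly modified) row. The class name, package and the "name" column are assumptions.

import java.util.Map;
import org.apache.solr.handler.dataimport.Context;
import org.apache.solr.handler.dataimport.Transformer;

// Would be referenced on an entity with transformer="com.example.TrimTransformer" (hypothetical)
public class TrimTransformer extends Transformer {
  public Object transformRow(Map<String, Object> row, Context context) {
    Object name = row.get("name");            // assumed column
    if (name != null)
      row.put("name", name.toString().trim()); // normalize whitespace
    return row;                                // a Map, since only one row is returned
  }
}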

View File

@@ -0,0 +1,53 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
/**
* <p>
* This class is more or less like a Map, but has more intelligence to resolve
* namespaces. Namespaces are delimited with '.' (period).
* </p>
* <p/>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p/>
* <b>This API is experimental and may change in the future.</b>
*
* @version $Id$
* @since solr 1.3
*/
public abstract class VariableResolver {
/**
* Resolves a given name to a value
*
* @param name the (possibly namespaced) name to resolve
* @return the resolved value, or null if it cannot be resolved
*/
public abstract Object resolve(String name);
/**
* Given a String with placeholders, replace them with their resolved values.
*
* @param template
* @return the string with the placeholders replaced with their values
*/
public abstract String replaceTokens(String template);
}

View File

@@ -0,0 +1,120 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
/**
* <p>
* The default implementation of the VariableResolver interface
* </p>
* <p/>
* <b>This API is experimental and may change in the future.</b>
*
* @version $Id$
* @see VariableResolver
* @since solr 1.3
*/
public class VariableResolverImpl extends VariableResolver {
private Map<String, Object> container = new HashMap<String, Object>();
private static final TemplateString TEMPLATE_STRING = new TemplateString();
public VariableResolverImpl() {
}
@SuppressWarnings("unchecked")
public VariableResolverImpl addNamespace(String name, Map<String, Object> map) {
if (name != null) {
String[] parts = DOT_SPLIT.split(name, 0);
Map ns = container;
for (int i = 0; i < parts.length; i++) {
if (i == parts.length - 1) {
ns.put(parts[i], map);
}
if (ns.get(parts[i]) == null) {
ns.put(parts[i], new HashMap());
ns = (Map) ns.get(parts[i]);
} else {
if (ns.get(parts[i]) instanceof Map) {
ns = (Map) ns.get(parts[i]);
} else {
ns.put(parts[i], new HashMap());
ns = (Map) ns.get(parts[i]);
}
}
}
} else {
container.putAll(map);
}
return this;
}
public void removeNamespace(String name) {
if (name != null)
container.remove(name);
}
public String replaceTokens(String template) {
return TEMPLATE_STRING.replaceTokens(template, this);
}
@SuppressWarnings("unchecked")
public Object resolve(String name) {
if (name == null)
return container;
if ("".equals(name))
return null;
String[] parts = DOT_SPLIT.split(name, 0);
Map<String, Object> namespace = container;
for (int i = 0; i < parts.length; i++) {
String thePart = parts[i];
if (i == parts.length - 1) {
return namespace.get(thePart);
}
Object temp = namespace.get(thePart);
if (temp == null) {
return namespace.get(mergeAll(parts, i));
} else {
if (temp instanceof Map) {
namespace = (Map) temp;
} else {
return null;
}
}
}
return null;
}
private String mergeAll(String[] parts, int i) {
if (i == parts.length - 1)
return parts[parts.length - 1];
StringBuffer sb = new StringBuffer();
for (int j = i; j < parts.length; j++) {
sb.append(parts[j]);
if (j < parts.length - 1)
sb.append(".");
}
return sb.toString();
}
static final Pattern DOT_SPLIT = Pattern.compile("\\.");
}
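
A short sketch (not part of this commit; names and values are made up) of namespace resolution in VariableResolverImpl: addNamespace() nests maps on '.' boundaries and resolve() walks them back down.

import java.util.HashMap;
import java.util.Map;
import org.apache.solr.handler.dataimport.VariableResolverImpl;

public class VariableResolverUsage {
  public static void main(String[] args) {
    VariableResolverImpl resolver = new VariableResolverImpl();
    Map<String, Object> row = new HashMap<String, Object>();
    row.put("ID", "42");
    resolver.addNamespace("dataimporter.delta", row);

    System.out.println(resolver.resolve("dataimporter.delta.ID"));              // 42
    System.out.println(resolver.resolve("dataimporter.missing"));               // null
    System.out.println(resolver.replaceTokens("id=${dataimporter.delta.ID}"));  // id=42
  }
}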

View File

@@ -0,0 +1,329 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import javax.xml.transform.Source;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import java.io.CharArrayReader;
import java.io.CharArrayWriter;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;
/**
* <p>
* An implementation of EntityProcessor which uses a streaming xpath parser to
* extract values out of XML documents. It is typically used in conjunction with
* HttpDataSource or FileDataSource.
* </p>
* <p/>
* <p>
* Refer to <a
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
* for more details.
* </p>
* <p/>
* <b>This API is experimental and may change in the future.</b>
*
* @version $Id$
* @see XPathRecordReader
* @since solr 1.3
*/
public class XPathEntityProcessor extends EntityProcessorBase {
private static final Logger LOG = Logger.getLogger(XPathEntityProcessor.class
.getName());
protected List<String> placeHolderVariables;
protected List<String> commonFields;
private String pk;
private XPathRecordReader xpathReader;
protected DataSource<Reader> dataSource;
protected javax.xml.transform.Transformer xslTransformer;
@SuppressWarnings("unchecked")
public void init(Context context) {
super.init(context);
if (xpathReader == null)
initXpathReader();
pk = context.getEntityAttribute("pk");
dataSource = context.getDataSource();
}
private void initXpathReader() {
boolean useSolrAddXml = Boolean.parseBoolean(context
.getEntityAttribute(USE_SOLR_ADD_SCHEMA));
String xslt = context.getEntityAttribute(XSL);
if (xslt != null) {
xslt = resolver.replaceTokens(xslt);
try {
Source xsltSource = new StreamSource(xslt);
// create an instance of TransformerFactory
TransformerFactory transFact = TransformerFactory.newInstance();
xslTransformer = transFact.newTransformer(xsltSource);
LOG
.info("Using xslTransformer: "
+ xslTransformer.getClass().getName());
} catch (Exception e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Error initializing XSL ", e);
}
}
if (useSolrAddXml) {
// Support solr add documents
xpathReader = new XPathRecordReader("/add/doc");
xpathReader.addField("name", "/add/doc/field/@name", true);
xpathReader.addField("value", "/add/doc/field", true);
} else {
String forEachXpath = context.getEntityAttribute(FOR_EACH);
if (forEachXpath == null)
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Entity : " + context.getEntityAttribute("name")
+ " must have a 'forEach' attribute");
try {
xpathReader = new XPathRecordReader(forEachXpath);
for (Map<String, String> field : context.getAllEntityFields()) {
if (field.get(XPATH) == null)
continue;
xpathReader.addField(field.get(DataImporter.COLUMN),
field.get(XPATH), Boolean.parseBoolean(field
.get(DataImporter.MULTI_VALUED)));
}
} catch (RuntimeException e) {
throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
"Exception while reading xpaths for fields", e);
}
}
List<String> l = TemplateString.getVariables(context
.getEntityAttribute(URL));
for (String s : l) {
if (s.startsWith(entityName + ".")) {
if (placeHolderVariables == null)
placeHolderVariables = new ArrayList<String>();
placeHolderVariables.add(s.substring(entityName.length() + 1));
}
}
for (Map<String, String> fld : context.getAllEntityFields()) {
if (fld.get(COMMON_FIELD) != null && "true".equals(fld.get(COMMON_FIELD))) {
if (commonFields == null)
commonFields = new ArrayList<String>();
commonFields.add(fld.get(DataImporter.COLUMN));
}
}
}
public Map<String, Object> nextRow() {
Map<String, Object> result;
if (!context.isRootEntity())
return fetchNextRow();
while (true) {
result = fetchNextRow();
if (result == null)
return null;
if (pk == null || result.get(pk) != null)
return result;
}
}
@SuppressWarnings("unchecked")
private Map<String, Object> fetchNextRow() {
Map<String, Object> r = null;
while (true) {
if (rowcache != null)
return getFromRowCache();
if (rowIterator == null)
initQuery(resolver.replaceTokens(context.getEntityAttribute(URL)));
r = getNext();
if (r == null) {
Object hasMore = getSessionAttribute(HAS_MORE);
if ("true".equals(hasMore) || Boolean.TRUE.equals(hasMore)) {
String url = (String) getSessionAttribute(NEXT_URL);
if (url == null)
url = context.getEntityAttribute(URL);
Map namespace = (Map) getSessionAttribute(entityName);
if (namespace != null)
resolver.addNamespace(entityName, namespace);
clearSession();
initQuery(resolver.replaceTokens(url));
r = getNext();
if (r == null)
return null;
} else {
return null;
}
}
r = applyTransformer(r);
if (r != null)
return readUsefulVars(r);
}
}
private void initQuery(String s) {
Reader data = null;
try {
final List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
data = dataSource.getData(s);
if (xslTransformer != null) {
try {
SimpleCharArrayReader caw = new SimpleCharArrayReader();
xslTransformer.transform(new StreamSource(data),
new StreamResult(caw));
data = caw.getReader();
} catch (TransformerException e) {
throw new DataImportHandlerException(
DataImportHandlerException.SEVERE,
"Exception in applying XSL Transformeation", e);
}
}
final List<Map<String, Object>> solrDocs = new ArrayList<Map<String, Object>>();
final boolean useSolrAddXml = Boolean.parseBoolean(context
.getEntityAttribute(USE_SOLR_ADD_SCHEMA));
xpathReader.streamRecords(data, new XPathRecordReader.Handler() {
@SuppressWarnings("unchecked")
public void handle(Map<String, Object> record, String xpath) {
if (useSolrAddXml) {
List<String> names = (List<String>) record.get("name");
List<String> values = (List<String>) record.get("value");
Map<String, Object> row = new HashMap<String, Object>();
for (int i = 0; i < names.size(); i++) {
if (row.containsKey(names.get(i))) {
Object existing = row.get(names.get(i));
if (existing instanceof List) {
List list = (List) existing;
list.add(values.get(i));
} else {
List list = new ArrayList();
list.add(existing);
list.add(values.get(i));
row.put(names.get(i), list);
}
} else {
row.put(names.get(i), values.get(i));
}
}
solrDocs.add(row);
} else {
record.put(XPATH_FIELD_NAME, xpath);
rows.add(record);
}
}
});
if (useSolrAddXml) {
rowIterator = solrDocs.iterator();
} else {
rowIterator = rows.iterator();
}
} finally {
try {
data.close();
} catch (Exception e) { /* Ignore */
}
}
}
private static class SimpleCharArrayReader extends CharArrayWriter {
public Reader getReader() {
return new CharArrayReader(super.buf, 0, super.count);
}
}
@SuppressWarnings("unchecked")
private Map<String, Object> readUsefulVars(Map<String, Object> r) {
Object val = r.get(HAS_MORE);
if (val != null)
setSessionAttribute(HAS_MORE, val);
val = r.get(NEXT_URL);
if (val != null)
setSessionAttribute(NEXT_URL, val);
if (placeHolderVariables != null) {
Map namespace = getNameSpace();
for (String s : placeHolderVariables) {
val = r.get(s);
if (val != null)
namespace.put(s, val);
}
}
if (commonFields != null) {
for (String s : commonFields) {
Object commonVal = r.get(s);
if (commonVal != null) {
setSessionAttribute(s, commonVal);
getNameSpace().put(s, commonVal);
} else {
commonVal = getSessionAttribute(s);
if (commonVal != null)
r.put(s, commonVal);
}
}
}
return r;
}
@SuppressWarnings("unchecked")
private Map getNameSpace() {
Map namespace = (Map) getSessionAttribute(entityName);
if (namespace == null) {
namespace = new HashMap();
setSessionAttribute(entityName, namespace);
}
return namespace;
}
public static final String URL = "url";
public static final String HAS_MORE = "$hasMore";
public static final String NEXT_URL = "$nextUrl";
public static final String XPATH_FIELD_NAME = "$forEach";
public static final String FOR_EACH = "forEach";
public static final String XPATH = "xpath";
public static final String COMMON_FIELD = "commonField";
public static final String USE_SOLR_ADD_SCHEMA = "useSolrAddSchema";
public static final String XSL = "xsl";
}
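
Note: the processor above is driven entirely by entity attributes: url names the resource handed to the DataSource, forEach is the record-delimiting xpath, each field carries its own xpath, and the optional useSolrAddSchema, xsl, commonField and pk attributes are read in init(). A row may also return the special $hasMore / $nextUrl keys to request a further fetch. The following is a minimal configuration sketch written as an embedded Java string, in the same style as the test fixtures in this commit; the feed URL, entity name and field names are illustrative assumptions only, not part of this change.

  // Hypothetical data-config snippet; only attributes defined by
  // XPathEntityProcessor above (url, forEach, xpath, pk) are used.
  public static final String dc_xpathEntity = "<dataConfig>\n"
      + "  <document>\n"
      + "    <entity name=\"item\" pk=\"link\" processor=\"XPathEntityProcessor\"\n"
      + "            url=\"http://example.com/feed.xml\" forEach=\"/rss/channel/item\">\n"
      + "      <field column=\"title\" xpath=\"/rss/channel/item/title\"/>\n"
      + "      <field column=\"link\"  xpath=\"/rss/channel/item/link\"/>\n"
      + "    </entity>\n"
      + "  </document>\n"
      + "</dataConfig>";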

View File

@ -0,0 +1,327 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import javax.xml.stream.XMLInputFactory;
import static javax.xml.stream.XMLStreamConstants.*;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import java.io.IOException;
import java.io.Reader;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* <p>
* A streaming xpath parser which uses StAX for XML parsing. It supports only a
* subset of xpath syntax.
* </p>
* <p/>
* <b>This API is experimental and may change in the future.</b>
*
* @version $Id$
* @since solr 1.3
*/
public class XPathRecordReader {
private Node rootNode = new Node("/", null);
public XPathRecordReader(String forEachXpath) {
String[] splits = forEachXpath.split("\\|");
for (String split : splits) {
split = split.trim();
if (split.length() == 0)
continue;
addField0(split, split, false, true);
}
}
public synchronized XPathRecordReader addField(String name, String xpath,
boolean multiValued) {
if (!xpath.startsWith("/"))
throw new RuntimeException("xpath must start with '/' : " + xpath);
addField0(xpath, name, multiValued, false);
return this;
}
private void addField0(String xpath, String name, boolean multiValued,
boolean isRecord) {
List<String> paths = new LinkedList<String>(Arrays.asList(xpath.split("/")));
if ("".equals(paths.get(0).trim()))
paths.remove(0);
rootNode.build(paths, name, multiValued, isRecord);
}
public List<Map<String, Object>> getAllRecords(Reader r) {
final List<Map<String, Object>> results = new ArrayList<Map<String, Object>>();
streamRecords(r, new Handler() {
public void handle(Map<String, Object> record, String s) {
results.add(record);
}
});
return results;
}
public void streamRecords(Reader r, Handler handler) {
try {
XMLStreamReader parser = factory.createXMLStreamReader(r);
rootNode.parse(parser, handler, new HashMap<String, Object>(),
new Stack<Set<String>>(), false);
} catch (Exception e) {
throw new RuntimeException(e);
}
}
private class Node {
String name, fieldName, xpathName, forEachPath;
List<Node> attributes, childNodes;
List<Map.Entry<String, String>> attribAndValues;
Node parent;
boolean hasText = false, multiValued = false, isRecord = false;
public Node(String name, Node p) {
xpathName = this.name = name;
parent = p;
}
public Node(String name, String fieldName, boolean multiValued) {
this.name = name;
this.fieldName = fieldName;
this.multiValued = multiValued;
}
private void parse(XMLStreamReader parser, Handler handler,
Map<String, Object> values, Stack<Set<String>> stack,
boolean recordStarted) throws IOException, XMLStreamException {
Set<String> valuesAddedinThisFrame = null;
if (isRecord) {
recordStarted = true;
valuesAddedinThisFrame = new HashSet<String>();
stack.push(valuesAddedinThisFrame);
} else if (recordStarted) {
valuesAddedinThisFrame = stack.peek();
} else {
if (attributes != null || hasText)
valuesAddedinThisFrame = new HashSet<String>();
stack.push(valuesAddedinThisFrame);
}
try {
if (attributes != null) {
for (Node node : attributes) {
String value = parser.getAttributeValue(null, node.name);
if (value != null || (recordStarted && !isRecord)) {
putText(values, value, node.fieldName, node.multiValued);
valuesAddedinThisFrame.add(node.fieldName);
}
}
}
Set<Node> childrenFound = new HashSet<Node>();
boolean skipNextEvent = false;
int event = -1;
while (true) {
if (!skipNextEvent) {
event = parser.next();
skipNextEvent = false;
}
if (event == END_DOCUMENT) {
return;
}
if (event == END_ELEMENT) {
if (isRecord)
handler.handle(new HashMap<String, Object>(values), forEachPath);
if (recordStarted && !isRecord
&& !childrenFound.containsAll(childNodes)) {
for (Node n : childNodes) {
if (!childrenFound.contains(n))
n.putNulls(values);
}
}
return;
}
if ((event == CDATA || event == CHARACTERS || event == SPACE)
&& hasText) {
valuesAddedinThisFrame.add(fieldName);
skipNextEvent = true;
String text = parser.getText();
event = parser.next();
while (event == CDATA || event == CHARACTERS || event == SPACE) {
text = text + parser.getText();
event = parser.next();
}
putText(values, text, fieldName, multiValued);
} else if (event == START_ELEMENT) {
Node n = getMatchingChild(parser);
if (n != null) {
childrenFound.add(n);
n.parse(parser, handler, values, stack, recordStarted);
} else {
skipTag(parser);
}
}
}
} finally {
Set<String> cleanThis = null;
if (isRecord || !recordStarted) {
cleanThis = stack.pop();
} else {
return;
}
if (cleanThis != null) {
for (String fld : cleanThis) {
values.remove(fld);
}
}
}
}
private Node getMatchingChild(XMLStreamReader parser) {
if (childNodes == null)
return null;
String localName = parser.getLocalName();
for (Node n : childNodes) {
if (n.name.equals(localName)) {
if (n.attribAndValues == null)
return n;
if (checkForAttributes(parser, n.attribAndValues))
return n;
}
}
return null;
}
private boolean checkForAttributes(XMLStreamReader parser,
List<Map.Entry<String, String>> attrs) {
for (Map.Entry<String, String> e : attrs) {
String val = parser.getAttributeValue(null, e.getKey());
if (val == null)
return false;
if (e.getValue() != null && !e.getValue().equals(val))
return false;
}
return true;
}
private void putNulls(Map<String, Object> values) {
if (attributes != null) {
for (Node n : attributes) {
if (n.multiValued)
putText(values, null, n.fieldName, true);
}
}
if (hasText && multiValued)
putText(values, null, fieldName, true);
if (childNodes != null) {
for (Node childNode : childNodes)
childNode.putNulls(values);
}
}
@SuppressWarnings("unchecked")
private void putText(Map<String, Object> values, String value,
String fieldName, boolean multiValued) {
if (multiValued) {
List<String> v = (List<String>) values.get(fieldName);
if (v == null) {
v = new ArrayList<String>();
values.put(fieldName, v);
}
v.add(value);
} else {
values.put(fieldName, value);
}
}
private void skipTag(XMLStreamReader parser) throws IOException,
XMLStreamException {
int type;
while ((type = parser.next()) != END_ELEMENT) {
if (type == START_ELEMENT)
skipTag(parser);
}
}
public void build(List<String> paths, String fieldName,
boolean multiValued, boolean record) {
String name = paths.remove(0);
if (paths.isEmpty() && name.startsWith("@")) {
if (attributes == null) {
attributes = new ArrayList<Node>();
}
name = name.substring(1);
attributes.add(new Node(name, fieldName, multiValued));
} else {
if (childNodes == null)
childNodes = new ArrayList<Node>();
Node n = getOrAddChildNode(name);
if (paths.isEmpty()) {
if (record) {
n.isRecord = true;
n.forEachPath = fieldName;
} else {
n.hasText = true;
n.fieldName = fieldName;
n.multiValued = multiValued;
}
} else {
n.build(paths, fieldName, multiValued, record);
}
}
}
private Node getOrAddChildNode(String xpathName) {
for (Node n : childNodes)
if (n.xpathName.equals(xpathName))
return n;
Node n = new Node(xpathName, this);
Matcher m = ATTRIB_PRESENT_WITHVAL.matcher(xpathName);
if (m.find()) {
n.name = m.group(1);
int start = m.start(2);
while (true) {
HashMap<String, String> attribs = new HashMap<String, String>();
if (!m.find(start))
break;
attribs.put(m.group(3), m.group(5));
start = m.end(6);
if (n.attribAndValues == null)
n.attribAndValues = new ArrayList<Map.Entry<String, String>>();
n.attribAndValues.addAll(attribs.entrySet());
}
}
childNodes.add(n);
return n;
}
}
static XMLInputFactory factory = XMLInputFactory.newInstance();
public static interface Handler {
public void handle(Map<String, Object> record, String xpath);
}
private static final Pattern ATTRIB_PRESENT_WITHVAL = Pattern
.compile("(\\S*?)?(\\[@)(\\S*?)(='(.*?)')?(\\])");
}
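
Note: XPathRecordReader exposes a small API: the constructor takes the record-delimiting xpath(s), addField() registers absolute xpaths (attribute steps start with '@' on the last segment), and getAllRecords()/streamRecords() drive the StAX parse. A minimal usage sketch against an in-memory document; the XML content and field names here are made up for illustration.

  import java.io.StringReader;
  import java.util.List;
  import java.util.Map;
  import org.apache.solr.handler.dataimport.XPathRecordReader;

  public class XPathRecordReaderExample {
    public static void main(String[] args) {
      // One record is emitted per element matching the forEach xpath.
      XPathRecordReader rr = new XPathRecordReader("/catalog/book");
      rr.addField("title", "/catalog/book/title", false);
      rr.addField("tag", "/catalog/book/@tag", false);
      List<Map<String, Object>> records = rr.getAllRecords(new StringReader(
          "<catalog><book tag=\"new\"><title>Solr 1.3</title></book></catalog>"));
      System.out.println(records); // e.g. [{tag=new, title=Solr 1.3}]
    }
  }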

View File

@ -0,0 +1,24 @@
<%@ page contentType="text/html; charset=utf-8" pageEncoding="UTF-8"%>
<%--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
--%>
<%-- do a verbatim include so we can use the local vars --%>
<html>
<frameset cols = "50%, 50%">
<frame src ="debug.jsp" />
<frame src ="../dataimport?command=full-import&debug=on&verbose=true" name="result"/>
</frameset>
</html>

View File

@ -0,0 +1,73 @@
<%@ page contentType="text/html; charset=utf-8" pageEncoding="UTF-8"%>
<%--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
--%>
<%-- do a verbatim include so we can use the local vars --%>
<%@include file="_info.jsp"%>
<html>
<head>
<title>DataImportHandler Interactive Development</title>
<link rel="stylesheet" type="text/css" href="solr-admin.css">
<link rel="icon" href="favicon.ico" type="image/ico"></link>
<link rel="shortcut icon" href="favicon.ico" type="image/ico"></link>
<script src="jquery-1.2.3.min.js"></script>
</head>
<body>
<h1>DataImportHandler Development Console</h1>
<br />
<form action="../dataimport" target="result" method="post">
<input type="hidden" name="debug" value="on">
<table>
<tr>
<td colspan="2">
<table width="100%">
<tr>
<td>
<select name="command">
<option value="full-import" selected="selected">full-import</option>
<option value="delta-import">delta-import</option>
</select>
</td>
<td><strong>Verbose</strong>&nbsp;<input
name="verbose" type="checkbox"></td>
<td><strong>Commit</strong>&nbsp;<input
name="commit" type="checkbox"></td>
<td><strong>Clean</strong>&nbsp;<input
name="clean" type="checkbox"></td>
<td><strong>Start Row</strong>&nbsp;<input
name="start" size="4" type="text" value="0"></td>
<td><strong>No:of Rows</strong>&nbsp;<input name="rows"
type="text" size="4" value="10"></td>
</tr>
</table>
</td>
<tr>
<td><strong>data config xml</strong></td>
<td><input class="stdbutton" type="submit" value="debug now">
</td>
</tr>
<tr>
<td colspan="2"><textarea id="txtDataConfig" rows="30" cols="80" name="dataConfig"></textarea></td>
<script type="text/javascript" language="Javascript">
$.get('../dataimport?command=show-config', function(data){
$('#txtDataConfig').attr('value', data);
});
</script>
</tr>
</table>
</form>
</body>
</html>

View File

@ -0,0 +1,250 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.junit.Assert;
import org.junit.Test;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
* <p>
* Test for CachedSqlEntityProcessor
* </p>
*
* @version $Id$
* @since solr 1.3
*/
public class TestCachedSqlEntityProcessor {
@Test
public void withoutWhereClause() {
List fields = new ArrayList();
fields.add(AbstractDataImportHandlerTest.createMap("column", "id"));
fields.add(AbstractDataImportHandlerTest.createMap("column", "desc"));
String q = "select * from x where id=${x.id}";
Map<String, String> entityAttrs = AbstractDataImportHandlerTest.createMap(
"query", q);
MockDataSource ds = new MockDataSource();
VariableResolverImpl vr = new VariableResolverImpl();
vr.addNamespace("x", AbstractDataImportHandlerTest.createMap("id", 1));
Context context = AbstractDataImportHandlerTest.getContext(null, vr, ds, 0,
fields, entityAttrs);
List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
rows.add(AbstractDataImportHandlerTest.createMap("id", 1, "desc", "one"));
rows.add(AbstractDataImportHandlerTest.createMap("id", 1, "desc",
"another one"));
MockDataSource.setIterator(vr.replaceTokens(q), rows.iterator());
CachedSqlEntityProcessor csep = new CachedSqlEntityProcessor();
csep.init(context);
rows = new ArrayList<Map<String, Object>>();
while (true) {
Map<String, Object> r = csep.nextRow();
if (r == null)
break;
rows.add(r);
}
Assert.assertEquals(2, rows.size());
ds.close();
csep.init(context);
rows = new ArrayList<Map<String, Object>>();
while (true) {
Map<String, Object> r = csep.nextRow();
if (r == null)
break;
rows.add(r);
}
Assert.assertEquals(2, rows.size());
Assert.assertEquals(2, rows.get(0).size());
Assert.assertEquals(2, rows.get(1).size());
}
@Test
public void withoutWhereClauseWithTransformers() {
List fields = new ArrayList();
fields.add(AbstractDataImportHandlerTest.createMap("column", "id"));
fields.add(AbstractDataImportHandlerTest.createMap("column", "desc"));
String q = "select * from x where id=${x.id}";
Map<String, String> entityAttrs = AbstractDataImportHandlerTest.createMap(
"query", q, "transformer", UppercaseTransformer.class.getName());
MockDataSource ds = new MockDataSource();
VariableResolverImpl vr = new VariableResolverImpl();
vr.addNamespace("x", AbstractDataImportHandlerTest.createMap("id", 1));
Context context = AbstractDataImportHandlerTest.getContext(null, vr, ds, 0,
fields, entityAttrs);
List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
rows.add(AbstractDataImportHandlerTest.createMap("id", 1, "desc", "one"));
rows.add(AbstractDataImportHandlerTest.createMap("id", 1, "desc",
"another one"));
MockDataSource.setIterator(vr.replaceTokens(q), rows.iterator());
CachedSqlEntityProcessor csep = new CachedSqlEntityProcessor();
csep.init(context);
rows = new ArrayList<Map<String, Object>>();
while (true) {
Map<String, Object> r = csep.nextRow();
if (r == null)
break;
rows.add(r);
}
Assert.assertEquals(2, rows.size());
ds.close();
csep.init(context);
rows = new ArrayList<Map<String, Object>>();
while (true) {
Map<String, Object> r = csep.nextRow();
if (r == null)
break;
rows.add(r);
Assert.assertEquals(r.get("desc").toString().toUpperCase(), r.get("desc"));
}
Assert.assertEquals(2, rows.size());
Assert.assertEquals(2, rows.get(0).size());
Assert.assertEquals(2, rows.get(1).size());
}
@Test
public void withoutWhereClauseWithMultiRowTransformer() {
List fields = new ArrayList();
fields.add(AbstractDataImportHandlerTest.createMap("column", "id"));
fields.add(AbstractDataImportHandlerTest.createMap("column", "desc"));
String q = "select * from x where id=${x.id}";
Map<String, String> entityAttrs = AbstractDataImportHandlerTest.createMap(
"query", q, "transformer", DoubleTransformer.class.getName());
MockDataSource ds = new MockDataSource();
VariableResolverImpl vr = new VariableResolverImpl();
vr.addNamespace("x", AbstractDataImportHandlerTest.createMap("id", 1));
Context context = AbstractDataImportHandlerTest.getContext(null, vr, ds, 0,
fields, entityAttrs);
List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
rows.add(AbstractDataImportHandlerTest.createMap("id", 1, "desc", "one"));
rows.add(AbstractDataImportHandlerTest.createMap("id", 1, "desc",
"another one"));
MockDataSource.setIterator(vr.replaceTokens(q), rows.iterator());
CachedSqlEntityProcessor csep = new CachedSqlEntityProcessor();
csep.init(context);
rows = new ArrayList<Map<String, Object>>();
while (true) {
Map<String, Object> r = csep.nextRow();
if (r == null)
break;
rows.add(r);
}
Assert.assertEquals(4, rows.size());
ds.close();
csep.init(context);
rows = new ArrayList<Map<String, Object>>();
while (true) {
Map<String, Object> r = csep.nextRow();
if (r == null)
break;
rows.add(r);
}
Assert.assertEquals(4, rows.size());
Assert.assertEquals(2, rows.get(0).size());
Assert.assertEquals(2, rows.get(1).size());
}
public static class DoubleTransformer extends Transformer {
public Object transformRow(Map<String, Object> row, Context context) {
List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
rows.add(row);
rows.add(row);
return rows;
}
}
public static class UppercaseTransformer extends Transformer {
public Object transformRow(Map<String, Object> row, Context context) {
for (Map.Entry<String, Object> entry : row.entrySet()) {
Object val = entry.getValue();
if (val instanceof String) {
String s = (String) val;
entry.setValue(s.toUpperCase());
}
}
return row;
}
}
@Test
public void withWhereClause() {
List fields = new ArrayList();
fields.add(AbstractDataImportHandlerTest.createMap("column", "id"));
fields.add(AbstractDataImportHandlerTest.createMap("column", "desc"));
String q = "select * from x";
Map<String, String> entityAttrs = AbstractDataImportHandlerTest.createMap(
"query", q, "where", "id=x.id");
MockDataSource ds = new MockDataSource();
VariableResolverImpl vr = new VariableResolverImpl();
Map xNamespace = AbstractDataImportHandlerTest.createMap("id", 0);
vr.addNamespace("x", xNamespace);
Context context = AbstractDataImportHandlerTest.getContext(null, vr, ds, 0,
fields, entityAttrs);
List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
rows.add(AbstractDataImportHandlerTest.createMap("id", 1, "desc", "one"));
rows.add(AbstractDataImportHandlerTest.createMap("id", 2, "desc", "two"));
rows.add(AbstractDataImportHandlerTest.createMap("id", 2, "desc",
"another two"));
rows.add(AbstractDataImportHandlerTest.createMap("id", 3, "desc", "three"));
rows.add(AbstractDataImportHandlerTest.createMap("id", 3, "desc",
"another three"));
rows.add(AbstractDataImportHandlerTest.createMap("id", 3, "desc",
"another another three"));
MockDataSource.setIterator(q, rows.iterator());
CachedSqlEntityProcessor csep = new CachedSqlEntityProcessor();
csep.init(context);
rows = new ArrayList<Map<String, Object>>();
while (true) {
Map<String, Object> r = csep.nextRow();
if (r == null)
break;
rows.add(r);
}
Assert.assertEquals(0, rows.size());
ds.close();
csep.init(context);
rows = new ArrayList<Map<String, Object>>();
xNamespace.put("id", 2);
while (true) {
Map<String, Object> r = csep.nextRow();
if (r == null)
break;
rows.add(r);
}
Assert.assertEquals(2, rows.size());
csep.init(context);
rows = new ArrayList<Map<String, Object>>();
xNamespace.put("id", 3);
while (true) {
Map<String, Object> r = csep.nextRow();
if (r == null)
break;
rows.add(r);
}
Assert.assertEquals(3, rows.size());
}
}
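
Note: the withWhereClause test above suggests how the cache is keyed: the left side of the where attribute names a column in the cached rows and the right side is a variable resolved from the parent entity, so each parent row pulls only its matching children from the cache. A hypothetical data-config fragment along the lines of that test, written as an embedded string in the style of the other test fixtures; entity, table and column names are illustrative only.

  // Sketch only; not shipped syntax documentation.
  public static final String dc_cachedChild = "<dataConfig>\n"
      + "  <document>\n"
      + "    <entity name=\"x\" query=\"select * from x\">\n"
      + "      <entity name=\"y\" processor=\"CachedSqlEntityProcessor\"\n"
      + "              query=\"select * from y\" where=\"xid=x.id\">\n"
      + "        <field column=\"desc\"/>\n"
      + "      </entity>\n"
      + "    </entity>\n"
      + "  </document>\n"
      + "</dataConfig>";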

View File

@ -0,0 +1,92 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import junit.framework.Assert;
import org.junit.Test;
import org.w3c.dom.Document;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.ByteArrayInputStream;
import java.util.ArrayList;
import java.util.List;
/**
* <p>
* Test for DataConfig
* </p>
*
* @version $Id$
* @since solr 1.3
*/
public class TestDataConfig extends AbstractDataImportHandlerTest {
@Override
public void setUp() throws Exception {
super.setUp();
}
@Override
public void tearDown() throws Exception {
super.tearDown();
}
@Override
public String getSchemaFile() {
return "dataimport-schema.xml";
}
@Override
public String getSolrConfigFile() {
return "dataimport-nodatasource-solrconfig.xml";
}
@Test
@SuppressWarnings("unchecked")
public void testDataConfigWithDataSource() throws Exception {
List rows = new ArrayList();
rows.add(createMap("id", "1", "desc", "one"));
MockDataSource.setIterator("select * from x", rows.iterator());
super.runFullImport(loadDataConfig("data-config-with-datasource.xml"));
assertQ(req("id:1"), "//*[@numFound='1']");
}
@Test
public void basic() throws Exception {
javax.xml.parsers.DocumentBuilder builder = DocumentBuilderFactory
.newInstance().newDocumentBuilder();
Document doc = builder.parse(new ByteArrayInputStream(xml.getBytes()));
DataConfig dc = new DataConfig();
dc.readFromXml(doc.getDocumentElement());
Assert.assertEquals("atrimlisting",
dc.documents.get(0).entities.get(0).name);
}
private static final String xml = "<dataConfig>\n"
+ "\t<document name=\"autos\" >\n"
+ "\t\t<entity name=\"atrimlisting\" pk=\"acode\"\n"
+ "\t\t\tquery=\"select acode,make,model,year,msrp,category,image,izmo_image_url,price_range_low,price_range_high,invoice_range_low,invoice_range_high from atrimlisting\"\n"
+ "\t\t\tdeltaQuery=\"select acode from atrimlisting where last_modified > '${indexer.last_index_time}'\">\n"
+
"\t\t</entity>\n" +
"\t</document>\n" + "</dataConfig>";
}

View File

@ -0,0 +1,92 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.junit.Assert;
import org.junit.Test;
import java.text.SimpleDateFormat;
import java.util.*;
/**
* <p>
* Test for DateFormatTransformer
* </p>
*
* @version $Id$
* @since solr 1.3
*/
public class TestDateFormatTransformer {
@Test
@SuppressWarnings("unchecked")
public void testTransformRow_SingleRow() throws Exception {
List fields = new ArrayList();
fields.add(AbstractDataImportHandlerTest.createMap(DataImporter.COLUMN,
"lastModified"));
fields.add(AbstractDataImportHandlerTest.createMap(DataImporter.COLUMN,
"dateAdded", RegexTransformer.SRC_COL_NAME, "lastModified",
DateFormatTransformer.DATE_TIME_FMT, "MM/dd/yyyy"));
SimpleDateFormat format = new SimpleDateFormat("MM/dd/yyyy");
Date now = format.parse(format.format(new Date()));
Map row = AbstractDataImportHandlerTest.createMap("lastModified", format
.format(now));
VariableResolverImpl resolver = new VariableResolverImpl();
resolver.addNamespace("e", row);
Context context = AbstractDataImportHandlerTest.getContext(null, resolver,
null, 0, fields, null);
new DateFormatTransformer().transformRow(row, context);
Assert.assertEquals(now, row.get("dateAdded"));
}
@Test
@SuppressWarnings("unchecked")
public void testTransformRow_MultipleRows() throws Exception {
List fields = new ArrayList();
fields.add(AbstractDataImportHandlerTest.createMap(DataImporter.COLUMN,
"lastModified"));
fields.add(AbstractDataImportHandlerTest.createMap(DataImporter.COLUMN,
"dateAdded", RegexTransformer.SRC_COL_NAME, "lastModified",
DateFormatTransformer.DATE_TIME_FMT, "MM/dd/yyyy hh:mm:ss.SSS"));
SimpleDateFormat format = new SimpleDateFormat("MM/dd/yyyy hh:mm:ss.SSS");
Date now1 = format.parse(format.format(new Date()));
Date now2 = format.parse(format.format(new Date()));
Map row = new HashMap();
List list = new ArrayList();
list.add(format.format(now1));
list.add(format.format(now2));
row.put("lastModified", list);
VariableResolverImpl resolver = new VariableResolverImpl();
resolver.addNamespace("e", row);
Context context = AbstractDataImportHandlerTest.getContext(null, resolver,
null, 0, fields, null);
new DateFormatTransformer().transformRow(row, context);
List output = new ArrayList();
output.add(now1);
output.add(now2);
Assert.assertEquals(output, row.get("dateAdded"));
}
}

View File

@ -0,0 +1,201 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.apache.solr.common.SolrInputDocument;
import static org.apache.solr.handler.dataimport.AbstractDataImportHandlerTest.createMap;
import org.junit.Assert;
import org.junit.Test;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
/**
* <p>
* Test for DocBuilder
* </p>
*
* @version $Id$
* @since solr 1.3
*/
public class TestDocBuilder {
@Test
public void loadClass() throws Exception {
Class clz = DocBuilder.loadClass("RegexTransformer");
Assert.assertNotNull(clz);
}
@Test
public void singleEntityNoRows() {
try {
DataImporter di = new DataImporter();
di.loadDataConfig(dc_singleEntity);
DataConfig cfg = di.getConfig();
DataConfig.Entity ent = cfg.documents.get(0).entities.get(0);
for (DataConfig.Field field : ent.fields) {
field.nameOrColName = field.name = field.column;
}
MockDataSource.setIterator("select * from x", new ArrayList().iterator());
ent.dataSrc = new MockDataSource();
ent.isDocRoot = true;
DataImporter.RequestParams rp = new DataImporter.RequestParams();
rp.command = "full-import";
SolrWriterImpl swi = new SolrWriterImpl();
di.rumCmd(rp, swi, Collections.EMPTY_MAP);
Assert.assertEquals(Boolean.TRUE, swi.deleteAllCalled);
Assert.assertEquals(Boolean.TRUE, swi.commitCalled);
Assert.assertEquals(0, swi.docs.size());
Assert.assertEquals(1, di.getDocBuilder().importStatistics.queryCount
.get());
Assert
.assertEquals(0, di.getDocBuilder().importStatistics.docCount.get());
Assert.assertEquals(0, di.getDocBuilder().importStatistics.rowsCount
.get());
} finally {
MockDataSource.clearCache();
}
}
@Test
public void singleEntityOneRow() {
try {
DataImporter di = new DataImporter();
di.loadDataConfig(dc_singleEntity);
DataConfig cfg = di.getConfig();
DataConfig.Entity ent = cfg.documents.get(0).entities.get(0);
for (DataConfig.Field field : ent.fields) {
field.nameOrColName = field.name = field.column;
}
List l = new ArrayList();
l.add(createMap("id", 1, "desc", "one"));
MockDataSource.setIterator("select * from x", l.iterator());
ent.dataSrc = new MockDataSource();
ent.isDocRoot = true;
DataImporter.RequestParams rp = new DataImporter.RequestParams();
rp.command = "full-import";
SolrWriterImpl swi = new SolrWriterImpl();
di.rumCmd(rp, swi, Collections.EMPTY_MAP);
Assert.assertEquals(Boolean.TRUE, swi.deleteAllCalled);
Assert.assertEquals(Boolean.TRUE, swi.commitCalled);
Assert.assertEquals(1, swi.docs.size());
Assert.assertEquals(1, di.getDocBuilder().importStatistics.queryCount
.get());
Assert
.assertEquals(1, di.getDocBuilder().importStatistics.docCount.get());
Assert.assertEquals(1, di.getDocBuilder().importStatistics.rowsCount
.get());
for (int i = 0; i < l.size(); i++) {
Map<String, Object> map = (Map<String, Object>) l.get(i);
SolrInputDocument doc = swi.docs.get(i);
for (Map.Entry<String, Object> entry : map.entrySet()) {
Assert.assertEquals(entry.getValue(), doc.getFieldValue(entry
.getKey()));
}
}
} finally {
MockDataSource.clearCache();
}
}
@Test
public void singleEntityMultipleRows() {
try {
DataImporter di = new DataImporter();
di.loadDataConfig(dc_singleEntity);
DataConfig cfg = di.getConfig();
DataConfig.Entity ent = cfg.documents.get(0).entities.get(0);
ent.isDocRoot = true;
DataImporter.RequestParams rp = new DataImporter.RequestParams();
rp.command = "full-import";
for (DataConfig.Field field : ent.fields) {
field.nameOrColName = field.name = field.column;
}
List l = new ArrayList();
l.add(createMap("id", 1, "desc", "one"));
l.add(createMap("id", 2, "desc", "two"));
l.add(createMap("id", 3, "desc", "three"));
MockDataSource.setIterator("select * from x", l.iterator());
ent.dataSrc = new MockDataSource();
SolrWriterImpl swi = new SolrWriterImpl();
di.rumCmd(rp, swi, Collections.EMPTY_MAP);
Assert.assertEquals(Boolean.TRUE, swi.deleteAllCalled);
Assert.assertEquals(Boolean.TRUE, swi.commitCalled);
Assert.assertEquals(3, swi.docs.size());
for (int i = 0; i < l.size(); i++) {
Map<String, Object> map = (Map<String, Object>) l.get(i);
SolrInputDocument doc = swi.docs.get(i);
for (Map.Entry<String, Object> entry : map.entrySet()) {
Assert.assertEquals(entry.getValue(), doc.getFieldValue(entry
.getKey()));
}
}
Assert.assertEquals(1, di.getDocBuilder().importStatistics.queryCount
.get());
Assert
.assertEquals(3, di.getDocBuilder().importStatistics.docCount.get());
Assert.assertEquals(3, di.getDocBuilder().importStatistics.rowsCount
.get());
} finally {
MockDataSource.clearCache();
}
}
static class SolrWriterImpl extends SolrWriter {
List<SolrInputDocument> docs = new ArrayList<SolrInputDocument>();
Boolean deleteAllCalled;
Boolean commitCalled;
public SolrWriterImpl() {
super(null, ".");
}
public SolrDoc getSolrDocInstance() {
return new DataImportHandler.SolrDocumentWrapper();
}
public boolean upload(SolrDoc d) {
return docs.add(((DataImportHandler.SolrDocumentWrapper) d).doc);
}
public void log(int event, String name, Object row) {
// Do nothing
}
public void doDeleteAll() {
deleteAllCalled = Boolean.TRUE;
}
public void commit(boolean b) {
commitCalled = Boolean.TRUE;
}
}
public static final String dc_singleEntity = "<dataConfig>\n"
+ " <document name=\"X\" >\n"
+ " <entity name=\"x\" query=\"select * from x\">\n"
+ " <field column=\"id\"/>\n"
+ " <field column=\"desc\"/>\n" + " </entity>\n"
+ " </document>\n" + "</dataConfig>";
}

View File

@ -0,0 +1,68 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.util.ArrayList;
import java.util.List;
/**
* <p>
* Test for DocBuilder using the test harness
* </p>
*
* @version $Id$
* @since solr 1.3
*/
public class TestDocBuilder2 extends AbstractDataImportHandlerTest {
@Before
public void setUp() throws Exception {
super.setUp();
}
@After
public void tearDown() throws Exception {
super.tearDown();
}
@Override
public String getSchemaFile() {
return "dataimport-schema.xml";
}
@Override
public String getSolrConfigFile() {
return "dataimport-solrconfig.xml";
}
@Test
@SuppressWarnings("unchecked")
public void testSingleEntity() throws Exception {
List rows = new ArrayList();
rows.add(createMap("id", "1", "desc", "one"));
MockDataSource.setIterator("select * from x", rows.iterator());
super.runFullImport(loadDataConfig("single-entity-data-config.xml"));
assertQ(req("id:1"), "//*[@numFound='1']");
}
}

View File

@ -0,0 +1,83 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.junit.Assert;
import org.junit.Test;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* <p>
* Test for EntityProcessorBase
* </p>
*
* @version $Id$
* @since solr 1.3
*/
public class TestEntityProcessorBase {
@Test
public void multiTransformer() {
List<Map<String, String>> fields = new ArrayList<Map<String, String>>();
Map<String, String> entity = new HashMap<String, String>();
entity.put("transformer", T1.class.getName() + "," + T2.class.getName()
+ "," + T3.class.getName());
fields.add(TestRegexTransformer.getField("A", null, null, null, null));
fields.add(TestRegexTransformer.getField("B", null, null, null, null));
Context context = AbstractDataImportHandlerTest.getContext(null, null,
null, 0, fields, entity);
Map<String, Object> src = new HashMap<String, Object>();
src.put("A", "NA");
src.put("B", "NA");
SqlEntityProcessor sep = new SqlEntityProcessor();
sep.init(context);
Map<String, Object> res = sep.applyTransformer(src);
Assert.assertNotNull(res.get("T1"));
Assert.assertNotNull(res.get("T2"));
Assert.assertNotNull(res.get("T3"));
}
static class T1 extends Transformer {
public Object transformRow(Map<String, Object> aRow, Context context) {
aRow.put("T1", "T1 called");
return aRow;
}
}
static class T2 extends Transformer {
public Object transformRow(Map<String, Object> aRow, Context context) {
aRow.put("T2", "T2 called");
return aRow;
}
}
static class T3 {
public Object transformRow(Map<String, Object> aRow) {
aRow.put("T3", "T3 called");
return aRow;
}
}
}
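
Note: T3 above shows that a transformer listed in the comma-separated transformer attribute does not have to extend Transformer; a class with a public transformRow(Map) method is applied as well. A standalone sketch of such a class follows; the class name and the trimming behaviour are invented for illustration.

  import java.util.Map;

  // Hypothetical transformer relying only on the duck-typed transformRow(Map)
  // contract demonstrated by T3 in the test above.
  public class TrimTransformer {
    public Object transformRow(Map<String, Object> row) {
      for (Map.Entry<String, Object> entry : row.entrySet()) {
        Object val = entry.getValue();
        if (val instanceof String) {
          entry.setValue(((String) val).trim()); // strip stray whitespace from values
        }
      }
      return row;
    }
  }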

View File

@ -0,0 +1,118 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import static org.junit.Assert.assertEquals;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import java.net.URLEncoder;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
/**
* <p>
* Test for EvaluatorBag
* </p>
*
* @version $Id$
* @since solr 1.3
*/
public class TestEvaluatorBag {
private static final String ENCODING = "UTF-8";
VariableResolverImpl resolver;
Map<String, String> sqlTests;
Map<String, String> urlTests;
@Before
public void setUp() throws Exception {
resolver = new VariableResolverImpl();
sqlTests = new HashMap<String, String>();
sqlTests.put("foo\"", "foo\"\"");
sqlTests.put("foo'", "foo''");
sqlTests.put("foo''", "foo''''");
sqlTests.put("'foo\"", "''foo\"\"");
sqlTests.put("\"Albert D'souza\"", "\"\"Albert D''souza\"\"");
urlTests = new HashMap<String, String>();
urlTests.put("*:*", URLEncoder.encode("*:*", ENCODING));
urlTests.put("price:[* TO 200]", URLEncoder.encode("price:[* TO 200]",
ENCODING));
urlTests.put("review:\"hybrid sedan\"", URLEncoder.encode(
"review:\"hybrid sedan\"", ENCODING));
}
/**
* Test method for
* {@link EvaluatorBag#getSqlEscapingEvaluator()}.
*/
@Test
public void testGetSqlEscapingEvaluator() {
Evaluator sqlEscaper = EvaluatorBag.getSqlEscapingEvaluator();
runTests(sqlTests, sqlEscaper);
}
/**
* Test method for
* {@link EvaluatorBag#getUrlEvaluator()}.
*/
@Test
public void testGetUrlEvaluator() throws Exception {
Evaluator urlEvaluator = EvaluatorBag.getUrlEvaluator();
runTests(urlTests, urlEvaluator);
}
/**
* Test method for
* {@link EvaluatorBag#getDateFormatEvaluator()}.
*/
@Test
@Ignore
public void testGetDateFormatEvaluator() {
Evaluator dateFormatEval = EvaluatorBag.getDateFormatEvaluator();
assertEquals(new SimpleDateFormat("yyyy-MM-dd").format(new Date()),
dateFormatEval.evaluate(resolver, "'NOW',yyyy-MM-dd HH:mm"));
Map<String, Object> map = new HashMap<String, Object>();
map.put("key", new Date());
resolver.addNamespace("A", map);
assertEquals(new SimpleDateFormat("yyyy-MM-dd HH:mm").format(new Date()),
dateFormatEval.evaluate(resolver, "A.key, yyyy-MM-dd HH:mm"));
}
private void runTests(Map<String, String> tests, Evaluator evaluator) {
for (Map.Entry<String, String> entry : tests.entrySet()) {
Map<String, Object> values = new HashMap<String, Object>();
values.put("key", entry.getKey());
resolver.addNamespace("A", values);
String expected = (String) entry.getValue();
String actual = evaluator.evaluate(resolver, "A.key");
assertEquals(expected, actual);
}
}
}

View File

@ -0,0 +1,108 @@
package org.apache.solr.handler.dataimport;
import org.junit.Assert;
import org.junit.Test;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
/**
* <p>
* Test for FileListEntityProcessor
* </p>
*
* @version $Id$
* @since solr 1.3
*/
public class TestFileListEntityProcessor {
@Test
@SuppressWarnings("unchecked")
public void testSimple() throws IOException {
long time = System.currentTimeMillis();
File tmpdir = new File("." + time);
tmpdir.mkdir();
tmpdir.deleteOnExit();
createFile(tmpdir, "a.xml", "a.xml".getBytes(), false);
createFile(tmpdir, "b.xml", "b.xml".getBytes(), false);
createFile(tmpdir, "c.props", "c.props".getBytes(), false);
Map attrs = AbstractDataImportHandlerTest.createMap(
FileListEntityProcessor.FILE_NAME, "xml$",
FileListEntityProcessor.BASE_DIR, tmpdir.getAbsolutePath());
Context c = AbstractDataImportHandlerTest.getContext(null,
new VariableResolverImpl(), null, 0, Collections.EMPTY_LIST, attrs);
FileListEntityProcessor fileListEntityProcessor = new FileListEntityProcessor();
fileListEntityProcessor.init(c);
List<String> fList = new ArrayList<String>();
while (true) {
Map<String, Object> f = fileListEntityProcessor.nextRow();
if (f == null)
break;
fList.add((String) f.get(FileListEntityProcessor.ABSOLUTE_FILE));
}
Assert.assertEquals(2, fList.size());
}
@Test
public void testNTOT() throws IOException {
long time = System.currentTimeMillis();
File tmpdir = new File("." + time);
tmpdir.mkdir();
tmpdir.deleteOnExit();
createFile(tmpdir, "a.xml", "a.xml".getBytes(), true);
createFile(tmpdir, "b.xml", "b.xml".getBytes(), true);
createFile(tmpdir, "c.props", "c.props".getBytes(), true);
Map attrs = AbstractDataImportHandlerTest.createMap(
FileListEntityProcessor.FILE_NAME, "xml$",
FileListEntityProcessor.BASE_DIR, tmpdir.getAbsolutePath(),
FileListEntityProcessor.OLDER_THAN, "'NOW'");
Context c = AbstractDataImportHandlerTest.getContext(null,
new VariableResolverImpl(), null, 0, Collections.EMPTY_LIST, attrs);
FileListEntityProcessor fileListEntityProcessor = new FileListEntityProcessor();
fileListEntityProcessor.init(c);
List<String> fList = new ArrayList<String>();
while (true) {
Map<String, Object> f = fileListEntityProcessor.nextRow();
if (f == null)
break;
fList.add((String) f.get(FileListEntityProcessor.ABSOLUTE_FILE));
}
System.out.println("List of files when given OLDER_THAN -- " + fList);
Assert.assertEquals(2, fList.size());
attrs = AbstractDataImportHandlerTest.createMap(
FileListEntityProcessor.FILE_NAME, "xml$",
FileListEntityProcessor.BASE_DIR, tmpdir.getAbsolutePath(),
FileListEntityProcessor.NEWER_THAN, "'NOW-2HOURS'");
c = AbstractDataImportHandlerTest.getContext(null,
new VariableResolverImpl(), null, 0, Collections.EMPTY_LIST, attrs);
fileListEntityProcessor.init(c);
fList.clear();
while (true) {
Map<String, Object> f = fileListEntityProcessor.nextRow();
if (f == null)
break;
fList.add((String) f.get(FileListEntityProcessor.ABSOLUTE_FILE));
}
System.out.println("List of files when given NEWER_THAN -- " + fList);
Assert.assertEquals(2, fList.size());
}
public static File createFile(File tmpdir, String name, byte[] content,
boolean changeModifiedTime) throws IOException {
File file = new File(tmpdir.getAbsolutePath() + File.separator + name);
file.deleteOnExit();
FileOutputStream f = new FileOutputStream(file);
f.write(content);
f.close();
// System.out.println("before "+file.lastModified());
if (changeModifiedTime)
file.setLastModified(System.currentTimeMillis() - 3600000);
// System.out.println("after "+file.lastModified());
return file;
}
}

View File

@ -0,0 +1,78 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;
import java.util.*;
/**
* <p>
* Test for JdbcDataSource
* </p>
* <p/>
* <p>
 * Note: These tests are ignored because no database is available to test against
* </p>
*
* @version $Id$
* @since solr 1.3
*/
public class TestJdbcDataSource {
@Test
@Ignore
public void basic() throws Exception {
JdbcDataSource dataSource = new JdbcDataSource();
Properties p = new Properties();
p.put("driver", "com.mysql.jdbc.Driver");
p.put("url", "jdbc:mysql://localhost/autos");
p.put("user", "root");
p.put("password", "");
List<Map<String, String>> flds = new ArrayList<Map<String, String>>();
Map<String, String> f = new HashMap<String, String>();
f.put("column", "trim_id");
f.put("type", "long");
flds.add(f);
f = new HashMap<String, String>();
f.put("column", "msrp");
f.put("type", "float");
flds.add(f);
Context c = AbstractDataImportHandlerTest.getContext(null, null,
dataSource, 0, flds, null);
dataSource.init(c, p);
Iterator<Map<String, Object>> i = dataSource
.getData("select make,model,year,msrp,trim_id from atrimlisting where make='Acura'");
int count = 0;
Object msrp = null;
Object trim_id = null;
while (i.hasNext()) {
Map<String, Object> map = i.next();
msrp = map.get("msrp");
trim_id = map.get("trim_id");
count++;
}
Assert.assertEquals(5, count);
Assert.assertEquals(Float.class, msrp.getClass());
Assert.assertEquals(Long.class, trim_id.getClass());
}
}

View File

@ -0,0 +1,78 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.junit.Assert;
import org.junit.Test;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
* <p>
* Test for NumberFormatTransformer
* </p>
*
* @version $Id$
* @since solr 1.3
*/
public class TestNumberFormatTransformer {
@Test
@SuppressWarnings("unchecked")
public void testTransformRow_SingleNumber() {
List l = new ArrayList();
l.add(AbstractDataImportHandlerTest.createMap("column", "num",
NumberFormatTransformer.FORMAT_STYLE, NumberFormatTransformer.NUMBER));
Context c = AbstractDataImportHandlerTest.getContext(null, null, null, 0,
l, null);
Map m = AbstractDataImportHandlerTest.createMap("num", "123,567");
new NumberFormatTransformer().transformRow(m, c);
Assert.assertEquals(new Long(123567), m.get("num"));
}
@Test
@SuppressWarnings("unchecked")
public void testTransformRow_MultipleNumbers() throws Exception {
List fields = new ArrayList();
fields.add(AbstractDataImportHandlerTest.createMap(DataImporter.COLUMN,
"inputs"));
fields.add(AbstractDataImportHandlerTest.createMap(DataImporter.COLUMN,
"outputs", RegexTransformer.SRC_COL_NAME, "inputs",
NumberFormatTransformer.FORMAT_STYLE, NumberFormatTransformer.NUMBER));
List inputs = new ArrayList();
inputs.add("123,567");
inputs.add("245,678");
Map row = AbstractDataImportHandlerTest.createMap("inputs", inputs);
VariableResolverImpl resolver = new VariableResolverImpl();
resolver.addNamespace("e", row);
Context context = AbstractDataImportHandlerTest.getContext(null, resolver,
null, 0, fields, null);
new NumberFormatTransformer().transformRow(row, context);
List output = new ArrayList();
output.add(new Long(123567));
output.add(new Long(245678));
Map outputRow = AbstractDataImportHandlerTest.createMap("inputs", inputs,
"outputs", output);
Assert.assertEquals(outputRow, row);
}
}

View File

@ -0,0 +1,112 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.junit.Assert;
import org.junit.Test;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* <p>
* Test for RegexTransformer
* </p>
*
* @version $Id$
* @since solr 1.3
*/
public class TestRegexTransformer {
@Test
public void commaSeparated() {
List<Map<String, String>> fields = new ArrayList<Map<String, String>>();
fields.add(getField("col1", "string", null, "a", ","));
Context context = AbstractDataImportHandlerTest.getContext(null, null,
null, 0, fields, null);
Map<String, Object> src = new HashMap<String, Object>();
String s = "a,bb,cc,d";
src.put("a", s);
Map<String, Object> result = new RegexTransformer().transformRow(src,
context);
Assert.assertEquals(2, result.size());
Assert.assertEquals(4, ((List) result.get("col1")).size());
}
@Test
public void replaceWith() {
List<Map<String, String>> fields = new ArrayList<Map<String, String>>();
Map<String, String> fld = getField("name", "string", "'", null, null);
fld.put("replaceWith", "''");
fields.add(fld);
Context context = AbstractDataImportHandlerTest.getContext(null, null,
null, 0, fields, null);
Map<String, Object> src = new HashMap<String, Object>();
String s = "D'souza";
src.put("name", s);
Map<String, Object> result = new RegexTransformer().transformRow(src,
context);
Assert.assertEquals("D''souza", result.get("name"));
}
@Test
public void mileage() {
Context context = AbstractDataImportHandlerTest.getContext(null, null,
null, 0, getFields(), null);
Map<String, Object> src = new HashMap<String, Object>();
String s = "Fuel Economy Range: 26 mpg Hwy, 19 mpg City";
src.put("rowdata", s);
Map<String, Object> result = new RegexTransformer().transformRow(src,
context);
Assert.assertEquals(3, result.size());
Assert.assertEquals(s, result.get("rowdata"));
Assert.assertEquals("26", result.get("highway_mileage"));
Assert.assertEquals("19", result.get("city_mileage"));
}
public static List<Map<String, String>> getFields() {
List<Map<String, String>> fields = new ArrayList<Map<String, String>>();
fields.add(getField("city_mileage", "sint",
"Fuel Economy Range:\\s*?\\d*?\\s*?mpg Hwy,\\s*?(\\d*?)\\s*?mpg City",
"rowdata", null));
fields.add(getField("highway_mileage", "sint",
"Fuel Economy Range:\\s*?(\\d*?)\\s*?mpg Hwy,\\s*?\\d*?\\s*?mpg City",
"rowdata", null));
fields.add(getField("seating_capacity", "sint", "Seating capacity:(.*)",
"rowdata", null));
fields
.add(getField("warranty", "string", "Warranty:(.*)", "rowdata", null));
fields.add(getField("rowdata", "string", null, "rowdata", null));
return fields;
}
public static Map<String, String> getField(String col, String type,
String re, String srcCol, String splitBy) {
HashMap<String, String> vals = new HashMap<String, String>();
vals.put("column", col);
vals.put("type", type);
vals.put("regex", re);
vals.put("sourceColName", srcCol);
vals.put("splitBy", splitBy);
return vals;
}
}
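
Note: the attribute names exercised here (regex, sourceColName, splitBy, replaceWith) map directly onto field definitions in a data-config. A hypothetical fragment mirroring the mileage and replaceWith tests, written as an embedded string like the other test configs in this commit; the entity name, query and table are illustrative only.

  // Sketch only; regex backslashes are doubled because this is a Java string.
  public static final String dc_regexFields = "<dataConfig>\n"
      + "  <document>\n"
      + "    <entity name=\"listing\" query=\"select rowdata, name from listings\"\n"
      + "            transformer=\"RegexTransformer\">\n"
      + "      <field column=\"highway_mileage\" sourceColName=\"rowdata\"\n"
      + "             regex=\"Fuel Economy Range:\\\\s*?(\\\\d*?)\\\\s*?mpg Hwy,\\\\s*?\\\\d*?\\\\s*?mpg City\"/>\n"
      + "      <field column=\"name\" regex=\"'\" replaceWith=\"''\"/>\n"
      + "    </entity>\n"
      + "  </document>\n"
      + "</dataConfig>";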

View File

@ -0,0 +1,146 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.InputSource;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* <p>
* Test for ScriptTransformer
* </p>
* <p/>
 * All tests in this class are ignored because script support is only available
 * in Java 1.6+.
*
* @version $Id$
* @since solr 1.3
*/
public class TestScriptTransformer {
@Test
@Ignore
public void basic() {
String script = "function f1(row,context){"
+ "row.put('name','Hello ' + row.get('name'));" + "return row;\n" + "}";
Context context = getContext("f1", script);
Map<String, Object> map = new HashMap<String, Object>();
map.put("name", "Scott");
SqlEntityProcessor sep = new SqlEntityProcessor();
sep.init(context);
sep.applyTransformer(map);
Assert.assertEquals(map.get("name"), "Hello Scott");
}
private Context getContext(String funcName, String script) {
List<Map<String, String>> fields = new ArrayList<Map<String, String>>();
Map<String, String> entity = new HashMap<String, String>();
entity.put("name", "hello");
entity.put("transformer", "script:" + funcName);
Map<String, Object> dataImporterNs = new HashMap<String, Object>();
dataImporterNs.put(DataConfig.SCRIPT_LANG, "JavaScript");
dataImporterNs.put(DataConfig.SCRIPT, script);
VariableResolverImpl vr = new VariableResolverImpl();
vr.addNamespace(DataConfig.IMPORTER_NS, dataImporterNs);
Context context = AbstractDataImportHandlerTest.getContext(null, vr, null,
0, fields, entity);
return context;
}
@Test
@Ignore
public void oneparam() {
String script = "function f1(row){"
+ "row.put('name','Hello ' + row.get('name'));" + "return row;\n" + "}";
Context context = getContext("f1", script);
Map<String, Object> map = new HashMap<String, Object>();
map.put("name", "Scott");
SqlEntityProcessor sep = new SqlEntityProcessor();
sep.init(context);
sep.applyTransformer(map);
Assert.assertEquals(map.get("name"), "Hello Scott");
}
@Test
@Ignore
public void readScriptTag() throws Exception {
DocumentBuilder builder = DocumentBuilderFactory.newInstance()
.newDocumentBuilder();
Document document = builder.parse(new InputSource(new StringReader(xml)));
DataConfig config = new DataConfig();
config.readFromXml((Element) document.getElementsByTagName("dataConfig")
.item(0));
Assert.assertTrue(config.script.script.indexOf("checkNextToken") > -1);
}
@Test
@Ignore
public void checkScript() throws Exception {
DocumentBuilder builder = DocumentBuilderFactory.newInstance()
.newDocumentBuilder();
Document document = builder.parse(new InputSource(new StringReader(xml)));
DataConfig config = new DataConfig();
config.readFromXml((Element) document.getElementsByTagName("dataConfig")
.item(0));
Context c = getContext("checkNextToken", config.script.script);
Map map = new HashMap();
map.put("nextToken", "hello");
SqlEntityProcessor sep = new SqlEntityProcessor();
sep.init(c);
sep.applyTransformer(map);
Assert.assertEquals("true", map.get("$hasMore"));
map = new HashMap();
map.put("nextToken", "");
sep.applyTransformer(map);
Assert.assertNull(map.get("$hasMore"));
}
static String xml = "<dataConfig>\n"
+ "<script><![CDATA[\n"
+ "function checkNextToken(row)\t{\n"
+ " var nt = row.get('nextToken');"
+ " if (nt && nt !='' ){ "
+ " row.put('$hasMore', 'true');}\n"
+ " return row;\n"
+ "}]]></script>\t<document>\n"
+ "\t\t<entity name=\"mbx\" pk=\"articleNumber\" processor=\"XPathEntityProcessor\"\n"
+ "\t\t\turl=\"?boardId=${dataimporter.defaults.boardId}&amp;maxRecords=20&amp;includeBody=true&amp;startDate=${dataimporter.defaults.startDate}&amp;guid=:autosearch001&amp;reqId=1&amp;transactionId=stringfortracing&amp;listPos=${mbx.nextToken}\"\n"
+ "\t\t\tforEach=\"/mbmessage/articles/navigation | /mbmessage/articles/article\" transformer=\"script:checkNextToken\">\n"
+ "\n" + "\t\t\t<field column=\"nextToken\"\n"
+ "\t\t\t\txpath=\"/mbmessage/articles/navigation/nextToken\" />\n"
+ "\n" + "\t\t</entity>\n" + "\t</document>\n" + "</dataConfig>";
}

View File

@ -0,0 +1,179 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.junit.Assert;
import org.junit.Test;
import java.util.*;
/**
* <p>
* Test for SqlEntityProcessor
* </p>
*
* @version $Id$
* @since solr 1.3
*/
public class TestSqlEntityProcessor {
private static ThreadLocal<Integer> local = new ThreadLocal<Integer>();
@Test
public void singleBatch() {
SqlEntityProcessor sep = new SqlEntityProcessor();
List<Map<String, Object>> rows = getRows(3);
VariableResolverImpl vr = new VariableResolverImpl();
HashMap<String, String> ea = new HashMap<String, String>();
ea.put("query", "SELECT * FROM A");
Context c = AbstractDataImportHandlerTest.getContext(null, vr, getDs(rows),
Context.FULL_DUMP, null, ea);
sep.init(c);
int count = 0;
while (true) {
Map<String, Object> r = sep.nextRow();
if (r == null)
break;
count++;
}
Assert.assertEquals(3, count);
}
@Test
public void tranformer() {
SqlEntityProcessor sep = new SqlEntityProcessor();
List<Map<String, Object>> rows = getRows(2);
VariableResolverImpl vr = new VariableResolverImpl();
HashMap<String, String> ea = new HashMap<String, String>();
ea.put("query", "SELECT * FROM A");
ea.put("transformer", T.class.getName());
sep.init(AbstractDataImportHandlerTest.getContext(null, vr, getDs(rows),
Context.FULL_DUMP, null, ea));
List<Map<String, Object>> rs = new ArrayList<Map<String, Object>>();
Map<String, Object> r = null;
while (true) {
r = sep.nextRow();
if (r == null)
break;
rs.add(r);
}
Assert.assertEquals(2, rs.size());
Assert.assertNotNull(rs.get(0).get("T"));
}
@Test
public void tranformerWithReflection() {
SqlEntityProcessor sep = new SqlEntityProcessor();
List<Map<String, Object>> rows = getRows(2);
VariableResolverImpl vr = new VariableResolverImpl();
HashMap<String, String> ea = new HashMap<String, String>();
ea.put("query", "SELECT * FROM A");
ea.put("transformer", T3.class.getName());
sep.init(AbstractDataImportHandlerTest.getContext(null, vr, getDs(rows),
Context.FULL_DUMP, null, ea));
List<Map<String, Object>> rs = new ArrayList<Map<String, Object>>();
Map<String, Object> r = null;
while (true) {
r = sep.nextRow();
if (r == null)
break;
rs.add(r);
}
Assert.assertEquals(2, rs.size());
Assert.assertNotNull(rs.get(0).get("T3"));
}
@Test
public void tranformerList() {
SqlEntityProcessor sep = new SqlEntityProcessor();
List<Map<String, Object>> rows = getRows(2);
VariableResolverImpl vr = new VariableResolverImpl();
HashMap<String, String> ea = new HashMap<String, String>();
ea.put("query", "SELECT * FROM A");
ea.put("transformer", T2.class.getName());
sep.init(AbstractDataImportHandlerTest.getContext(null, vr, getDs(rows),
Context.FULL_DUMP, null, ea));
local.set(0);
Map<String, Object> r = null;
int count = 0;
while (true) {
r = sep.nextRow();
if (r == null)
break;
count++;
}
Assert.assertEquals(2, local.get());
Assert.assertEquals(4, count);
}
private List<Map<String, Object>> getRows(int count) {
List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
for (int i = 0; i < count; i++) {
Map<String, Object> row = new HashMap<String, Object>();
row.put("id", i);
row.put("value", "The value is " + i);
rows.add(row);
}
return rows;
}
private static DataSource<Iterator<Map<String, Object>>> getDs(
final List<Map<String, Object>> rows) {
return new DataSource<Iterator<Map<String, Object>>>() {
public Iterator<Map<String, Object>> getData(String query) {
return rows.iterator();
}
public void init(Context context, Properties initProps) {
}
public void close() {
}
};
}
public static class T extends Transformer {
public Object transformRow(Map<String, Object> aRow, Context context) {
aRow.put("T", "Class T");
return aRow;
}
}
public static class T3 {
public Object transformRow(Map<String, Object> aRow) {
aRow.put("T3", "T3 class");
return aRow;
}
}
public static class T2 extends Transformer {
public Object transformRow(Map<String, Object> aRow, Context context) {
Integer count = local.get();
local.set(count + 1);
List<Map<String, Object>> l = new ArrayList<Map<String, Object>>();
l.add(aRow);
l.add(aRow);
return l;
}
}
}
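The inner T, T2 and T3 classes above show the transformer shapes the processor accepts: extend Transformer, return a List of rows to emit several documents from one source row, or (as T3 does) expose a plain transformRow(Map) method that is picked up reflectively. A minimal sketch of a custom single-column transformer in the same style; the class name and the "name" column are illustrative only:

package org.apache.solr.handler.dataimport;

import java.util.Map;

public class LowerCaseTransformer extends Transformer {
  public Object transformRow(Map<String, Object> row, Context context) {
    Object name = row.get("name");
    if (name != null) {
      // normalize a single column in place; returning a List of maps instead
      // would emit several rows from one source row, as T2 above does
      row.put("name", name.toString().toLowerCase());
    }
    return row;
  }
}

It would be referenced from configuration by its fully qualified class name, the same way the tests above pass T.class.getName() as the "transformer" attribute.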

View File

@ -0,0 +1,105 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.junit.Test;
import java.util.ArrayList;
import java.util.List;
/**
* <p>
* Test for SqlEntityProcessor which checks full and delta imports using the
* test harness
* </p>
*
* @version $Id$
* @since solr 1.3
*/
public class TestSqlEntityProcessor2 extends AbstractDataImportHandlerTest {
@Override
public String getSchemaFile() {
return "dataimport-schema.xml";
}
@Override
public String getSolrConfigFile() {
return "dataimport-solrconfig.xml";
}
@Override
public void setUp() throws Exception {
super.setUp();
}
@Override
public void tearDown() throws Exception {
super.tearDown();
}
@Test
@SuppressWarnings("unchecked")
public void testCompositePk_FullImport() throws Exception {
List parentRow = new ArrayList();
parentRow.add(createMap("id", "1"));
MockDataSource.setIterator("select * from x", parentRow.iterator());
List childRow = new ArrayList();
childRow.add(createMap("desc", "hello"));
MockDataSource.setIterator("select * from y where y.A=1", childRow
.iterator());
super.runFullImport(dataConfig);
assertQ(req("id:1"), "//*[@numFound='1']");
assertQ(req("desc:hello"), "//*[@numFound='1']");
}
@Test
@SuppressWarnings("unchecked")
public void testCompositePk_DeltaImport() throws Exception {
List deltaRow = new ArrayList();
deltaRow.add(createMap("id", "5"));
MockDataSource.setIterator("select id from x where last_modified > NOW",
deltaRow.iterator());
List parentRow = new ArrayList();
parentRow.add(createMap("id", "5"));
MockDataSource.setIterator("select * from x where x.id = '5'", parentRow
.iterator());
List childRow = new ArrayList();
childRow.add(createMap("desc", "hello"));
MockDataSource.setIterator("select * from y where y.A=5", childRow
.iterator());
super.runDeltaImport(dataConfig);
assertQ(req("id:5"), "//*[@numFound='1']");
assertQ(req("desc:hello"), "//*[@numFound='1']");
}
private static String dataConfig = "<dataConfig>\n"
+ " <document>\n"
+ " <entity name=\"x\" pk=\"x.id\" query=\"select * from x\" deltaQuery=\"select id from x where last_modified > NOW\">\n"
+ " <field column=\"id\" />\n"
+ " <entity name=\"y\" query=\"select * from y where y.A=${x.id}\">\n"
+ " <field column=\"desc\" />\n"
+ " </entity>\n" + " </entity>\n"
+ " </document>\n" + "</dataConfig>\n";
}
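The child entity query in the dataConfig above embeds ${x.id}; during an import the parent row's columns are made visible to the resolver and substituted into the child query. The exact namespace wiring is handled by the runtime, but the substitution itself can be sketched with the classes exercised in the following tests; publishing the row under the entity name "x" is an assumption inferred from the config, and the value "5" mirrors the delta test above:

package org.apache.solr.handler.dataimport;

import java.util.HashMap;
import java.util.Map;

public class ChildQueryExample {
  public static void main(String[] args) {
    VariableResolverImpl resolver = new VariableResolverImpl();
    Map<String, Object> parentRow = new HashMap<String, Object>();
    parentRow.put("id", "5");
    // the parent entity's current row is assumed to be published under the
    // entity name "x", matching the ${x.id} reference in the config above
    resolver.addNamespace("x", parentRow);
    String childQuery = new TemplateString().replaceTokens(
        "select * from y where y.A=${x.id}", resolver);
    System.out.println(childQuery); // select * from y where y.A=5
  }
}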

View File

@ -0,0 +1,55 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.junit.Assert;
import org.junit.Test;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import java.util.regex.Pattern;
/**
* <p>
* Test for TemplateString
* </p>
*
* @version $Id$
* @since solr 1.3
*/
public class TestTemplateString {
@Test
public void testSimple() {
VariableResolverImpl vri = new VariableResolverImpl();
Map<String, Object> ns = new HashMap<String, Object>();
ns.put("last_index_time", Long.valueOf(1199429363730l));
vri.addNamespace("indexer", ns);
Assert
.assertEquals(
"select id from subject where last_modified > 1199429363730",
new TemplateString()
.replaceTokens(
"select id from subject where last_modified > ${indexer.last_index_time}",
vri));
}
private static Properties EMPTY_PROPS = new Properties();
private static Pattern SELECT_WHERE_PATTERN = Pattern.compile(
"^\\s*(select\\b.*?\\b)(where).*", Pattern.CASE_INSENSITIVE);
}


View File

@ -0,0 +1,60 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.junit.Assert;
import org.junit.Test;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
* <p>
* Test for TemplateTransformer
* </p>
*
* @version $Id$
* @since solr 1.3
*/
public class TestTemplateTransformer {
@Test
@SuppressWarnings("unchecked")
public void testTransformRow() {
List fields = new ArrayList();
fields.add(AbstractDataImportHandlerTest.createMap("column", "firstName"));
fields.add(AbstractDataImportHandlerTest.createMap("column", "lastName"));
fields.add(AbstractDataImportHandlerTest.createMap("column", "middleName"));
fields.add(AbstractDataImportHandlerTest.createMap("column", "name",
TemplateTransformer.TEMPLATE,
"${e.lastName}, ${e.firstName} ${e.middleName}"));
Map row = AbstractDataImportHandlerTest.createMap("firstName", "Shalin",
"middleName", "Shekhar", "lastName", "Mangar");
VariableResolverImpl resolver = new VariableResolverImpl();
Map<String, String> entityAttrs = AbstractDataImportHandlerTest.createMap(
"name", "e");
Context context = AbstractDataImportHandlerTest.getContext(null, resolver,
null, 0, fields, entityAttrs);
new TemplateTransformer().transformRow(row, context);
Assert.assertEquals("Mangar, Shalin Shekhar", row.get("name"));
}
}

View File

@ -0,0 +1,139 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.junit.Assert;
import org.junit.Test;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
/**
* <p>
* Test for VariableResolver
* </p>
*
* @version $Id$
* @since solr 1.3
*/
public class TestVariableResolver {
@Test
public void testSimpleNamespace() {
VariableResolverImpl vri = new VariableResolverImpl();
Map<String, Object> ns = new HashMap<String, Object>();
ns.put("world", "WORLD");
vri.addNamespace("hello", ns);
Assert.assertEquals("WORLD", vri.resolve("hello.world"));
}
@Test
public void testNestedNamespace() {
VariableResolverImpl vri = new VariableResolverImpl();
Map<String, Object> ns = new HashMap<String, Object>();
ns.put("world", "WORLD");
vri.addNamespace("hello", ns);
ns = new HashMap<String, Object>();
ns.put("world1", "WORLD1");
vri.addNamespace("hello.my", ns);
Assert.assertEquals("WORLD1", vri.resolve("hello.my.world1"));
}
@Test
public void test3LevelNestedNamespace() {
VariableResolverImpl vri = new VariableResolverImpl();
Map<String, Object> ns = new HashMap<String, Object>();
ns.put("world", "WORLD");
vri.addNamespace("hello", ns);
ns = new HashMap<String, Object>();
ns.put("world1", "WORLD1");
vri.addNamespace("hello.my.new", ns);
Assert.assertEquals("WORLD1", vri.resolve("hello.my.new.world1"));
}
@Test
public void dateNamespaceWithValue() {
VariableResolverImpl vri = new VariableResolverImpl();
HashMap<String, Evaluator> evaluators = new HashMap<String, Evaluator>();
evaluators.put("formatDate", EvaluatorBag.getDateFormatEvaluator());
vri.addNamespace("dataimporter.functions", EvaluatorBag
.getFunctionsNamespace(vri, evaluators));
Map<String, Object> ns = new HashMap<String, Object>();
Date d = new Date();
ns.put("dt", d);
vri.addNamespace("A", ns);
Assert
.assertEquals(
new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(d),
vri
.replaceTokens("${dataimporter.functions.formatDate(A.dt,yyyy-MM-dd HH:mm:ss)}"));
}
@Test
public void dateNamespaceWithExpr() {
VariableResolverImpl vri = new VariableResolverImpl();
HashMap<String, Evaluator> evaluators = new HashMap<String, Evaluator>();
evaluators.put("formatDate", EvaluatorBag.getDateFormatEvaluator());
vri.addNamespace("dataimporter.functions", EvaluatorBag
.getFunctionsNamespace(vri, evaluators));
String s = vri
.replaceTokens("${dataimporter.functions.formatDate('NOW',yyyy-MM-dd HH:mm)}");
Assert.assertEquals(new SimpleDateFormat("yyyy-MM-dd HH:mm")
.format(new Date()), s);
}
@Test
public void testDefaultNamespace() {
VariableResolverImpl vri = new VariableResolverImpl();
Map<String, Object> ns = new HashMap<String, Object>();
ns.put("world", "WORLD");
vri.addNamespace(null, ns);
Assert.assertEquals("WORLD", vri.resolve("world"));
}
@Test
public void testDefaultNamespace1() {
VariableResolverImpl vri = new VariableResolverImpl();
Map<String, Object> ns = new HashMap<String, Object>();
ns.put("world", "WORLD");
vri.addNamespace(null, ns);
Assert.assertEquals("WORLD", vri.resolve("world"));
}
@Test
public void testFunctionNamespace1() {
final VariableResolverImpl resolver = new VariableResolverImpl();
final Map<String, Evaluator> evaluators = new HashMap<String, Evaluator>();
evaluators.put("formatDate", EvaluatorBag.getDateFormatEvaluator());
evaluators.put("test", new Evaluator() {
public String evaluate(VariableResolver resolver, String expression) {
return "Hello World";
}
});
resolver.addNamespace("dataimporter.functions", EvaluatorBag
.getFunctionsNamespace(resolver, evaluators));
String s = resolver
.replaceTokens("${dataimporter.functions.formatDate('NOW',yyyy-MM-dd HH:mm)}");
Assert.assertEquals(new SimpleDateFormat("yyyy-MM-dd HH:mm")
.format(new Date()), s);
Assert.assertEquals("Hello World", resolver
.replaceTokens("${dataimporter.functions.test('TEST')}"));
}
}
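The same functions namespace can sit inside a larger query template, which is roughly how a data-config query or url attribute would use formatDate. A small sketch; only the token syntax is taken from the tests above, the surrounding SQL text is an illustrative assumption:

package org.apache.solr.handler.dataimport;

import java.util.HashMap;
import java.util.Map;

public class FormatDateExample {
  public static void main(String[] args) {
    VariableResolverImpl vri = new VariableResolverImpl();
    Map<String, Evaluator> evaluators = new HashMap<String, Evaluator>();
    evaluators.put("formatDate", EvaluatorBag.getDateFormatEvaluator());
    vri.addNamespace("dataimporter.functions",
        EvaluatorBag.getFunctionsNamespace(vri, evaluators));
    // the SQL text is made up for illustration; the formatDate token is the
    // same one exercised in the tests above
    String query = vri.replaceTokens("select id from item where last_modified > "
        + "'${dataimporter.functions.formatDate('NOW',yyyy-MM-dd HH:mm)}'");
    System.out.println(query);
  }
}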

View File

@ -0,0 +1,160 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import static org.apache.solr.handler.dataimport.AbstractDataImportHandlerTest.createMap;
import org.junit.Assert;
import org.junit.Test;
import java.io.File;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
/**
* <p>
* Test for XPathEntityProcessor
* </p>
*
* @version $Id$
* @since solr 1.3
*/
public class TestXPathEntityProcessor {
@Test
public void withFieldsAndXpath() throws Exception {
long time = System.currentTimeMillis();
File tmpdir = new File("." + time);
tmpdir.mkdir();
tmpdir.deleteOnExit();
TestFileListEntityProcessor.createFile(tmpdir, "x.xsl", xsl.getBytes(),
false);
Map entityAttrs = createMap("name", "e", "url", "cd.xml",
XPathEntityProcessor.FOR_EACH, "/catalog/cd");
List fields = new ArrayList();
fields.add(createMap("column", "title", "xpath", "/catalog/cd/title"));
fields.add(createMap("column", "artist", "xpath", "/catalog/cd/artist"));
fields.add(createMap("column", "year", "xpath", "/catalog/cd/year"));
Context c = AbstractDataImportHandlerTest.getContext(null,
new VariableResolverImpl(), getds(), 0, fields, entityAttrs);
XPathEntityProcessor xPathEntityProcessor = new XPathEntityProcessor();
xPathEntityProcessor.init(c);
List<Map<String, Object>> result = new ArrayList<Map<String, Object>>();
while (true) {
Map<String, Object> row = xPathEntityProcessor.nextRow();
if (row == null)
break;
result.add(row);
}
Assert.assertEquals(3, result.size());
Assert.assertEquals("Empire Burlesque", result.get(0).get("title"));
Assert.assertEquals("Bonnie Tyler", result.get(1).get("artist"));
Assert.assertEquals("1982", result.get(2).get("year"));
}
@Test
public void withDefaultSolrAndXsl() throws Exception {
long time = System.currentTimeMillis();
File tmpdir = new File("." + time);
tmpdir.mkdir();
tmpdir.deleteOnExit();
TestFileListEntityProcessor.createFile(tmpdir, "x.xsl", xsl.getBytes(),
false);
Map entityAttrs = createMap("name", "e",
XPathEntityProcessor.USE_SOLR_ADD_SCHEMA, "true", "xsl", ""
+ new File(tmpdir, "x.xsl").getAbsolutePath(), "url", "cd.xml");
Context c = AbstractDataImportHandlerTest.getContext(null,
new VariableResolverImpl(), getds(), 0, null, entityAttrs);
XPathEntityProcessor xPathEntityProcessor = new XPathEntityProcessor();
xPathEntityProcessor.init(c);
List<Map<String, Object>> result = new ArrayList<Map<String, Object>>();
while (true) {
Map<String, Object> row = xPathEntityProcessor.nextRow();
if (row == null)
break;
result.add(row);
}
Assert.assertEquals(3, result.size());
Assert.assertEquals("Empire Burlesque", result.get(0).get("title"));
Assert.assertEquals("Bonnie Tyler", result.get(1).get("artist"));
Assert.assertEquals("1982", result.get(2).get("year"));
}
private DataSource<Reader> getds() {
return new DataSource<Reader>() {
public void init(Context context, Properties initProps) {
}
public void close() {
}
public Reader getData(String query) {
return new StringReader(cdData);
}
};
}
private static final String xsl = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
+ "<xsl:stylesheet version=\"1.0\"\n"
+ "xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\">\n"
+ "<xsl:output version='1.0' method='xml' encoding='UTF-8' indent='yes'/>\n"
+ "\n"
+ "<xsl:template match=\"/\">\n"
+ " <add> \n"
+ " <xsl:for-each select=\"catalog/cd\">\n"
+ " <doc>\n"
+ " <field name=\"title\"><xsl:value-of select=\"title\"/></field>\n"
+ " <field name=\"artist\"><xsl:value-of select=\"artist\"/></field>\n"
+ " <field name=\"country\"><xsl:value-of select=\"country\"/></field>\n"
+ " <field name=\"company\"><xsl:value-of select=\"company\"/></field> \n"
+ " <field name=\"price\"><xsl:value-of select=\"price\"/></field>\n"
+ " <field name=\"year\"><xsl:value-of select=\"year\"/></field> \n"
+ " </doc>\n"
+ " </xsl:for-each>\n"
+ " </add> \n"
+ "</xsl:template>\n" + "</xsl:stylesheet>";
private static final String cdData = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
+ "<?xml-stylesheet type=\"text/xsl\" href=\"solr.xsl\"?>\n"
+ "<catalog>\n"
+ "\t<cd>\n"
+ "\t\t<title>Empire Burlesque</title>\n"
+ "\t\t<artist>Bob Dylan</artist>\n"
+ "\t\t<country>USA</country>\n"
+ "\t\t<company>Columbia</company>\n"
+ "\t\t<price>10.90</price>\n"
+ "\t\t<year>1985</year>\n"
+ "\t</cd>\n"
+ "\t<cd>\n"
+ "\t\t<title>Hide your heart</title>\n"
+ "\t\t<artist>Bonnie Tyler</artist>\n"
+ "\t\t<country>UK</country>\n"
+ "\t\t<company>CBS Records</company>\n"
+ "\t\t<price>9.90</price>\n"
+ "\t\t<year>1988</year>\n"
+ "\t</cd>\n"
+ "\t<cd>\n"
+ "\t\t<title>Greatest Hits</title>\n"
+ "\t\t<artist>Dolly Parton</artist>\n"
+ "\t\t<country>USA</country>\n"
+ "\t\t<company>RCA</company>\n"
+ "\t\t<price>9.90</price>\n"
+ "\t\t<year>1982</year>\n" + "\t</cd>\n" + "</catalog>\t";
}
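getds() above shows the contract a Reader-backed DataSource has to satisfy: init, getData(query) and close. A minimal sketch using the same anonymous-class pattern, except that the query string is treated as a local file path; this is illustrative only and not one of the data sources shipped with the handler:

package org.apache.solr.handler.dataimport;

import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.Properties;

public class LocalFileReaderDataSource {
  // same anonymous-class shape as getds() above, but the query/url string is
  // interpreted as a path to a local XML file
  public static DataSource<Reader> create() {
    return new DataSource<Reader>() {
      public void init(Context context, Properties initProps) {
      }
      public Reader getData(String query) {
        try {
          return new FileReader(query);
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
      }
      public void close() {
      }
    };
  }
}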

View File

@ -0,0 +1,220 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed onT an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.dataimport;
import org.junit.Assert;
import org.junit.Test;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
* <p>
* Test for XPathRecordReader
* </p>
*
* @version $Id$
* @since solr 1.3
*/
public class TestXPathRecordReader {
@Test
public void basic() {
String xml = "<root>\n" + " <b>\n" + " <c>Hello C1</c>\n"
+ " <c>Hello C1</c>\n" + " </b>\n" + " <b>\n"
+ " <c>Hello C2</c>\n" + " </b>\n" + "</root>";
XPathRecordReader rr = new XPathRecordReader("/root/b");
rr.addField("c", "/root/b/c", true);
List<Map<String, Object>> l = rr.getAllRecords(new StringReader(xml));
Assert.assertEquals(2, l.size());
Assert.assertEquals(2, ((List) l.get(0).get("c")).size());
Assert.assertEquals(1, ((List) l.get(1).get("c")).size());
}
@Test
public void attributes() {
String xml = "<root>\n" + " <b a=\"x0\" b=\"y0\" />\n"
+ " <b a=\"x1\" b=\"y1\" />\n" + " <b a=\"x2\" b=\"y2\" />\n"
+ "</root>";
XPathRecordReader rr = new XPathRecordReader("/root/b");
rr.addField("a", "/root/b/@a", false);
rr.addField("b", "/root/b/@b", false);
List<Map<String, Object>> l = rr.getAllRecords(new StringReader(xml));
Assert.assertEquals(3, l.size());
Assert.assertEquals("x0", l.get(0).get("a"));
Assert.assertEquals("y1", l.get(1).get("b"));
}
@Test
public void attributes2Level() {
String xml = "<root>\n" + "<a>\n" + " <b a=\"x0\" b=\"y0\" />\n"
+ " <b a=\"x1\" b=\"y1\" />\n" + " <b a=\"x2\" b=\"y2\" />\n"
+ "</a>" + "</root>";
XPathRecordReader rr = new XPathRecordReader("/root/a/b");
rr.addField("a", "/root/a/b/@a", false);
rr.addField("b", "/root/a/b/@b", false);
List<Map<String, Object>> l = rr.getAllRecords(new StringReader(xml));
Assert.assertEquals(3, l.size());
Assert.assertEquals("x0", l.get(0).get("a"));
Assert.assertEquals("y1", l.get(1).get("b"));
}
@Test
public void attributes2LevelHetero() {
String xml = "<root>\n" + "<a>\n" + " <b a=\"x0\" b=\"y0\" />\n"
+ " <b a=\"x1\" b=\"y1\" />\n" + " <b a=\"x2\" b=\"y2\" />\n"
+ "</a>" + "<x>\n" + " <b a=\"x4\" b=\"y4\" />\n"
+ " <b a=\"x5\" b=\"y5\" />\n" + " <b a=\"x6\" b=\"y6\" />\n"
+ "</x>" + "</root>";
XPathRecordReader rr = new XPathRecordReader("/root/a | /root/x");
rr.addField("a", "/root/a/b/@a", false);
rr.addField("b", "/root/a/b/@b", false);
rr.addField("a", "/root/x/b/@a", false);
rr.addField("b", "/root/x/b/@b", false);
final List<Map<String, Object>> a = new ArrayList<Map<String, Object>>();
final List<Map<String, Object>> x = new ArrayList<Map<String, Object>>();
rr.streamRecords(new StringReader(xml), new XPathRecordReader.Handler() {
public void handle(Map<String, Object> record, String xpath) {
if (record == null)
return;
if (xpath.equals("/root/a"))
a.add(record);
if (xpath.equals("/root/x"))
x.add(record);
}
});
Assert.assertEquals(1, a.size());
Assert.assertEquals(1, x.size());
}
@Test
public void attributes2LevelMissingAttrVal() {
String xml = "<root>\n" + "<a>\n" + " <b a=\"x0\" b=\"y0\" />\n"
+ " <b a=\"x1\" b=\"y1\" />\n" + "</a>" + "<a>\n"
+ " <b a=\"x3\" />\n" + " <b b=\"y4\" />\n" + "</a>" + "</root>";
XPathRecordReader rr = new XPathRecordReader("/root/a");
rr.addField("a", "/root/a/b/@a", true);
rr.addField("b", "/root/a/b/@b", true);
List<Map<String, Object>> l = rr.getAllRecords(new StringReader(xml));
Assert.assertEquals(2, l.size());
Assert.assertNull(((List) l.get(1).get("a")).get(1));
Assert.assertNull(((List) l.get(1).get("b")).get(0));
}
@Test
public void elems2LevelMissing() {
String xml = "<root>\n" + "\t<a>\n" + "\t <b>\n" + "\t <x>x0</x>\n"
+ "\t <y>y0</y>\n" + "\t </b>\n" + "\t <b>\n"
+ "\t \t<x>x1</x>\n" + "\t \t<y>y1</y>\n" + "\t </b>\n"
+ "\t</a>\n" + "\t<a>\n" + "\t <b>\n" + "\t <x>x3</x>\n"
+ "\t </b>\n" + "\t <b>\n" + "\t \t<y>y4</y>\n" + "\t </b>\n"
+ "\t</a>\n" + "</root>";
XPathRecordReader rr = new XPathRecordReader("/root/a");
rr.addField("a", "/root/a/b/x", true);
rr.addField("b", "/root/a/b/y", true);
List<Map<String, Object>> l = rr.getAllRecords(new StringReader(xml));
Assert.assertEquals(2, l.size());
Assert.assertNull(((List) l.get(1).get("a")).get(1));
Assert.assertNull(((List) l.get(1).get("b")).get(0));
}
@Test
public void elems2LevelWithAttrib() {
String xml = "<root>\n" + "\t<a>\n" + "\t <b k=\"x\">\n"
+ "\t <x>x0</x>\n" + "\t <y>y0</y>\n" + "\t </b>\n"
+ "\t <b k=\"y\">\n" + "\t \t<x>x1</x>\n" + "\t \t<y>y1</y>\n"
+ "\t </b>\n" + "\t</a>\n" + "\t<a>\n" + "\t <b>\n"
+ "\t <x>x3</x>\n" + "\t </b>\n" + "\t <b>\n"
+ "\t \t<y>y4</y>\n" + "\t </b>\n" + "\t</a>\n" + "</root>";
XPathRecordReader rr = new XPathRecordReader("/root/a");
rr.addField("x", "/root/a/b[@k]/x", true);
rr.addField("y", "/root/a/b[@k]/y", true);
List<Map<String, Object>> l = rr.getAllRecords(new StringReader(xml));
Assert.assertEquals(2, l.size());
Assert.assertEquals(2, ((List) l.get(0).get("x")).size());
Assert.assertEquals(2, ((List) l.get(0).get("y")).size());
Assert.assertEquals(0, l.get(1).size());
}
@Test
public void elems2LevelWithAttribMultiple() {
String xml = "<root>\n" + "\t<a>\n" + "\t <b k=\"x\" m=\"n\" >\n"
+ "\t <x>x0</x>\n" + "\t <y>y0</y>\n" + "\t </b>\n"
+ "\t <b k=\"y\" m=\"p\">\n" + "\t \t<x>x1</x>\n"
+ "\t \t<y>y1</y>\n" + "\t </b>\n" + "\t</a>\n" + "\t<a>\n"
+ "\t <b k=\"x\">\n" + "\t <x>x3</x>\n" + "\t </b>\n"
+ "\t <b m=\"n\">\n" + "\t \t<y>y4</y>\n" + "\t </b>\n"
+ "\t</a>\n" + "</root>";
XPathRecordReader rr = new XPathRecordReader("/root/a");
rr.addField("x", "/root/a/b[@k][@m='n']/x", true);
rr.addField("y", "/root/a/b[@k][@m='n']/y", true);
List<Map<String, Object>> l = rr.getAllRecords(new StringReader(xml));
Assert.assertEquals(2, l.size());
Assert.assertEquals(1, ((List) l.get(0).get("x")).size());
Assert.assertEquals(1, ((List) l.get(0).get("y")).size());
Assert.assertEquals(0, l.get(1).size());
}
@Test
public void elems2LevelWithAttribVal() {
String xml = "<root>\n" + "\t<a>\n" + "\t <b k=\"x\">\n"
+ "\t <x>x0</x>\n" + "\t <y>y0</y>\n" + "\t </b>\n"
+ "\t <b k=\"y\">\n" + "\t \t<x>x1</x>\n" + "\t \t<y>y1</y>\n"
+ "\t </b>\n" + "\t</a>\n" + "\t<a>\n" + "\t <b>\n"
+ "\t <x>x3</x>\n" + "\t </b>\n" + "\t <b>\n"
+ "\t \t<y>y4</y>\n" + "\t </b>\n" + "\t</a>\n" + "</root>";
XPathRecordReader rr = new XPathRecordReader("/root/a");
rr.addField("x", "/root/a/b[@k='x']/x", true);
rr.addField("y", "/root/a/b[@k='x']/y", true);
List<Map<String, Object>> l = rr.getAllRecords(new StringReader(xml));
Assert.assertEquals(2, l.size());
Assert.assertEquals(1, ((List) l.get(0).get("x")).size());
Assert.assertEquals(1, ((List) l.get(0).get("y")).size());
Assert.assertEquals(0, l.get(1).size());
}
@Test
public void another() {
String xml = "<root>\n"
+ " <contenido id=\"10097\" idioma=\"cat\">\n"
+ " <antetitulo></antetitulo>\n" + " <titulo>\n"
+ " This is my title\n" + " </titulo>\n"
+ " <resumen>\n" + " This is my summary\n"
+ " </resumen>\n" + " <texto>\n"
+ " This is the body of my text\n" + " </texto>\n"
+ " </contenido>\n" + "</root>";
XPathRecordReader rr = new XPathRecordReader("/root/contenido");
rr.addField("id", "/root/contenido/@id", false);
rr.addField("title", "/root/contenido/titulo", false);
rr.addField("resume", "/root/contenido/resumen", false);
rr.addField("text", "/root/contenido/texto", false);
List<Map<String, Object>> l = rr.getAllRecords(new StringReader(xml));
Assert.assertEquals(1, l.size());
Map<String, Object> m = l.get(0);
Assert.assertEquals("10097", m.get("id").toString().trim());
Assert.assertEquals("This is my title", m.get("title").toString().trim());
Assert
.assertEquals("This is my summary", m.get("resume").toString().trim());
Assert.assertEquals("This is the body of my text", m.get("text").toString()
.trim());
}
}
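The tests above drive XPathRecordReader through addField() and getAllRecords()/streamRecords(). A compact standalone sketch of the same calls on a small two-record document; the feed/item structure is made up for illustration:

package org.apache.solr.handler.dataimport;

import java.io.StringReader;
import java.util.List;
import java.util.Map;

public class XPathRecordReaderExample {
  public static void main(String[] args) {
    String xml = "<feed><item id=\"1\"><title>one</title></item>"
        + "<item id=\"2\"><title>two</title></item></feed>";
    XPathRecordReader rr = new XPathRecordReader("/feed/item");
    rr.addField("id", "/feed/item/@id", false);      // attribute, single valued
    rr.addField("title", "/feed/item/title", false); // element text, single valued
    List<Map<String, Object>> records = rr.getAllRecords(new StringReader(xml));
    System.out.println(records.size());              // 2
    System.out.println(records.get(0).get("title")); // one
  }
}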

View File

@ -0,0 +1,2 @@
#Thu Jul 31 00:57:19 IST 2008
last_index_time=2008-07-31 00\:57\:19

View File

@ -0,0 +1,9 @@
<dataConfig>
<dataSource type="MockDataSource" />
<document>
<entity name="x" query="select * from x">
<field column="id" />
<field column="desc" />
</entity>
</document>
</dataConfig>

View File

@ -0,0 +1,404 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<config>
<!-- Set this to 'false' if you want solr to continue working after it has
       encountered a severe configuration error. In a production environment,
       you may want solr to keep working even if one handler is mis-configured.
       You may also set this to false by setting the system property:
-Dsolr.abortOnConfigurationError=false
-->
<abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>
<!-- Used to specify an alternate directory to hold all index data
other than the default ./data under the Solr home.
If replication is in use, this should match the replication configuration. -->
<dataDir>${solr.data.dir:./solr/data}</dataDir>
<indexDefaults>
<!-- Values here affect all index writers and act as a default unless overridden. -->
<useCompoundFile>false</useCompoundFile>
<mergeFactor>10</mergeFactor>
<!--
If both ramBufferSizeMB and maxBufferedDocs is set, then Lucene will flush based on whichever limit is hit first.
-->
<!--<maxBufferedDocs>1000</maxBufferedDocs>-->
<!-- Tell Lucene when to flush documents to disk.
Giving Lucene more memory for indexing means faster indexing at the cost of more RAM
If both ramBufferSizeMB and maxBufferedDocs is set, then Lucene will flush based on whichever limit is hit first.
-->
<ramBufferSizeMB>32</ramBufferSizeMB>
<maxMergeDocs>2147483647</maxMergeDocs>
<maxFieldLength>10000</maxFieldLength>
<writeLockTimeout>1000</writeLockTimeout>
<commitLockTimeout>10000</commitLockTimeout>
<!--
Expert: Turn on Lucene's auto commit capability.
TODO: Add recommendations on why you would want to do this.
NOTE: Despite the name, this value does not have any relation to Solr's autoCommit functionality
-->
<!--<luceneAutoCommit>false</luceneAutoCommit>-->
<!--
Expert:
The Merge Policy in Lucene controls how merging is handled by Lucene. The default in 2.3 is the LogByteSizeMergePolicy, previous
versions used LogDocMergePolicy.
LogByteSizeMergePolicy chooses segments to merge based on their size. The Lucene 2.2 default, LogDocMergePolicy chose when
to merge based on number of documents
Other implementations of MergePolicy must have a no-argument constructor
-->
<!--<mergePolicy>org.apache.lucene.index.LogByteSizeMergePolicy</mergePolicy>-->
<!--
Expert:
The Merge Scheduler in Lucene controls how merges are performed. The ConcurrentMergeScheduler (Lucene 2.3 default)
can perform merges in the background using separate threads. The SerialMergeScheduler (Lucene 2.2 default) does not.
-->
<!--<mergeScheduler>org.apache.lucene.index.ConcurrentMergeScheduler</mergeScheduler>-->
<!--
As long as Solr is the only process modifying your index, it is
safe to use Lucene's in process locking mechanism. But you may
specify one of the other Lucene LockFactory implementations in
the event that you have a custom situation.
none = NoLockFactory (typically only used with read only indexes)
single = SingleInstanceLockFactory (suggested)
native = NativeFSLockFactory
simple = SimpleFSLockFactory
('simple' is the default for backwards compatibility with Solr 1.2)
-->
<lockType>single</lockType>
</indexDefaults>
<mainIndex>
<!-- options specific to the main on-disk lucene index -->
<useCompoundFile>false</useCompoundFile>
<ramBufferSizeMB>32</ramBufferSizeMB>
<mergeFactor>10</mergeFactor>
<!-- Deprecated -->
<!--<maxBufferedDocs>1000</maxBufferedDocs>-->
<maxMergeDocs>2147483647</maxMergeDocs>
<maxFieldLength>10000</maxFieldLength>
<!-- If true, unlock any held write or commit locks on startup.
This defeats the locking mechanism that allows multiple
processes to safely access a lucene index, and should be
used with care.
This is not needed if lock type is 'none' or 'single'
-->
<unlockOnStartup>false</unlockOnStartup>
</mainIndex>
<!-- the default high-performance update handler -->
<updateHandler class="solr.DirectUpdateHandler2">
<!-- A prefix of "solr." for class names is an alias that
causes solr to search appropriate packages, including
org.apache.solr.(search|update|request|core|analysis)
-->
<!-- Limit the number of deletions Solr will buffer during doc updating.
Setting this lower can help bound memory use during indexing.
-->
<maxPendingDeletes>100000</maxPendingDeletes>
</updateHandler>
<query>
<!-- Maximum number of clauses in a boolean query... can affect
range or prefix queries that expand to big boolean
queries. An exception is thrown if exceeded. -->
<maxBooleanClauses>1024</maxBooleanClauses>
<!-- Cache used by SolrIndexSearcher for filters (DocSets),
unordered sets of *all* documents that match a query.
When a new searcher is opened, its caches may be prepopulated
or "autowarmed" using data from caches in the old searcher.
autowarmCount is the number of items to prepopulate. For LRUCache,
the autowarmed items will be the most recently accessed items.
Parameters:
class - the SolrCache implementation (currently only LRUCache)
size - the maximum number of entries in the cache
initialSize - the initial capacity (number of entries) of
                   the cache. (see java.util.HashMap)
      autowarmCount - the number of entries to prepopulate from
                   an old cache.
-->
<filterCache
class="solr.LRUCache"
size="512"
initialSize="512"
autowarmCount="256"/>
<!-- queryResultCache caches results of searches - ordered lists of
document ids (DocList) based on a query, a sort, and the range
of documents requested. -->
<queryResultCache
class="solr.LRUCache"
size="512"
initialSize="512"
autowarmCount="256"/>
<!-- documentCache caches Lucene Document objects (the stored fields for each document).
Since Lucene internal document ids are transient, this cache will not be autowarmed. -->
<documentCache
class="solr.LRUCache"
size="512"
initialSize="512"
autowarmCount="0"/>
<!-- If true, stored fields that are not requested will be loaded lazily.
This can result in a significant speed improvement if the usual case is to
not load all stored fields, especially if the skipped fields are large compressed
text fields.
-->
<enableLazyFieldLoading>true</enableLazyFieldLoading>
<!-- Example of a generic cache. These caches may be accessed by name
through SolrIndexSearcher.getCache(),cacheLookup(), and cacheInsert().
The purpose is to enable easy caching of user/application level data.
The regenerator argument should be specified as an implementation
of solr.search.CacheRegenerator if autowarming is desired. -->
<!--
<cache name="myUserCache"
class="solr.LRUCache"
size="4096"
initialSize="1024"
autowarmCount="1024"
regenerator="org.mycompany.mypackage.MyRegenerator"
/>
-->
<!-- An optimization that attempts to use a filter to satisfy a search.
If the requested sort does not include score, then the filterCache
will be checked for a filter matching the query. If found, the filter
will be used as the source of document ids, and then the sort will be
applied to that.
<useFilterForSortedQuery>true</useFilterForSortedQuery>
-->
<!-- An optimization for use with the queryResultCache. When a search
is requested, a superset of the requested number of document ids
are collected. For example, if a search for a particular query
requests matching documents 10 through 19, and queryWindowSize is 50,
then documents 0 through 49 will be collected and cached. Any further
requests in that range can be satisfied via the cache. -->
<queryResultWindowSize>50</queryResultWindowSize>
<!-- Maximum number of documents to cache for any entry in the
queryResultCache. -->
<queryResultMaxDocsCached>200</queryResultMaxDocsCached>
<!-- This entry enables an int hash representation for filters (DocSets)
when the number of items in the set is less than maxSize. For smaller
sets, this representation is more memory efficient, more efficient to
iterate over, and faster to take intersections. -->
<HashDocSet maxSize="3000" loadFactor="0.75"/>
<!-- a newSearcher event is fired whenever a new searcher is being prepared
and there is a current searcher handling requests (aka registered). -->
<!-- QuerySenderListener takes an array of NamedList and executes a
local query request for each NamedList in sequence. -->
<listener event="newSearcher" class="solr.QuerySenderListener">
<arr name="queries">
<lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> </lst>
<lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst>
<lst><str name="q">static newSearcher warming query from solrconfig.xml</str></lst>
</arr>
</listener>
<!-- a firstSearcher event is fired whenever a new searcher is being
prepared but there is no current registered searcher to handle
requests or to gain autowarming data from. -->
<listener event="firstSearcher" class="solr.QuerySenderListener">
<arr name="queries">
</arr>
</listener>
<!-- If a search request comes in and there is no current registered searcher,
then immediately register the still warming searcher and use it. If
"false" then all requests will block until the first searcher is done
warming. -->
<useColdSearcher>false</useColdSearcher>
<!-- Maximum number of searchers that may be warming in the background
concurrently. An error is returned if this limit is exceeded. Recommend
1-2 for read-only slaves, higher for masters w/o cache warming. -->
<maxWarmingSearchers>4</maxWarmingSearchers>
</query>
<!--
Let the dispatch filter handler /select?qt=XXX
handleSelect=true will use consistent error handling for /select and /update
handleSelect=false will use solr1.1 style error formatting
-->
<requestDispatcher handleSelect="true" >
<!--Make sure your system has some authentication before enabling remote streaming! -->
<requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" />
<!-- Set HTTP caching related parameters (for proxy caches and clients).
To get the behaviour of Solr 1.2 (ie: no caching related headers)
use the never304="true" option and do not specify a value for
<cacheControl>
-->
<httpCaching never304="true">
<!--httpCaching lastModifiedFrom="openTime"
etagSeed="Solr"-->
<!-- lastModFrom="openTime" is the default, the Last-Modified value
(and validation against If-Modified-Since requests) will all be
relative to when the current Searcher was opened.
You can change it to lastModFrom="dirLastMod" if you want the
         value to exactly correspond to when the physical index was last
modified.
etagSeed="..." is an option you can change to force the ETag
header (and validation against If-None-Match requests) to be
         different even if the index has not changed (ie: when making
significant changes to your config file)
lastModifiedFrom and etagSeed are both ignored if you use the
never304="true" option.
-->
<!-- If you include a <cacheControl> directive, it will be used to
generate a Cache-Control header, as well as an Expires header
if the value contains "max-age="
By default, no Cache-Control header is generated.
You can use the <cacheControl> option even if you have set
never304="true"
-->
<!-- <cacheControl>max-age=30, public</cacheControl> -->
</httpCaching>
</requestDispatcher>
<!-- requestHandler plugins... incoming queries will be dispatched to the
correct handler based on the path or the qt (query type) param.
    Names starting with a '/' are accessed with a path equal to the
registered name. Names without a leading '/' are accessed with:
http://host/app/select?qt=name
If no qt is defined, the requestHandler that declares default="true"
will be used.
-->
<requestHandler name="standard" class="solr.StandardRequestHandler" default="true">
<!-- default values for query parameters -->
<lst name="defaults">
<str name="echoParams">explicit</str>
<!--
<int name="rows">10</int>
<str name="fl">*</str>
<str name="version">2.1</str>
-->
</lst>
</requestHandler>
<requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
</requestHandler>
<!--
Search components are registered to SolrCore and used by Search Handlers
    By default, the following components are available:
<searchComponent name="query" class="org.apache.solr.handler.component.QueryComponent" />
<searchComponent name="facet" class="org.apache.solr.handler.component.FacetComponent" />
<searchComponent name="mlt" class="org.apache.solr.handler.component.MoreLikeThisComponent" />
<searchComponent name="highlight" class="org.apache.solr.handler.component.HighlightComponent" />
<searchComponent name="debug" class="org.apache.solr.handler.component.DebugComponent" />
If you register a searchComponent to one of the standard names, that will be used instead.
-->
<requestHandler name="/search" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<str name="echoParams">explicit</str>
</lst>
<!--
By default, this will register the following components:
<arr name="components">
<str>query</str>
<str>facet</str>
<str>mlt</str>
<str>highlight</str>
<str>debug</str>
</arr>
To insert handlers before or after the 'standard' components, use:
<arr name="first-components">
<str>first</str>
</arr>
<arr name="last-components">
<str>last</str>
</arr>
-->
</requestHandler>
<!-- Update request handler.
Note: Since solr1.1 requestHandlers requires a valid content type header if posted in
the body. For example, curl now requires: -H 'Content-type:text/xml; charset=utf-8'
The response format differs from solr1.1 formatting and returns a standard error code.
To enable solr1.1 behavior, remove the /update handler or change its path
"update.processor.class" is the class name for the UpdateRequestProcessor. It is initalized
only once. This can not be changed for each request.
-->
<requestHandler name="/update" class="solr.XmlUpdateRequestHandler" >
<!--
<str name="update.processor.class">org.apache.solr.handler.UpdateRequestProcessor</str>
-->
</requestHandler>
<!-- config for the admin interface -->
<admin>
<defaultQuery>*:*</defaultQuery>
<!-- configure a healthcheck file for servers behind a loadbalancer
<healthcheck type="file">server-enabled</healthcheck>
-->
</admin>
</config>
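This solrconfig registers the import handler at /dataimport. A hedged sketch of triggering it from plain Java over HTTP; the host, port, core path and the command=full-import parameter are assumptions about a default single-core setup, not values taken from this configuration:

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;

public class TriggerImport {
  public static void main(String[] args) throws Exception {
    // host, port, path and command are assumptions, adjust for the actual deployment
    URL url = new URL("http://localhost:8983/solr/dataimport?command=full-import");
    BufferedReader in = new BufferedReader(
        new InputStreamReader(url.openStream(), "UTF-8"));
    for (String line; (line = in.readLine()) != null;) {
      System.out.println(line); // the handler's status response
    }
    in.close();
  }
}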

View File

@ -0,0 +1,304 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
This is the Solr schema file. This file should be named "schema.xml" and
should be in the conf directory under the solr home
(i.e. ./solr/conf/schema.xml by default)
or located where the classloader for the Solr webapp can find it.
This example schema is the recommended starting point for users.
It should be kept correct and concise, usable out-of-the-box.
For more information, on how to customize this file, please see
http://wiki.apache.org/solr/SchemaXml
-->
<schema name="test" version="1.1">
<!-- attribute "name" is the name of this schema and is only used for display purposes.
Applications should change this to reflect the nature of the search collection.
version="1.1" is Solr's version number for the schema syntax and semantics. It should
not normally be changed by applications.
1.0: multiValued attribute did not exist, all fields are multiValued by nature
1.1: multiValued attribute introduced, false by default -->
<types>
<!-- field type definitions. The "name" attribute is
just a label to be used by field definitions. The "class"
attribute and any other attributes determine the real
behavior of the fieldType.
Class names starting with "solr" refer to java classes in the
org.apache.solr.analysis package.
-->
<!-- The StrField type is not analyzed, but indexed/stored verbatim.
- StrField and TextField support an optional compressThreshold which
limits compression (if enabled in the derived fields) to values which
exceed a certain size (in characters).
-->
<fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
<!-- boolean type: "true" or "false" -->
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
<!-- The optional sortMissingLast and sortMissingFirst attributes are
currently supported on types that are sorted internally as strings.
- If sortMissingLast="true", then a sort on this field will cause documents
without the field to come after documents with the field,
regardless of the requested sort order (asc or desc).
- If sortMissingFirst="true", then a sort on this field will cause documents
without the field to come before documents with the field,
regardless of the requested sort order.
- If sortMissingLast="false" and sortMissingFirst="false" (the default),
then default lucene sorting will be used which places docs without the
field first in an ascending sort and last in a descending sort.
-->
<!-- numeric field types that store and index the text
value verbatim (and hence don't support range queries, since the
lexicographic ordering isn't equal to the numeric ordering) -->
<fieldType name="integer" class="solr.IntField" omitNorms="true"/>
<fieldType name="long" class="solr.LongField" omitNorms="true"/>
<fieldType name="float" class="solr.FloatField" omitNorms="true"/>
<fieldType name="double" class="solr.DoubleField" omitNorms="true"/>
<!-- Numeric field types that manipulate the value into
a string value that isn't human-readable in its internal form,
but with a lexicographic ordering the same as the numeric ordering,
so that range queries work correctly. -->
<fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true"/>
<fieldType name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true"/>
<fieldType name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true"/>
<fieldType name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true"/>
<!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
is a more restricted form of the canonical representation of dateTime
http://www.w3.org/TR/xmlschema-2/#dateTime
The trailing "Z" designates UTC time and is mandatory.
Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
All other components are mandatory.
Expressions can also be used to denote calculations that should be
performed relative to "NOW" to determine the value, ie...
NOW/HOUR
... Round to the start of the current hour
NOW-1DAY
... Exactly 1 day prior to now
NOW/DAY+6MONTHS+3DAYS
... 6 months and 3 days in the future from the start of
the current day
Consult the DateField javadocs for more information.
-->
<fieldType name="date" class="solr.DateField" sortMissingLast="true" omitNorms="true"/>
<!-- The "RandomSortField" is not used to store or search any
         data.  You can declare fields of this type in your schema
         to generate pseudo-random orderings of your docs for sorting
purposes. The ordering is generated based on the field name
         and the version of the index.  As long as the index version
         remains unchanged, and the same field name is reused,
         the ordering of the docs will be consistent.
         If you want different pseudo-random orderings of documents,
for the same version of the index, use a dynamicField and
change the name
-->
<fieldType name="random" class="solr.RandomSortField" indexed="true" />
<!-- solr.TextField allows the specification of custom text analyzers
specified as a tokenizer and a list of token filters. Different
analyzers may be specified for indexing and querying.
The optional positionIncrementGap puts space between multiple fields of
this type on the same document, with the purpose of preventing false phrase
matching across fields.
For more info on customizing your analyzer chain, please see
http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
-->
<!-- One can also specify an existing Analyzer class that has a
default constructor via the class attribute on the analyzer element
<fieldType name="text_greek" class="solr.TextField">
<analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
</fieldType>
-->
<!-- A text field that only splits on whitespace for exact matching of words -->
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
</analyzer>
</fieldType>
<!-- A text field that uses WordDelimiterFilter to enable splitting and matching of
words on case-change, alpha numeric boundaries, and non-alphanumeric chars,
so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
Synonyms and stopwords are customized by external files, and stemming is enabled.
Duplicate tokens at the same position (which may result from Stemmed Synonyms or
WordDelim parts) are removed.
-->
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
-->
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
</fieldType>
<!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
 but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
<fieldType name="textTight" class="solr.TextField" positionIncrementGap="100" >
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
</fieldType>
<!-- This is an example of using the KeywordTokenizer along
with various TokenFilterFactories to produce a sortable field
that does not include some properties of the source text
-->
<fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
<analyzer>
<!-- KeywordTokenizer does no actual tokenizing, so the entire
input string is preserved as a single token
-->
<tokenizer class="solr.KeywordTokenizerFactory"/>
<!-- The LowerCase TokenFilter does what you expect, which can be
useful when you want your sorting to be case insensitive
-->
<filter class="solr.LowerCaseFilterFactory" />
<!-- The TrimFilter removes any leading or trailing whitespace -->
<filter class="solr.TrimFilterFactory" />
<!-- The PatternReplaceFilter gives you the flexibility to use
Java regular expressions to replace any sequence of characters
matching a pattern with an arbitrary replacement string,
which may include back references to portions of the original
string matched by the pattern.
See the Java Regular Expression documentation for more
information on pattern and replacement string syntax.
http://java.sun.com/j2se/1.5.0/docs/api/java/util/regex/package-summary.html
-->
<filter class="solr.PatternReplaceFilterFactory"
pattern="([^a-z])" replacement="" replace="all"
/>
</analyzer>
</fieldType>
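<!-- Illustrative only: with this chain a stored value such as "  Wi-Fi Router 2  "
 is kept as a single token, lowercased, trimmed, and stripped of characters
 outside a-z, so it sorts as "wifirouter".
-->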
<!-- since fields of this type are by default not stored or indexed, any data added to
them will be ignored outright
-->
<fieldtype name="ignored" stored="false" indexed="false" class="solr.StrField" />
</types>
<fields>
<!-- Valid attributes for fields:
name: mandatory - the name for the field
type: mandatory - the name of a previously defined type from the <types> section
indexed: true if this field should be indexed (searchable or sortable)
stored: true if this field should be retrievable
compressed: [false] if this field should be stored using gzip compression
(this will only apply if the field type is compressible; among
the standard field types, only TextField and StrField are)
multiValued: true if this field may contain multiple values per document
omitNorms: (expert) set to true to omit the norms associated with
this field (this disables length normalization and index-time
boosting for the field, and saves some memory). Only full-text
fields or fields that need an index-time boost need norms.
termVectors: [false] set to true to store the term vector for a given field.
When using MoreLikeThis, fields used for similarity should be stored for
best performance.
-->
<field name="id" type="string" indexed="true" stored="true" required="true" />
<field name="desc" type="string" indexed="true" stored="true" multiValued="true" />
<field name="date" type="date" indexed="true" stored="true" />
<field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
will be used if the name matches any of the patterns.
RESTRICTION: the glob-like pattern in the name attribute must have
a "*" only at the start or the end.
EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
Longer patterns will be matched first. If equal size patterns
both match, the first appearing in the schema will be used. -->
<dynamicField name="*_i" type="sint" indexed="true" stored="true"/>
<dynamicField name="*_s" type="string" indexed="true" stored="true"/>
<dynamicField name="*_l" type="slong" indexed="true" stored="true"/>
<dynamicField name="*_t" type="text" indexed="true" stored="true"/>
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
<dynamicField name="*_f" type="sfloat" indexed="true" stored="true"/>
<dynamicField name="*_d" type="sdouble" indexed="true" stored="true"/>
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
<dynamicField name="random*" type="random" />
<!-- uncomment the following to ignore any fields that don't already match an existing
field name or dynamic field, rather than reporting them as an error.
alternately, change the type="ignored" to some other type e.g. "text" if you want
unknown fields indexed and/or stored by default -->
<!--dynamicField name="*" type="ignored" /-->
</fields>
<!-- Field to use to determine and enforce document uniqueness.
Unless this field is marked with required="false", it will be a required field
-->
<uniqueKey>id</uniqueKey>
<!-- field for the QueryParser to use when an explicit fieldname is absent -->
<defaultSearchField>desc</defaultSearchField>
<!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
<solrQueryParser defaultOperator="OR"/>
</schema>

View File

@ -0,0 +1,409 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<config>
<!-- Set this to 'false' if you want solr to continue working after it has
encountered a severe configuration error. In a production environment,
you may want solr to keep working even if one handler is mis-configured.
You may also set this to false by setting the system property:
-Dsolr.abortOnConfigurationError=false
-->
<abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>
<!-- Used to specify an alternate directory to hold all index data
other than the default ./data under the Solr home.
If replication is in use, this should match the replication configuration. -->
<dataDir>${solr.data.dir:./solr/data}</dataDir>
<indexDefaults>
<!-- Values here affect all index writers and act as a default unless overridden. -->
<useCompoundFile>false</useCompoundFile>
<mergeFactor>10</mergeFactor>
<!--<maxBufferedDocs>1000</maxBufferedDocs>-->
<!-- Tell Lucene when to flush documents to disk.
Giving Lucene more memory for indexing means faster indexing at the cost of more RAM.
If both ramBufferSizeMB and maxBufferedDocs are set, Lucene will flush based on whichever limit is hit first.
-->
<ramBufferSizeMB>32</ramBufferSizeMB>
<maxMergeDocs>2147483647</maxMergeDocs>
<maxFieldLength>10000</maxFieldLength>
<writeLockTimeout>1000</writeLockTimeout>
<commitLockTimeout>10000</commitLockTimeout>
<!--
Expert: Turn on Lucene's auto commit capability.
TODO: Add recommendations on why you would want to do this.
NOTE: Despite the name, this value does not have any relation to Solr's autoCommit functionality
-->
<!--<luceneAutoCommit>false</luceneAutoCommit>-->
<!--
Expert:
The Merge Policy in Lucene controls how merging is handled. The default in Lucene 2.3 is LogByteSizeMergePolicy; previous
versions used LogDocMergePolicy.
LogByteSizeMergePolicy chooses segments to merge based on their size. The Lucene 2.2 default, LogDocMergePolicy, chose when
to merge based on the number of documents.
Other implementations of MergePolicy must have a no-argument constructor.
-->
<!--<mergePolicy>org.apache.lucene.index.LogByteSizeMergePolicy</mergePolicy>-->
<!--
Expert:
The Merge Scheduler in Lucene controls how merges are performed. The ConcurrentMergeScheduler (Lucene 2.3 default)
can perform merges in the background using separate threads. The SerialMergeScheduler (Lucene 2.2 default) does not.
-->
<!--<mergeScheduler>org.apache.lucene.index.ConcurrentMergeScheduler</mergeScheduler>-->
<!--
As long as Solr is the only process modifying your index, it is
safe to use Lucene's in-process locking mechanism, but you may
specify one of the other Lucene LockFactory implementations in
the event that you have a custom situation.
none = NoLockFactory (typically only used with read only indexes)
single = SingleInstanceLockFactory (suggested)
native = NativeFSLockFactory
simple = SimpleFSLockFactory
('simple' is the default for backwards compatibility with Solr 1.2)
-->
<lockType>single</lockType>
</indexDefaults>
<mainIndex>
<!-- options specific to the main on-disk lucene index -->
<useCompoundFile>false</useCompoundFile>
<ramBufferSizeMB>32</ramBufferSizeMB>
<mergeFactor>10</mergeFactor>
<!-- Deprecated -->
<!--<maxBufferedDocs>1000</maxBufferedDocs>-->
<maxMergeDocs>2147483647</maxMergeDocs>
<maxFieldLength>10000</maxFieldLength>
<!-- If true, unlock any held write or commit locks on startup.
This defeats the locking mechanism that allows multiple
processes to safely access a lucene index, and should be
used with care.
This is not needed if lock type is 'none' or 'single'
-->
<unlockOnStartup>false</unlockOnStartup>
</mainIndex>
<!-- the default high-performance update handler -->
<updateHandler class="solr.DirectUpdateHandler2">
<!-- A prefix of "solr." for class names is an alias that
causes solr to search appropriate packages, including
org.apache.solr.(search|update|request|core|analysis)
-->
<!-- Limit the number of deletions Solr will buffer during doc updating.
Setting this lower can help bound memory use during indexing.
-->
<maxPendingDeletes>100000</maxPendingDeletes>
</updateHandler>
<query>
<!-- Maximum number of clauses in a boolean query... can affect
range or prefix queries that expand to big boolean
queries. An exception is thrown if exceeded. -->
<maxBooleanClauses>1024</maxBooleanClauses>
<!-- Cache used by SolrIndexSearcher for filters (DocSets),
unordered sets of *all* documents that match a query.
When a new searcher is opened, its caches may be prepopulated
or "autowarmed" using data from caches in the old searcher.
autowarmCount is the number of items to prepopulate. For LRUCache,
the autowarmed items will be the most recently accessed items.
Parameters:
class - the SolrCache implementation (currently only LRUCache)
size - the maximum number of entries in the cache
initialSize - the initial capacity (number of entries) of
the cache. (see java.util.HashMap)
autowarmCount - the number of entries to prepopulate from
an old cache.
-->
<filterCache
class="solr.LRUCache"
size="512"
initialSize="512"
autowarmCount="256"/>
<!-- queryResultCache caches results of searches - ordered lists of
document ids (DocList) based on a query, a sort, and the range
of documents requested. -->
<queryResultCache
class="solr.LRUCache"
size="512"
initialSize="512"
autowarmCount="256"/>
<!-- documentCache caches Lucene Document objects (the stored fields for each document).
Since Lucene internal document ids are transient, this cache will not be autowarmed. -->
<documentCache
class="solr.LRUCache"
size="512"
initialSize="512"
autowarmCount="0"/>
<!-- If true, stored fields that are not requested will be loaded lazily.
This can result in a significant speed improvement if the usual case is to
not load all stored fields, especially if the skipped fields are large compressed
text fields.
-->
<enableLazyFieldLoading>true</enableLazyFieldLoading>
<!-- Example of a generic cache. These caches may be accessed by name
through SolrIndexSearcher.getCache(), cacheLookup(), and cacheInsert().
The purpose is to enable easy caching of user/application level data.
The regenerator argument should be specified as an implementation
of solr.search.CacheRegenerator if autowarming is desired. -->
<!--
<cache name="myUserCache"
class="solr.LRUCache"
size="4096"
initialSize="1024"
autowarmCount="1024"
regenerator="org.mycompany.mypackage.MyRegenerator"
/>
-->
<!-- An optimization that attempts to use a filter to satisfy a search.
If the requested sort does not include score, then the filterCache
will be checked for a filter matching the query. If found, the filter
will be used as the source of document ids, and then the sort will be
applied to that.
<useFilterForSortedQuery>true</useFilterForSortedQuery>
-->
<!-- An optimization for use with the queryResultCache. When a search
is requested, a superset of the requested number of document ids
are collected. For example, if a search for a particular query
requests matching documents 10 through 19, and queryResultWindowSize is 50,
then documents 0 through 49 will be collected and cached. Any further
requests in that range can be satisfied via the cache. -->
<queryResultWindowSize>50</queryResultWindowSize>
<!-- Maximum number of documents to cache for any entry in the
queryResultCache. -->
<queryResultMaxDocsCached>200</queryResultMaxDocsCached>
<!-- This entry enables an int hash representation for filters (DocSets)
when the number of items in the set is less than maxSize. For smaller
sets, this representation is more memory efficient, more efficient to
iterate over, and faster to take intersections. -->
<HashDocSet maxSize="3000" loadFactor="0.75"/>
<!-- a newSearcher event is fired whenever a new searcher is being prepared
and there is a current searcher handling requests (aka registered). -->
<!-- QuerySenderListener takes an array of NamedList and executes a
local query request for each NamedList in sequence. -->
<listener event="newSearcher" class="solr.QuerySenderListener">
<arr name="queries">
<lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> </lst>
<lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst>
<lst><str name="q">static newSearcher warming query from solrconfig.xml</str></lst>
</arr>
</listener>
<!-- a firstSearcher event is fired whenever a new searcher is being
prepared but there is no current registered searcher to handle
requests or to gain autowarming data from. -->
<listener event="firstSearcher" class="solr.QuerySenderListener">
<arr name="queries">
</arr>
</listener>
<!-- If a search request comes in and there is no current registered searcher,
then immediately register the still warming searcher and use it. If
"false" then all requests will block until the first searcher is done
warming. -->
<useColdSearcher>false</useColdSearcher>
<!-- Maximum number of searchers that may be warming in the background
concurrently. An error is returned if this limit is exceeded. Recommend
1-2 for read-only slaves, higher for masters w/o cache warming. -->
<maxWarmingSearchers>4</maxWarmingSearchers>
</query>
<!--
Let the dispatch filter handle /select?qt=XXX
handleSelect=true will use consistent error handling for /select and /update
handleSelect=false will use solr1.1 style error formatting
-->
<requestDispatcher handleSelect="true" >
<!--Make sure your system has some authentication before enabling remote streaming! -->
<requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" />
<!-- Set HTTP caching related parameters (for proxy caches and clients).
To get the behaviour of Solr 1.2 (ie: no caching related headers)
use the never304="true" option and do not specify a value for
<cacheControl>
-->
<httpCaching never304="true">
<!--httpCaching lastModifiedFrom="openTime"
etagSeed="Solr"-->
<!-- lastModFrom="openTime" is the default, the Last-Modified value
(and validation against If-Modified-Since requests) will all be
relative to when the current Searcher was opened.
You can change it to lastModFrom="dirLastMod" if you want the
value to exactly correspond to when the physical index was last
modified.
etagSeed="..." is an option you can change to force the ETag
header (and validation against If-None-Match requests) to be
different even if the index has not changed (ie: when making
significant changes to your config file)
lastModifiedFrom and etagSeed are both ignored if you use the
never304="true" option.
-->
<!-- If you include a <cacheControl> directive, it will be used to
generate a Cache-Control header, as well as an Expires header
if the value contains "max-age="
By default, no Cache-Control header is generated.
You can use the <cacheControl> option even if you have set
never304="true"
-->
<!-- <cacheControl>max-age=30, public</cacheControl> -->
</httpCaching>
</requestDispatcher>
<!-- requestHandler plugins... incoming queries will be dispatched to the
correct handler based on the path or the qt (query type) param.
Names starting with a '/' are accessed with a path equal to the
registered name. Names without a leading '/' are accessed with:
http://host/app/select?qt=name
If no qt is defined, the requestHandler that declares default="true"
will be used.
-->
<requestHandler name="standard" class="solr.StandardRequestHandler" default="true">
<!-- default values for query parameters -->
<lst name="defaults">
<str name="echoParams">explicit</str>
<!--
<int name="rows">10</int>
<str name="fl">*</str>
<str name="version">2.1</str>
-->
</lst>
</requestHandler>
<requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
<lst name="defaults">
<lst name="datasource">
<str name="type">MockDataSource</str>
</lst>
</lst>
</requestHandler>
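<!-- This test wires the handler to a MockDataSource. A hedged sketch of what a
 real deployment might use instead (attribute values are illustrative; see the
 DataImportHandler wiki for the full syntax):
 <requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
 <lst name="defaults">
 <str name="config">data-config.xml</str>
 </lst>
 </requestHandler>
 with the JDBC connection declared inside data-config.xml, e.g.
 <dataSource driver="org.hsqldb.jdbcDriver" url="jdbc:hsqldb:/tmp/example/ex" user="sa" />
-->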
<!--
Search components are registered to SolrCore and used by Search Handlers
By default, the following components are available:
<searchComponent name="query" class="org.apache.solr.handler.component.QueryComponent" />
<searchComponent name="facet" class="org.apache.solr.handler.component.FacetComponent" />
<searchComponent name="mlt" class="org.apache.solr.handler.component.MoreLikeThisComponent" />
<searchComponent name="highlight" class="org.apache.solr.handler.component.HighlightComponent" />
<searchComponent name="debug" class="org.apache.solr.handler.component.DebugComponent" />
If you register a searchComponent to one of the standard names, that will be used instead.
-->
<requestHandler name="/search" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<str name="echoParams">explicit</str>
</lst>
<!--
By default, this will register the following components:
<arr name="components">
<str>query</str>
<str>facet</str>
<str>mlt</str>
<str>highlight</str>
<str>debug</str>
</arr>
To insert handlers before or after the 'standard' components, use:
<arr name="first-components">
<str>first</str>
</arr>
<arr name="last-components">
<str>last</str>
</arr>
-->
</requestHandler>
<!-- Update request handler.
Note: Since solr1.1, requestHandlers require a valid content type header if posted in
the body. For example, curl now requires: -H 'Content-type:text/xml; charset=utf-8'
The response format differs from solr1.1 formatting and returns a standard error code.
To enable solr1.1 behavior, remove the /update handler or change its path.
"update.processor.class" is the class name for the UpdateRequestProcessor. It is initialized
only once. This cannot be changed for each request.
-->
<requestHandler name="/update" class="solr.XmlUpdateRequestHandler" >
<!--
<str name="update.processor.class">org.apache.solr.handler.UpdateRequestProcessor</str>
-->
</requestHandler>
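<!-- Illustrative only: a commit can be issued against this handler with, e.g.
 curl http://localhost:8983/solr/update -H 'Content-type:text/xml; charset=utf-8' -d '<commit/>'
-->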
<!-- config for the admin interface -->
<admin>
<defaultQuery>*:*</defaultQuery>
<!-- configure a healthcheck file for servers behind a loadbalancer
<healthcheck type="file">server-enabled</healthcheck>
-->
</admin>
</config>

View File

@ -0,0 +1,2 @@
#Thu Jul 31 00:57:26 IST 2008
last_index_time=2008-07-31 00\:57\:26

View File

@ -0,0 +1,20 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#use a protected word file to avoid stemming two
#unrelated words to the same base word.
#to test, we will use words that would normally obviously be stemmed.
cats
ridding

View File

@ -0,0 +1,8 @@
<dataConfig>
<document>
<entity name="x" query="select * from x">
<field column="id" />
<field column="desc" />
</entity>
</document>
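<!-- A hypothetical sketch (not used by these tests) of how the same entity could be
 extended for incremental (delta) imports, assuming the source table has a
 last_modified column and "id" as its primary key:
 <entity name="x" pk="id"
 query="select * from x"
 deltaQuery="select id from x where last_modified > '${dataimporter.last_index_time}'">
 <field column="id" />
 <field column="desc" />
 </entity>
-->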
</dataConfig>

View File

@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
stopworda
stopwordb

View File

@ -0,0 +1,22 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
a => aa
b => b1 b2
c => c1,c2
a\=>a => b\=>b
a\,a => b\,b
foo,bar,baz
Television,TV,Televisions