mirror of https://github.com/apache/lucene.git
SOLR-469 -- Added DataImportHandler as a contrib project.
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@681182 13f79535-47bb-0310-9956-ffa450edef68
parent 842680f15d
commit 79e77502f6

@@ -331,6 +331,11 @@ New Features

65. SOLR-663: Allow multiple files for stopwords, keepwords, protwords and synonyms (shalin)

66. SOLR-469: Added DataImportHandler as a contrib project which makes indexing data from databases, XML files and HTTP
    data sources into Solr quick and easy. Includes an API and implementations for supporting multiple
    data sources, processors and transformers for importing data. Supports full data imports as well as
    incremental (delta) indexing. See http://wiki.apache.org/solr/DataImportHandler for more details.
    (Noble Paul, shalin)

Changes in runtime behavior

1. SOLR-559: use Lucene updateDocument, deleteDocuments methods. This
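As context for the entry above: once the handler is registered in solrconfig.xml, an import is triggered with a plain HTTP request. A minimal sketch in Java, assuming a local Solr instance with the handler mounted at /dataimport (host, port and path are illustrative assumptions, not fixed by this commit):

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;

public class TriggerFullImport {
  public static void main(String[] args) throws Exception {
    // command=full-import rebuilds the index; delta-import would apply
    // incremental changes instead.
    URL url = new URL(
        "http://localhost:8983/solr/dataimport?command=full-import&commit=true");
    BufferedReader in = new BufferedReader(new InputStreamReader(url.openStream()));
    String line;
    while ((line = in.readLine()) != null)
      System.out.println(line); // status response from the handler
    in.close();
  }
}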
@@ -0,0 +1,129 @@
<?xml version="1.0"?>

<!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements.  See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License.  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->

<project name="solr-dataimporthandler" default="build">

  <property name="solr-path" value="../.." />

  <property name="javadoc.dir" value="target/javadoc"></property>

  <import file="../../common-build.xml"/>

  <description>
    Data Import Handler
  </description>

  <path id="common.classpath">
    <pathelement location="${solr-path}/build/common" />
    <pathelement location="${solr-path}/build/core" />
    <fileset dir="${solr-path}/lib" includes="*.jar"></fileset>
  </path>

  <path id="test.classpath">
    <path refid="common.classpath" />
    <pathelement path="target/classes" />
    <pathelement path="target/test-classes" />
  </path>

  <target name="clean">
    <delete failonerror="false" dir="target"/>
  </target>

  <target name="init">
    <mkdir dir="target/classes"/>
    <ant dir="../../" inheritall="false" target="compile" />
  </target>

  <target name="compile" depends="init">
    <solr-javac destdir="target/classes"
                classpathref="common.classpath">
      <src path="src/main/java" />
    </solr-javac>
  </target>

  <target name="build" depends="compile">
    <jar destfile="target/${fullnamever}.jar" basedir="target/classes" />
  </target>

  <target name="compileTests" depends="compile">
    <solr-javac destdir="target/test-classes"
                classpathref="test.classpath">
      <src path="src/test/java" />
    </solr-javac>
  </target>

  <target name="test" depends="compileTests">
    <mkdir dir="target/test-results"/>

    <junit printsummary="on"
           haltonfailure="no"
           errorProperty="tests.failed"
           failureProperty="tests.failed"
           dir="src/test/resources/"
           >
      <formatter type="brief" usefile="false" if="junit.details"/>
      <classpath refid="test.classpath"/>
      <formatter type="xml"/>
      <batchtest fork="yes" todir="target/test-results" unless="testcase">
        <fileset dir="src/test/java" includes="${junit.includes}"/>
      </batchtest>
      <batchtest fork="yes" todir="target/test-results" if="testcase">
        <fileset dir="src/test/java" includes="**/${testcase}.java"/>
      </batchtest>
    </junit>

    <fail if="tests.failed">Tests failed!</fail>
  </target>

  <target name="dist" depends="build">
    <copy todir="../../build/web">
      <fileset dir="src/main/webapp" includes="**" />
    </copy>
    <mkdir dir="../../build/web/WEB-INF/lib"/>
    <copy file="target/${fullnamever}.jar" todir="${solr-path}/build/web/WEB-INF/lib"></copy>
    <copy file="target/${fullnamever}.jar" todir="${solr-path}/dist"></copy>
  </target>

  <target name="javadoc">
    <sequential>
      <mkdir dir="${javadoc.dir}/contrib-${fullnamever}"/>
      <javadoc
          destdir="${javadoc.dir}/contrib-${fullnamever}"
          author="true"
          version="true"
          failonerror="true"
          use="true"
          encoding="utf8"
          access="${javadoc.access}"
          windowtitle="${Name} ${version} contrib-${fullnamever} API"
          doctitle="${Name} ${version} API (${specversion})"
          bottom="Copyright &amp;copy; ${javadoc.years} The Apache Software Foundation"
          >
        <packageset dir="src/main/java"/>
        <link href="${javadoc.link.java}"/>
        <link href="${javadoc.link.junit}"/>
        <link href="${javadoc.link.lucene}"/>
        <classpath refid="common.classpath"/>
      </javadoc>
      <jar basedir="${javadoc.dir}/contrib-${fullnamever}" destfile="target/contrib-${fullnamever}-javadoc.jar"/>
      <copy file="target/contrib-${fullnamever}-javadoc.jar" todir="${solr-path}/dist"></copy>
    </sequential>
  </target>

</project>
@@ -0,0 +1,161 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import org.apache.solr.core.SolrCore;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.util.AbstractSolrTestCase;

import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * <p>
 * Abstract base class for DataImportHandler tests
 * </p>
 * <p/>
 * <b>This API is experimental and subject to change</b>
 *
 * @version $Id$
 * @since solr 1.3
 */
public abstract class AbstractDataImportHandlerTest extends
        AbstractSolrTestCase {

  @Override
  public void setUp() throws Exception {
    super.setUp();
  }

  @Override
  public void tearDown() throws Exception {
    super.tearDown();
  }

  protected String loadDataConfig(String dataConfigFileName) {
    try {
      SolrCore core = h.getCore();
      return SolrWriter.getResourceAsString(core.getResourceLoader()
              .openResource(dataConfigFileName));
    } catch (IOException e) {
      e.printStackTrace();
      return null;
    }
  }

  protected void runFullImport(String dataConfig) throws Exception {
    LocalSolrQueryRequest request = lrf.makeRequest("command", "full-import",
            "debug", "on", "clean", "true", "commit", "true", "dataConfig",
            dataConfig);
    h.query("/dataimport", request);
  }

  protected void runDeltaImport(String dataConfig) throws Exception {
    LocalSolrQueryRequest request = lrf.makeRequest("command", "delta-import",
            "debug", "on", "clean", "true", "commit", "true", "dataConfig",
            dataConfig);
    h.query("/dataimport", request);
  }

  /**
   * Helper for creating a Context instance. Useful for testing Transformers.
   */
  @SuppressWarnings("unchecked")
  public static Context getContext(DataConfig.Entity parentEntity,
                                   VariableResolverImpl resolver, DataSource parentDataSource,
                                   int currProcess, final List<Map<String, String>> entityFields,
                                   final Map<String, String> entityAttrs) {
    final Context delegate = new ContextImpl(parentEntity, resolver,
            parentDataSource, currProcess, Collections.EMPTY_MAP,
            new HashMap<String, Object>(), null, null);
    return new Context() {
      public String getEntityAttribute(String name) {
        return entityAttrs == null ? delegate.getEntityAttribute(name)
                : entityAttrs.get(name);
      }

      public List<Map<String, String>> getAllEntityFields() {
        return entityFields == null ? delegate.getAllEntityFields()
                : entityFields;
      }

      public VariableResolver getVariableResolver() {
        return delegate.getVariableResolver();
      }

      public DataSource getDataSource() {
        return delegate.getDataSource();
      }

      public boolean isRootEntity() {
        return false;
      }

      public int currentProcess() {
        return delegate.currentProcess();
      }

      public Map<String, Object> getRequestParameters() {
        return delegate.getRequestParameters();
      }

      public EntityProcessor getEntityProcessor() {
        return null;
      }

      public void setSessionAttribute(String name, Object val, String scope) {
        delegate.setSessionAttribute(name, val, scope);
      }

      public Object getSessionAttribute(String name, String scope) {
        return delegate.getSessionAttribute(name, scope);
      }

      public Context getParentContext() {
        return delegate.getParentContext();
      }

      public DataSource getDataSource(String name) {
        return delegate.getDataSource(name);
      }

      public SolrCore getSolrCore() {
        return delegate.getSolrCore();
      }
    };
  }

  /**
   * Strings at even indices are keys; strings at odd indices are values in
   * the returned map.
   */
  @SuppressWarnings("unchecked")
  public static Map createMap(Object... args) {
    Map result = new HashMap();

    if (args == null || args.length == 0)
      return result;

    for (int i = 0; i < args.length - 1; i += 2)
      result.put(args[i], args[i + 1]);

    return result;
  }
}
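The createMap helper above packs varargs into a map, treating even-index arguments as keys and odd-index arguments as the following values. A self-contained sketch of that convention (the demo class and the sample values are mine, not part of the commit):

import java.util.HashMap;
import java.util.Map;

public class CreateMapDemo {
  // Same convention as the helper above: even-index args are keys,
  // odd-index args are the corresponding values.
  public static Map<Object, Object> createMap(Object... args) {
    Map<Object, Object> result = new HashMap<Object, Object>();
    if (args == null || args.length == 0)
      return result;
    for (int i = 0; i < args.length - 1; i += 2)
      result.put(args[i], args[i + 1]);
    return result;
  }

  public static void main(String[] args) {
    Map<Object, Object> row = createMap("column", "desc", "splitBy", ",");
    System.out.println(row.get("column"));  // desc
    System.out.println(row.get("splitBy")); // ,
  }
}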
@@ -0,0 +1,80 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * This class enables caching of data obtained from the DB to avoid too many
 * SQL queries.
 * <p/>
 * <p>
 * Refer to <a
 * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
 * for more details.
 * </p>
 * <p/>
 * <b>This API is experimental and subject to change</b>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class CachedSqlEntityProcessor extends SqlEntityProcessor {
  private boolean isFirst;

  @SuppressWarnings("unchecked")
  public void init(Context context) {
    super.init(context);
    super.cacheInit();
    isFirst = true;
  }

  public Map<String, Object> nextRow() {
    if (rowcache != null)
      return getFromRowCache();
    if (dataSourceRowCache != null)
      return getFromRowCacheTransformed();
    if (!isFirst)
      return null;
    String query = resolver.replaceTokens(context.getEntityAttribute("query"));
    isFirst = false;
    if (simpleCache != null) {
      return getSimplCacheData(query);
    } else {
      return getIdCacheData(query);
    }
  }

  protected List<Map<String, Object>> getAllNonCachedRows() {
    List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
    String q = getQuery();
    initQuery(resolver.replaceTokens(q));
    if (rowIterator == null)
      return rows;
    while (rowIterator.hasNext()) {
      Map<String, Object> arow = rowIterator.next();
      if (arow == null) {
        break;
      } else {
        rows.add(arow);
      }
    }
    return rows;
  }
}
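The nextRow flow above reduces to a simple pattern: answer from a cache when one exists, run the query exactly once on the first call, and return null afterwards. A stripped-down sketch of that flow in plain Java (none of the Solr types; the iterator argument is a stand-in for the SQL row source):

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

// Stand-in for the "query once, then serve from cache" flow of nextRow().
class OneShotRowCache {
  private boolean first = true;
  private Iterator<Map<String, Object>> cached;

  Map<String, Object> nextRow(Iterator<Map<String, Object>> source) {
    if (cached != null)                        // cache built: serve from it
      return cached.hasNext() ? cached.next() : null;
    if (!first)                                // already exhausted
      return null;
    first = false;
    List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
    while (source.hasNext()) {                 // drain the "query" exactly once
      Map<String, Object> row = source.next();
      if (row == null)
        break;
      rows.add(row);
    }
    cached = rows.iterator();
    return cached.hasNext() ? cached.next() : null;
  }
}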
@@ -0,0 +1,154 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.handler.dataimport;

import org.apache.solr.core.SolrCore;

import java.util.List;
import java.util.Map;

/**
 * <p>
 * This class gives access to all available objects, so any component
 * implemented by a user can have the full power of DataImportHandler.
 * </p>
 * <p>
 * Refer to <a
 * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
 * for more details.
 * </p>
 * <p/>
 * <b>This API is experimental and subject to change</b>
 *
 * @version $Id$
 * @since solr 1.3
 */
public abstract class Context {
  public static final int FULL_DUMP = 1, DELTA_DUMP = 2, FIND_DELTA = 3;

  public static final String SCOPE_ENTITY = "entity", SCOPE_GLOBAL = "global",
          SCOPE_DOC = "document";

  /**
   * Get the value of any attribute put into this entity.
   *
   * @param name name of the attribute, e.g. 'name'
   * @return value of the named attribute in the entity
   */
  public abstract String getEntityAttribute(String name);

  /**
   * Returns all the fields put into an entity. Each item (a map) in the list
   * corresponds to one field; each map contains the attribute names and
   * values of a field.
   *
   * @return all fields in an entity
   */
  public abstract List<Map<String, String>> getAllEntityFields();

  /**
   * Returns the VariableResolver used in this entity, which can be used to
   * resolve the tokens in ${<namespace.name>}
   *
   * @return a VariableResolver instance
   * @see org.apache.solr.handler.dataimport.VariableResolver
   */
  public abstract VariableResolver getVariableResolver();

  /**
   * Gets the data source instance defined for this entity.
   *
   * @return a new DataSource instance as configured for the current entity
   * @see org.apache.solr.handler.dataimport.DataSource
   */
  public abstract DataSource getDataSource();

  /**
   * Gets a new DataSource instance by name.
   *
   * @param name name of the dataSource as defined in the dataSource tag
   * @return a new DataSource instance as configured for the named entity
   * @see org.apache.solr.handler.dataimport.DataSource
   */
  public abstract DataSource getDataSource(String name);

  /**
   * Returns the instance of EntityProcessor used for this entity.
   *
   * @return instance of EntityProcessor used for the current entity
   * @see org.apache.solr.handler.dataimport.EntityProcessor
   */
  public abstract EntityProcessor getEntityProcessor();

  /**
   * Store a value under a given name in the given scope (entity, document or
   * global).
   *
   * @param name  the key
   * @param val   the value
   * @param scope the scope in which the given key/value pair is to be stored
   */
  public abstract void setSessionAttribute(String name, Object val, String scope);

  /**
   * Get a value by name from the given scope (entity, document or global).
   *
   * @param name  the key
   * @param scope the scope from which the value is to be retrieved
   * @return the object stored in the given scope with the given key
   */
  public abstract Object getSessionAttribute(String name, String scope);

  /**
   * Get the Context instance of the parent entity. Works only during a full
   * dump. If the current entity is the root entity, null is returned.
   *
   * @return the parent entity's Context
   */
  public abstract Context getParentContext();

  /**
   * The request parameters passed over HTTP for this command. The values in
   * the map are either a String (for single-valued parameters) or a
   * List<String> (for multi-valued parameters).
   *
   * @return the request parameters passed in the URL to initiate this process
   */
  public abstract Map<String, Object> getRequestParameters();

  /**
   * Returns whether the current entity is the root entity.
   *
   * @return true if the current entity is the root entity, false otherwise
   */
  public abstract boolean isRootEntity();

  /**
   * Returns the current process: FULL_DUMP = 1, DELTA_DUMP = 2, FIND_DELTA = 3.
   *
   * @return the code of the currently running process
   */
  public abstract int currentProcess();

  /**
   * Exposes the actual SolrCore to the components.
   *
   * @return the core
   */
  public abstract SolrCore getSolrCore();
}
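To see this API in use, here is a hedged sketch of a custom transformer reading an entity attribute through the Context. It assumes DIH's Transformer contract (transformRow(Map, Context)), which is part of this contrib but not shown in this diff, and the trimField attribute is an illustrative invention:

package org.apache.solr.handler.dataimport;

import java.util.Map;

public class TrimTransformer extends Transformer {
  @Override
  public Object transformRow(Map<String, Object> row, Context context) {
    // Read an attribute declared on the <entity> tag, e.g. trimField="name"
    String field = context.getEntityAttribute("trimField");
    Object value = field == null ? null : row.get(field);
    if (value instanceof String)
      row.put(field, ((String) value).trim());
    return row;
  }
}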
@@ -0,0 +1,143 @@
package org.apache.solr.handler.dataimport;

import org.apache.solr.core.SolrCore;

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * <p>
 * An implementation of Context
 * </p>
 * <p/>
 * <b>This API is experimental and subject to change</b>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class ContextImpl extends Context {
  private DataConfig.Entity entity;

  private ContextImpl parent;

  private VariableResolverImpl resolver;

  private DataSource ds;

  private int currProcess;

  private Map<String, Object> requestParams;

  private DataImporter dataImporter;

  private Map<String, Object> entitySession, globalSession, docSession;

  public ContextImpl(DataConfig.Entity entity, VariableResolverImpl resolver,
                     DataSource ds, int currProcess, Map<String, Object> requestParams,
                     Map<String, Object> global, ContextImpl p, DataImporter di) {
    this.entity = entity;
    this.resolver = resolver;
    this.ds = ds;
    this.currProcess = currProcess;
    this.requestParams = requestParams;
    globalSession = global;
    parent = p;
    dataImporter = di;
  }

  public String getEntityAttribute(String name) {
    return entity == null ? null : entity.allAttributes.get(name);
  }

  public List<Map<String, String>> getAllEntityFields() {
    return entity == null ? Collections.EMPTY_LIST : entity.allFieldsList;
  }

  public VariableResolver getVariableResolver() {
    return resolver;
  }

  public DataSource getDataSource() {
    return ds;
  }

  public DataSource getDataSource(String name) {
    // Note: the name parameter is currently unused; the data source
    // configured for this entity is returned.
    return dataImporter.getDataSourceInstance(entity);
  }

  public boolean isRootEntity() {
    return entity.isDocRoot;
  }

  public int currentProcess() {
    return currProcess;
  }

  public Map<String, Object> getRequestParameters() {
    return requestParams;
  }

  public EntityProcessor getEntityProcessor() {
    return entity == null ? null : entity.processor;
  }

  public void setSessionAttribute(String name, Object val, String scope) {
    if (Context.SCOPE_ENTITY.equals(scope)) {
      if (entitySession == null)
        entitySession = new HashMap<String, Object>();
      entitySession.put(name, val);
    } else if (Context.SCOPE_GLOBAL.equals(scope)) {
      if (globalSession != null) {
        globalSession.put(name, val);
      }
    } else if (Context.SCOPE_DOC.equals(scope)) {
      Map<String, Object> docsession = getDocSession();
      if (docsession != null)
        docsession.put(name, val);
    }
  }

  public Object getSessionAttribute(String name, String scope) {
    if (Context.SCOPE_ENTITY.equals(scope)) {
      if (entitySession == null)
        return null;
      return entitySession.get(name);
    } else if (Context.SCOPE_GLOBAL.equals(scope)) {
      if (globalSession != null) {
        return globalSession.get(name);
      }
    } else if (Context.SCOPE_DOC.equals(scope)) {
      Map<String, Object> docsession = getDocSession();
      if (docsession != null)
        return docsession.get(name);
    }
    return null;
  }

  public Context getParentContext() {
    return parent;
  }

  public Map<String, Object> getDocSession() {
    // Walk up the parent chain until a document session is found.
    ContextImpl c = this;
    while (true) {
      if (c.docSession != null)
        return c.docSession;
      if (c.parent != null)
        c = c.parent;
      else
        return null;
    }
  }

  public void setDocSession(Map<String, Object> docSession) {
    this.docSession = docSession;
  }

  public SolrCore getSolrCore() {
    return dataImporter.getCore();
  }
}
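The session-attribute methods above dispatch on scope: entity scope is a lazily created map on this context, global scope writes into the shared map handed to the constructor, and document scope walks up the parent chain via getDocSession. A small sketch exercising that dispatch (nulls stand in for the collaborators that session handling does not touch; an assumption, not test code from the commit):

package org.apache.solr.handler.dataimport;

import java.util.HashMap;

public class ContextScopeDemo {
  public static void main(String[] args) {
    // Nulls for entity/resolver/datasource/importer: session-attribute
    // handling never dereferences them.
    ContextImpl ctx = new ContextImpl(null, null, null, Context.FULL_DUMP,
        new HashMap<String, Object>(), new HashMap<String, Object>(), null, null);

    ctx.setSessionAttribute("counter", 42, Context.SCOPE_ENTITY);
    System.out.println(ctx.getSessionAttribute("counter", Context.SCOPE_ENTITY)); // 42

    // No doc session set anywhere in the (empty) parent chain -> null
    System.out.println(ctx.getSessionAttribute("x", Context.SCOPE_DOC)); // null
  }
}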
@@ -0,0 +1,360 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import java.util.*;

/**
 * <p>
 * Mapping for data-config.xml
 * </p>
 * <p/>
 * <p>
 * Refer to <a
 * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
 * for more details.
 * </p>
 * <p/>
 * <b>This API is experimental and subject to change</b>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class DataConfig {
  public List<Document> documents;

  public List<Props> properties;

  private Map<String, Document> documentCache;

  public Map<String, Evaluator> evaluators = new HashMap<String, Evaluator>();

  public Script script;

  public Map<String, Properties> dataSources = new HashMap<String, Properties>();

  public Document getDocumentByName(String name) {
    if (documentCache == null) {
      documentCache = new HashMap<String, Document>();
      for (Document document : documents)
        documentCache.put(document.name, document);
    }

    return documentCache.get(name);
  }

  public static class Document {
    public String name;

    public String deleteQuery;

    public List<Entity> entities = new ArrayList<Entity>();

    public List<Field> fields;

    public Document() {
    }

    public Document(Element element) {
      this.name = getStringAttribute(element, NAME, null);
      this.deleteQuery = getStringAttribute(element, "deleteQuery", null);
      List<Element> l = getChildNodes(element, "entity");
      for (Element e : l)
        entities.add(new Entity(e));
      l = getChildNodes(element, "field");
      if (!l.isEmpty())
        fields = new ArrayList<Field>();
      for (Element e : l)
        fields.add(new Field(e));
    }
  }

  public static class Props {
    public String name;

    public String file;
  }

  public static class Entity {
    public String name;

    public String pk;

    public String dataSource;

    public Map<String, String> allAttributes;

    public String proc;

    public String docRoot;

    public boolean isDocRoot = false;

    public List<Field> fields;

    public List<Map<String, String>> allFieldsList = new ArrayList<Map<String, String>>();

    public List<Entity> entities;

    public String[] primaryKeys;

    public Entity parentEntity;

    public EntityProcessor processor;

    @SuppressWarnings("unchecked")
    public DataSource dataSrc;

    public Script script;

    public List<Field> implicitFields;

    public Entity() {
    }

    public Entity(Element element) {
      name = getStringAttribute(element, NAME, null);
      pk = getStringAttribute(element, "pk", null);
      docRoot = getStringAttribute(element, ROOT_ENTITY, null);
      proc = getStringAttribute(element, PROCESSOR, null);
      dataSource = getStringAttribute(element, DataImporter.DATA_SRC, null);
      allAttributes = getAllAttributes(element);
      List<Element> n = getChildNodes(element, "field");
      fields = new ArrayList<Field>();
      for (Element elem : n)
        fields.add(new Field(elem));
      n = getChildNodes(element, "entity");
      if (!n.isEmpty())
        entities = new ArrayList<Entity>();
      for (Element elem : n)
        entities.add(new Entity(elem));
    }

    public void clearCache() {
      if (entities != null) {
        for (Entity entity : entities)
          entity.clearCache();
      }

      try {
        processor.destroy();
      } catch (Exception e) {
        /* no-op */
      }
      processor = null;
      if (dataSrc != null)
        dataSrc.close();
    }
  }

  public static class Script {
    public String language;

    public String script;

    public Script() {
    }

    public Script(Element e) {
      this.language = getStringAttribute(e, "language", "JavaScript");
      StringBuffer buffer = new StringBuffer();
      String script = getTxt(e, buffer);
      if (script != null)
        this.script = script.trim();
    }
  }

  public static class Field {

    public String column;

    public String name;

    public Float boost = 1.0f;

    public boolean toWrite = true;

    public boolean multiValued = false;

    public String nameOrColName;

    // First writer wins: an existing value is never overwritten.
    public Map<String, String> allAttributes = new HashMap<String, String>() {
      public String put(String key, String value) {
        if (super.containsKey(key))
          return super.get(key);
        return super.put(key, value);
      }
    };

    public Field() {
    }

    public Field(Element e) {
      this.name = getStringAttribute(e, DataImporter.NAME, null);
      this.column = getStringAttribute(e, DataImporter.COLUMN, null);
      this.boost = Float.parseFloat(getStringAttribute(e, "boost", "1.0f"));
      allAttributes.putAll(getAllAttributes(e));
    }

    public Field(String name, boolean b) {
      this.name = nameOrColName = column = name;
      multiValued = b;
    }

    public String getName() {
      return name == null ? column : name;
    }

    public Entity entity;

  }

  public void readFromXml(Element e) {
    List<Element> n = getChildNodes(e, "document");
    if (!n.isEmpty())
      documents = new ArrayList<Document>();
    for (Element element : n)
      documents.add(new Document(element));

    n = getChildNodes(e, SCRIPT);
    if (!n.isEmpty()) {
      script = new Script(n.get(0));
    }

    // Add the provided evaluators
    evaluators.put(EvaluatorBag.DATE_FORMAT_EVALUATOR, EvaluatorBag
            .getDateFormatEvaluator());
    evaluators.put(EvaluatorBag.SQL_ESCAPE_EVALUATOR, EvaluatorBag
            .getSqlEscapingEvaluator());
    evaluators.put(EvaluatorBag.URL_ENCODE_EVALUATOR, EvaluatorBag
            .getUrlEvaluator());

    n = getChildNodes(e, FUNCTION);
    if (!n.isEmpty()) {
      for (Element element : n) {
        String func = getStringAttribute(element, NAME, null);
        String clz = getStringAttribute(element, CLASS, null);
        if (func == null || clz == null)
          throw new DataImportHandlerException(
                  DataImportHandlerException.SEVERE,
                  "<function> must have 'name' and 'class' attributes");
        try {
          evaluators.put(func, (Evaluator) DocBuilder.loadClass(clz)
                  .newInstance());
        } catch (Exception exp) {
          throw new DataImportHandlerException(
                  DataImportHandlerException.SEVERE,
                  "Unable to instantiate evaluator: " + clz, exp);
        }
      }
    }
    n = getChildNodes(e, DATA_SRC);
    if (!n.isEmpty()) {
      for (Element element : n) {
        Properties p = new Properties();
        HashMap<String, String> attrs = getAllAttributes(element);
        for (Map.Entry<String, String> entry : attrs.entrySet()) {
          p.setProperty(entry.getKey(), entry.getValue());
        }
        dataSources.put(p.getProperty("name"), p);
      }
    }
  }

  private static String getStringAttribute(Element e, String name, String def) {
    String r = e.getAttribute(name);
    if (r == null || "".equals(r.trim()))
      r = def;
    return r;
  }

  private static HashMap<String, String> getAllAttributes(Element e) {
    HashMap<String, String> m = new HashMap<String, String>();
    NamedNodeMap nnm = e.getAttributes();
    for (int i = 0; i < nnm.getLength(); i++) {
      m.put(nnm.item(i).getNodeName(), nnm.item(i).getNodeValue());
    }
    return m;
  }

  public static String getTxt(Node elem, StringBuffer buffer) {
    if (elem.getNodeType() != Node.CDATA_SECTION_NODE) {
      NodeList childs = elem.getChildNodes();
      for (int i = 0; i < childs.getLength(); i++) {
        Node child = childs.item(i);
        short childType = child.getNodeType();
        if (childType != Node.COMMENT_NODE
                && childType != Node.PROCESSING_INSTRUCTION_NODE) {
          getTxt(child, buffer);
        }
      }
    } else {
      buffer.append(elem.getNodeValue());
    }

    return buffer.toString();
  }

  public static List<Element> getChildNodes(Element e, String byName) {
    List<Element> result = new ArrayList<Element>();
    NodeList l = e.getChildNodes();
    for (int i = 0; i < l.getLength(); i++) {
      if (e.equals(l.item(i).getParentNode())
              && byName.equals(l.item(i).getNodeName()))
        result.add((Element) l.item(i));
    }
    return result;
  }

  public void clearCaches() {
    for (Document document : documents)
      for (Entity entity : document.entities)
        entity.clearCache();
  }

  public static final String SCRIPT = "script";

  public static final String NAME = "name";

  public static final String SCRIPT_LANG = "scriptlanguage";

  public static final String SCRIPT_NAME = "scriptname";

  public static final String PROCESSOR = "processor";

  public static final String IMPORTER_NS = "dataimporter";

  public static final String ROOT_ENTITY = "rootEntity";

  public static final String FUNCTION = "function";

  public static final String CLASS = "class";

  public static final String DATA_SRC = "dataSource";

}
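Since DataConfig is the in-memory mapping for data-config.xml, a quick sketch of feeding a minimal config through readFromXml may help. It assumes the full contrib (EvaluatorBag, DocBuilder) is on the classpath, and the document/entity/field names are illustrative:

package org.apache.solr.handler.dataimport;

import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Element;
import org.xml.sax.InputSource;
import java.io.StringReader;

public class DataConfigDemo {
  public static void main(String[] args) throws Exception {
    String xml =
        "<dataConfig>"
      + "  <document name='products'>"
      + "    <entity name='item' query='select * from item'>"
      + "      <field column='ID' name='id'/>"
      + "    </entity>"
      + "  </document>"
      + "</dataConfig>";
    Element root = (Element) DocumentBuilderFactory.newInstance()
        .newDocumentBuilder()
        .parse(new InputSource(new StringReader(xml)))
        .getElementsByTagName("dataConfig").item(0);

    DataConfig config = new DataConfig();
    config.readFromXml(root);
    // The parsed tree mirrors the XML nesting: document -> entity -> field.
    System.out.println(config.getDocumentByName("products").entities.get(0).name); // item
  }
}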
@@ -0,0 +1,394 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import org.apache.lucene.document.Document;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ContentStreamBase;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.handler.RequestHandlerBase;
import org.apache.solr.handler.RequestHandlerUtils;
import org.apache.solr.request.RawResponseWriter;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryResponse;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.update.DocumentBuilder;
import org.apache.solr.update.UpdateHandler;
import org.apache.solr.util.plugin.SolrCoreAware;

import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * <p>
 * Solr Request Handler for data import from databases and REST data sources.
 * </p>
 * <p>
 * It is configured in solrconfig.xml
 * </p>
 * <p/>
 * <p>
 * Refer to <a
 * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
 * for more details.
 * </p>
 * <p/>
 * <b>This API is experimental and subject to change</b>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class DataImportHandler extends RequestHandlerBase implements
        SolrCoreAware {

  private static final Logger LOG = Logger.getLogger(DataImportHandler.class
          .getName());

  private DataImporter importer;

  private Map<String, String> variables = new HashMap<String, String>();

  @SuppressWarnings("unchecked")
  private NamedList initArgs;

  private Map<String, Properties> dataSources = new HashMap<String, Properties>();

  private DataImporter.RequestParams requestParams;

  private List<Document> debugDocuments;

  private DebugLogger debugLogger;

  private boolean debugEnabled = true;

  @Override
  @SuppressWarnings("unchecked")
  public void init(NamedList args) {
    super.init(args);

    initArgs = args;
  }

  @SuppressWarnings("unchecked")
  public void inform(SolrCore core) {
    try {
      String debug = (String) initArgs.get(ENABLE_DEBUG);
      if (debug != null && "no".equals(debug))
        debugEnabled = false;
      NamedList defaults = (NamedList) initArgs.get("defaults");
      if (defaults != null) {
        String configLoc = (String) defaults.get("config");
        if (configLoc != null && configLoc.length() != 0) {
          processConfiguration(defaults);

          importer = new DataImporter(SolrWriter.getResourceAsString(core
                  .getResourceLoader().openResource(configLoc)), core,
                  dataSources);
        }
      }
    } catch (Throwable e) {
      SolrConfig.severeErrors.add(e);
      LOG.log(Level.SEVERE, DataImporter.MSG.LOAD_EXP, e);
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
              DataImporter.MSG.INVALID_CONFIG, e);
    }
  }

  @Override
  @SuppressWarnings("unchecked")
  public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp)
          throws Exception {
    rsp.setHttpCaching(false);
    SolrParams params = req.getParams();
    requestParams = new DataImporter.RequestParams(getParamsMap(params));
    String command = requestParams.command;

    if (DataImporter.SHOW_CONF_CMD.equals(command)) {
      // Modify incoming request params to add wt=raw
      ModifiableSolrParams rawParams = new ModifiableSolrParams(req.getParams());
      rawParams.set(CommonParams.WT, "raw");
      req.setParams(rawParams);
      // Resolve the data-config location from the handler's init defaults
      NamedList defaults = (NamedList) initArgs.get("defaults");
      String dataConfigFile = (String) defaults.get("config");
      ContentStreamBase content = new ContentStreamBase.StringStream(SolrWriter
              .getResourceAsString(req.getCore().getResourceLoader().openResource(
                      dataConfigFile)));
      rsp.add(RawResponseWriter.CONTENT, content);
      return;
    }

    rsp.add("initArgs", initArgs);
    String message = "";

    if (command != null)
      rsp.add("command", command);

    if (requestParams.debug) {
      // Reload the data-config.xml
      importer = null;
      if (requestParams.dataConfig != null) {
        try {
          processConfiguration((NamedList) initArgs.get("defaults"));
          importer = new DataImporter(requestParams.dataConfig, req.getCore(),
                  dataSources);
        } catch (RuntimeException e) {
          rsp.add("exception", DebugLogger.getStacktraceString(e));
          importer = null;
          return;
        }
      } else {
        inform(req.getCore());
      }
      message = DataImporter.MSG.CONFIG_RELOADED;
    }

    // If importer is still null
    if (importer == null) {
      rsp.add("status", DataImporter.MSG.NO_INIT);
      return;
    }

    if (command != null && DataImporter.ABORT_CMD.equals(command)) {
      importer.rumCmd(requestParams, null, null);
    } else if (importer.getStatus() != DataImporter.Status.IDLE) {
      message = DataImporter.MSG.CMD_RUNNING;
    } else if (command != null) {
      if (DataImporter.FULL_IMPORT_CMD.equals(command)
              || DataImporter.DELTA_IMPORT_CMD.equals(command)) {
        UpdateHandler updater = req.getCore().getUpdateHandler();
        SolrResourceLoader loader = req.getCore().getResourceLoader();
        SolrWriter sw = getSolrWriter(updater, loader, req.getSchema());

        if (requestParams.debug) {
          if (debugEnabled) {
            // Synchronous request for the debug mode
            importer.rumCmd(requestParams, sw, variables);
            rsp.add("mode", "debug");
            rsp.add("documents", debugDocuments);
            if (debugLogger != null)
              rsp.add("verbose-output", debugLogger.output);
            debugLogger = null;
            debugDocuments = null;
          } else {
            message = DataImporter.MSG.DEBUG_NOT_ENABLED;
          }
        } else {
          // Asynchronous request for normal mode
          importer.runAsync(requestParams, sw, variables);
        }
      } else if (DataImporter.RELOAD_CONF_CMD.equals(command)) {
        importer = null;
        inform(req.getCore());
        message = DataImporter.MSG.CONFIG_RELOADED;
      }
    }
    rsp.add("status", importer.getStatus() == DataImporter.Status.IDLE ? "idle"
            : "busy");
    rsp.add("importResponse", message);
    rsp.add("statusMessages", importer.getStatusMessages());

    RequestHandlerUtils.addExperimentalFormatWarning(rsp);
  }

  private Map<String, Object> getParamsMap(SolrParams params) {
    Iterator<String> names = params.getParameterNamesIterator();
    Map<String, Object> result = new HashMap<String, Object>();
    while (names.hasNext()) {
      String s = names.next();
      String[] val = params.getParams(s);
      if (val == null || val.length < 1)
        continue;
      if (val.length == 1)
        result.put(s, val[0]);
      else
        result.put(s, Arrays.asList(val));
    }
    return result;
  }

  @SuppressWarnings("unchecked")
  private void processConfiguration(NamedList defaults) {
    if (defaults == null) {
      LOG.info("No configuration specified in solrconfig.xml for DataImportHandler");
      return;
    }

    LOG.info("Processing configuration from solrconfig.xml: " + defaults);

    dataSources = new HashMap<String, Properties>();
    variables = new HashMap<String, String>();

    int position = 0;

    while (position < defaults.size()) {
      if (defaults.getName(position) == null)
        break;

      String name = defaults.getName(position);
      if (name.equals("datasource")) {
        NamedList dsConfig = (NamedList) defaults.getVal(position);
        Properties props = new Properties();
        for (int i = 0; i < dsConfig.size(); i++)
          props.put(dsConfig.getName(i), dsConfig.getVal(i));
        LOG.info("Adding properties to datasource: " + props);
        dataSources.put((String) dsConfig.get("name"), props);
      } else if (!name.equals("config")) {
        String value = (String) defaults.getVal(position);
        variables.put(name, value);
      }
      position++;
    }
  }

  private SolrWriter getSolrWriter(final UpdateHandler updater,
                                   final SolrResourceLoader loader, final IndexSchema schema) {

    return new SolrWriter(updater, loader.getConfigDir()) {

      @Override
      public boolean upload(SolrDoc d) {
        try {
          Document document = DocumentBuilder.toDocument(
                  ((SolrDocumentWrapper) d).doc, schema);
          if (requestParams.debug) {
            if (debugDocuments == null)
              debugDocuments = new ArrayList<Document>();
            debugDocuments.add(document);
            if (debugDocuments.size() >= requestParams.rows) {
              // Abort this operation now
              importer.getDocBuilder().abort();
            }
          }

          return super.upload(document);
        } catch (RuntimeException e) {
          LOG.log(Level.SEVERE, "Exception while adding: " + d, e);
          return false;
        }
      }

      public void log(int event, String name, Object row) {
        if (debugLogger == null) {
          debugLogger = new DebugLogger();
        }
        debugLogger.log(event, name, row);
      }

      public Class loadClass(String name) throws ClassNotFoundException {
        return loader.findClass(name);
      }

      public SolrDoc getSolrDocInstance() {
        return new SolrDocumentWrapper();
      }
    };
  }

  static class SolrDocumentWrapper implements SolrWriter.SolrDoc {
    SolrInputDocument doc;

    public SolrDocumentWrapper() {
      doc = new SolrInputDocument();
    }

    public void setDocumentBoost(float boost) {
      doc.setDocumentBoost(boost);
    }

    public Object getField(String field) {
      return doc.getField(field);
    }

    public void addField(String name, Object value, float boost) {
      doc.addField(name, value, boost);
    }

    public String toString() {
      return doc.toString();
    }
  }

  @Override
  @SuppressWarnings("unchecked")
  public NamedList getStatistics() {
    if (importer == null)
      return super.getStatistics();

    DocBuilder.Statistics cumulative = importer.cumulativeStatistics;
    NamedList result = new NamedList();

    result.add("Status", importer.getStatus().toString());

    if (importer.docBuilder != null) {
      DocBuilder.Statistics running = importer.docBuilder.importStatistics;
      result.add("Documents Processed", running.docCount);
      result.add("Requests made to DataSource", running.queryCount);
      result.add("Rows Fetched", running.rowsCount);
      result.add("Documents Deleted", running.deletedDocCount);
      result.add("Documents Skipped", running.skipDocCount);
    }

    result.add(DataImporter.MSG.TOTAL_DOC_PROCESSED, cumulative.docCount);
    result.add(DataImporter.MSG.TOTAL_QUERIES_EXECUTED, cumulative.queryCount);
    result.add(DataImporter.MSG.TOTAL_ROWS_EXECUTED, cumulative.rowsCount);
    result.add(DataImporter.MSG.TOTAL_DOCS_DELETED, cumulative.deletedDocCount);
    result.add(DataImporter.MSG.TOTAL_DOCS_SKIPPED, cumulative.skipDocCount);

    NamedList requestStatistics = super.getStatistics();
    if (requestStatistics != null) {
      for (int i = 0; i < requestStatistics.size(); i++) {
        result.add(requestStatistics.getName(i), requestStatistics.getVal(i));
      }
    }

    return result;
  }

  // //////////////////////SolrInfoMBeans methods //////////////////////

  @Override
  public String getDescription() {
    return DataImporter.MSG.JMX_DESC;
  }

  @Override
  public String getSourceId() {
    return "$Id$";
  }

  @Override
  public String getVersion() {
    return "1.0";
  }

  @Override
  public String getSource() {
    return "$URL$";
  }

  public static final String ENABLE_DEBUG = "enableDebug";
}
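processConfiguration above walks the handler's defaults list: each "datasource" entry (a nested NamedList) is flattened into a Properties object, "config" is handled separately, and any other entry becomes a variable. A sketch of the same walk over a hand-built NamedList (the datasource names and values are illustrative):

import org.apache.solr.common.util.NamedList;
import java.util.Properties;

public class DefaultsWalkDemo {
  public static void main(String[] args) {
    // Mirrors a <lst name="defaults"> block in solrconfig.xml.
    NamedList<Object> defaults = new NamedList<Object>();
    defaults.add("config", "data-config.xml");  // handled separately
    NamedList<Object> ds = new NamedList<Object>();
    ds.add("name", "hsqldb");
    ds.add("driver", "org.hsqldb.jdbcDriver");
    defaults.add("datasource", ds);
    defaults.add("myVar", "someValue");         // would become a variable

    Properties props = new Properties();
    for (int i = 0; i < defaults.size(); i++) {
      if ("datasource".equals(defaults.getName(i))) {
        NamedList<?> dsConfig = (NamedList<?>) defaults.getVal(i);
        for (int j = 0; j < dsConfig.size(); j++)
          props.put(dsConfig.getName(j), dsConfig.getVal(j));
      }
    }
    System.out.println(props); // {name=hsqldb, driver=org.hsqldb.jdbcDriver}
  }
}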
@@ -0,0 +1,63 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.handler.dataimport;

/**
 * <p>
 * Exception class for all DataImportHandler exceptions
 * </p>
 * <p/>
 * <b>This API is experimental and subject to change</b>
 * <p/>
 * $Id$
 *
 * @since solr 1.3
 */
public class DataImportHandlerException extends RuntimeException {
  private int errCode;

  public boolean debugged = false;

  public static final int SEVERE = 500, WARN = 400, SKIP = 300;

  public DataImportHandlerException(int err) {
    super();
    errCode = err;
  }

  public DataImportHandlerException(int err, String message) {
    super(message + MSG + SolrWriter.getDocCount());
    errCode = err;
  }

  public DataImportHandlerException(int err, String message, Throwable cause) {
    super(message + MSG + SolrWriter.getDocCount(), cause);
    errCode = err;
  }

  public DataImportHandlerException(int err, Throwable cause) {
    super(cause);
    errCode = err;
  }

  public int getErrCode() {
    return errCode;
  }

  public static final String MSG = " Processing Document # ";
}
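A small sketch of how the error codes above travel with the exception (by convention SEVERE aborts the import while SKIP lets the importer drop the current document and continue; running this needs the contrib's SolrWriter on the classpath, since the constructor appends the current document count to the message):

package org.apache.solr.handler.dataimport;

public class ErrCodeDemo {
  public static void main(String[] args) {
    try {
      throw new DataImportHandlerException(DataImportHandlerException.SKIP,
          "Row has no primary key;");
    } catch (DataImportHandlerException e) {
      // 300 = SKIP, 400 = WARN, 500 = SEVERE (per the constants above)
      System.out.println(e.getErrCode() + ": " + e.getMessage());
    }
  }
}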
@ -0,0 +1,544 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import org.apache.solr.core.SolrConfig;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Element;
|
||||
import org.xml.sax.InputSource;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilder;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import java.io.StringReader;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Stores all configuration information for pulling and indexing data.
|
||||
* </p>
|
||||
* <p/>
|
||||
* <b>This API is experimental and subject to change</b>
|
||||
*
|
||||
* @version $Id$
|
||||
* @since solr 1.3
|
||||
*/
|
||||
public class DataImporter {
|
||||
|
||||
public enum Status {
|
||||
IDLE, RUNNING_FULL_DUMP, RUNNING_DELTA_DUMP, JOB_FAILED
|
||||
}
|
||||
|
||||
  private static final Logger LOG = Logger.getLogger(DataImporter.class.getName());

  private Status status = Status.IDLE;

  private DataConfig config;

  private Date lastIndexTime;

  private Date indexStartTime;

  private Properties store = new Properties();

  private Map<String, Properties> dataSourceProps;

  private IndexSchema schema;

  public DocBuilder docBuilder;

  public DocBuilder.Statistics cumulativeStatistics = new DocBuilder.Statistics();

  public Map<String, Evaluator> evaluators;

  private SolrCore core;

  /**
   * Only for testing purposes
   */
  DataImporter() {
  }

  public DataImporter(String dataConfig, SolrCore core, Map<String, Properties> ds) {
    if (dataConfig == null)
      throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
              "Configuration not found");
    this.core = core;
    this.schema = core.getSchema();
    dataSourceProps = ds;
    loadDataConfig(dataConfig);

    for (DataConfig.Document document : config.documents) {
      for (DataConfig.Entity e : document.entities) {
        Map<String, DataConfig.Field> fields = new HashMap<String, DataConfig.Field>();
        initEntity(e, fields, false);
        e.implicitFields = new ArrayList<DataConfig.Field>();
        String errs = verifyWithSchema(fields, e.implicitFields);
        if (e.implicitFields.isEmpty())
          e.implicitFields = null;
        if (errs != null) {
          throw new DataImportHandlerException(DataImportHandlerException.SEVERE, errs);
        }
      }
    }
  }

  private String verifyWithSchema(Map<String, DataConfig.Field> fields,
                                  List<DataConfig.Field> autoFields) {
    List<String> errors = new ArrayList<String>();
    Map<String, SchemaField> schemaFields = schema.getFields();
    for (Map.Entry<String, SchemaField> entry : schemaFields.entrySet()) {
      SchemaField sf = entry.getValue();
      if (!fields.containsKey(sf.getName())) {
        if (sf.isRequired()) {
          LOG.info(sf.getName()
                  + " is a required field in the Solr schema, but it was not found in the data-config");
        }
        autoFields.add(new DataConfig.Field(sf.getName(), sf.multiValued()));
      }
    }
    for (Map.Entry<String, DataConfig.Field> entry : fields.entrySet()) {
      DataConfig.Field fld = entry.getValue();
      FieldType fieldType = null;

      try {
        fieldType = schema.getDynamicFieldType(fld.name);
      } catch (RuntimeException e) {
        // Ignore because it may not be a dynamic field
      }

      if (fld.name != null) {
        if (schema.getFields().get(fld.name) == null && fieldType == null) {
          errors.add("The field: " + fld.name
                  + " present in the data-config does not have a counterpart in the Solr schema");
        }
      } else if (schema.getFields().get(fld.column) == null && fieldType == null) {
        LOG.info("Column: " + fld.column + " is not a schema field");
      }
    }

    if (!errors.isEmpty()) {
      StringBuffer sb = new StringBuffer("There are errors in the schema\n");
      for (String error : errors) {
        sb.append(error).append("\n");
      }
      return sb.toString();
    }
    return null;
  }

  void loadDataConfig(String configFile) {
    try {
      DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
      Document document = builder.parse(new InputSource(new StringReader(configFile)));

      config = new DataConfig();
      config.readFromXml((Element) document.getElementsByTagName("dataConfig").item(0));

      LOG.info("Data Configuration loaded successfully");
    } catch (Exception e) {
      SolrConfig.severeErrors.add(e);
      throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
              "Exception occurred while initializing context", e);
    }
  }
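
  // loadDataConfig parses a document shaped like the sketch below. The element
  // and attribute names follow the DataImportHandler wiki conventions; the JDBC
  // settings and table/column names here are hypothetical:
  //
  //   <dataConfig>
  //     <dataSource driver="org.hsqldb.jdbcDriver" url="jdbc:hsqldb:/tmp/example" user="sa" />
  //     <document name="products">
  //       <entity name="item" query="select * from item">
  //         <field column="ID" name="id" />
  //         <field column="NAME" name="name" />
  //       </entity>
  //     </document>
  //   </dataConfig>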

  private void initEntity(DataConfig.Entity e,
                          Map<String, DataConfig.Field> fields, boolean docRootFound) {
    if (e.pk != null)
      e.primaryKeys = e.pk.split(",");
    e.allAttributes.put(DATA_SRC, e.dataSource);

    if (!docRootFound && !"false".equals(e.docRoot)) {
      // no document root has been found in this chain yet
      e.isDocRoot = true;
    }

    if (e.fields != null) {
      for (DataConfig.Field f : e.fields) {
        f.nameOrColName = f.getName();
        SchemaField schemaField = schema.getFields().get(f.getName());
        if (schemaField != null) {
          f.multiValued = schemaField.multiValued();
          f.allAttributes.put(MULTI_VALUED, Boolean.toString(schemaField.multiValued()));
          f.allAttributes.put(TYPE, schemaField.getType().getTypeName());
          f.allAttributes.put("indexed", Boolean.toString(schemaField.indexed()));
          f.allAttributes.put("stored", Boolean.toString(schemaField.stored()));
          f.allAttributes.put("defaultValue", schemaField.getDefaultValue());
        } else {
          try {
            f.allAttributes.put(TYPE, schema.getDynamicFieldType(f.getName()).getTypeName());
            f.allAttributes.put(MULTI_VALUED, "true");
            f.multiValued = true;
          } catch (RuntimeException e2) {
            LOG.info("Field in data-config.xml - " + f.getName() + " not found in schema.xml");
            f.toWrite = false;
          }
        }
        fields.put(f.getName(), f);
        f.entity = e;
        f.allAttributes.put("boost", f.boost.toString());
        f.allAttributes.put("toWrite", Boolean.toString(f.toWrite));
        e.allFieldsList.add(Collections.unmodifiableMap(f.allAttributes));
      }
    }
    e.allFieldsList = Collections.unmodifiableList(e.allFieldsList);
    e.allAttributes = Collections.unmodifiableMap(e.allAttributes);

    addDataSource(e);

    if (e.entities == null)
      return;
    for (DataConfig.Entity e1 : e.entities) {
      e1.parentEntity = e;
      initEntity(e1, fields, e.isDocRoot || docRootFound);
    }
  }

  public DataConfig getConfig() {
    return config;
  }

  public Date getIndexStartTime() {
    return indexStartTime;
  }

  public void setIndexStartTime(Date indexStartTime) {
    this.indexStartTime = indexStartTime;
  }

  public Date getLastIndexTime() {
    return lastIndexTime;
  }

  public void setLastIndexTime(Date lastIndexTime) {
    this.lastIndexTime = lastIndexTime;
  }

  public void store(Object key, Object value) {
    store.put(key, value);
  }

  public Object retrieve(Object key) {
    return store.get(key);
  }

  @SuppressWarnings("unchecked")
  public void addDataSource(DataConfig.Entity key) {
    if ("null".equals(key.dataSource)) {
      key.dataSrc = new MockDataSource();
      return;
    }
    key.dataSrc = getDataSourceInstance(key);
  }

  DataSource getDataSourceInstance(DataConfig.Entity key) {
    Properties p = dataSourceProps.get(key.dataSource);
    if (p == null)
      p = config.dataSources.get(key.dataSource);
    if (p == null)
      p = dataSourceProps.get(null); // for default data source
    if (p == null)
      p = config.dataSources.get(null);
    if (p == null)
      throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
              "No dataSource: " + key.dataSource + " available for entity: " + key.name);
    String impl = p.getProperty(TYPE);
    DataSource dataSrc = null;
    if (impl == null) {
      dataSrc = new JdbcDataSource();
    } else {
      try {
        dataSrc = (DataSource) DocBuilder.loadClass(impl).newInstance();
      } catch (Exception e) {
        throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
                "Invalid type for data source: " + impl, e);
      }
    }
    try {
      Properties copyProps = new Properties();
      copyProps.putAll(p);
      dataSrc.init(new ContextImpl(key, null, dataSrc, 0,
              Collections.EMPTY_MAP, new HashMap(), null, this), copyProps);
    } catch (Exception e) {
      throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
              "Failed to initialize DataSource: " + key.dataSource, e);
    }
    return dataSrc;
  }
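
  // The lookups above give data-source properties a clear precedence: properties
  // passed in with the request win over <dataSource> elements from the config,
  // and named definitions win over the unnamed default. A named definition that
  // resolves through the config path might look like this (driver and URL
  // hypothetical):
  //
  //   <dataSource name="db1" type="JdbcDataSource"
  //               driver="com.mysql.jdbc.Driver" url="jdbc:mysql://localhost/test" />
  //   ...
  //   <entity name="item" dataSource="db1" query="select * from item">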

  public Status getStatus() {
    return status;
  }

  public void setStatus(Status status) {
    this.status = status;
  }

  public void doFullImport(SolrWriter writer, RequestParams requestParams,
                           Map<String, String> variables) {
    LOG.info("Starting Full Import");
    setStatus(Status.RUNNING_FULL_DUMP);

    if (requestParams.commit)
      setIndexStartTime(new Date());

    try {
      if (requestParams.clean)
        writer.doDeleteAll();
      docBuilder = new DocBuilder(this, writer, requestParams, variables);
      docBuilder.execute(getConfig().documents.get(0).name);
      if (!requestParams.debug)
        cumulativeStatistics.add(docBuilder.importStatistics);
    } catch (RuntimeException e) {
      LOG.log(Level.SEVERE, "Full Import failed", e);
    } finally {
      setStatus(Status.IDLE);
      config.clearCaches();
      DocBuilder.INSTANCE.set(null);
    }
  }

  public void doDeltaImport(SolrWriter writer, RequestParams requestParams,
                            Map<String, String> variables) {
    LOG.info("Starting Delta Import");
    setStatus(Status.RUNNING_DELTA_DUMP);

    try {
      if (requestParams.commit) {
        Date lastModified = writer.loadIndexStartTime();
        setIndexStartTime(new Date());
        setLastIndexTime(lastModified);
      }
      docBuilder = new DocBuilder(this, writer, requestParams, variables);
      docBuilder.execute(config.documents.get(0).name);
      if (!requestParams.debug)
        cumulativeStatistics.add(docBuilder.importStatistics);
    } catch (RuntimeException e) {
      LOG.log(Level.SEVERE, "Delta Import Failed", e);
    } finally {
      setStatus(Status.IDLE);
      config.clearCaches();
      DocBuilder.INSTANCE.set(null);
    }
  }

  public void runAsync(final RequestParams reqParams, final SolrWriter sw,
                       final Map<String, String> variables) {
    new Thread() {
      @Override
      public void run() {
        runCmd(reqParams, sw, variables);
      }
    }.start();
  }

  void runCmd(RequestParams reqParams, SolrWriter sw, Map<String, String> variables) {
    String command = reqParams.command;
    if (command.equals(FULL_IMPORT_CMD)) {
      doFullImport(sw, reqParams, variables);
    } else if (command.equals(DELTA_IMPORT_CMD)) {
      doDeltaImport(sw, reqParams, variables);
    } else if (command.equals(ABORT_CMD)) {
      if (docBuilder != null)
        docBuilder.abort();
    }
  }

  @SuppressWarnings("unchecked")
  Map<String, String> getStatusMessages() {
    Map statusMessages = (Map) retrieve(STATUS_MSGS);
    Map<String, String> result = new LinkedHashMap<String, String>();
    if (statusMessages != null) {
      for (Object o : statusMessages.entrySet()) {
        Map.Entry e = (Map.Entry) o;
        result.put((String) e.getKey(), e.getValue().toString());
      }
    }
    return result;
  }

  public DocBuilder getDocBuilder() {
    return docBuilder;
  }

  public static final ThreadLocal<AtomicLong> QUERY_COUNT = new ThreadLocal<AtomicLong>() {
    protected AtomicLong initialValue() {
      return new AtomicLong();
    }
  };

  static final SimpleDateFormat DATE_TIME_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

  static final class MSG {
    public static final String NO_CONFIG_FOUND = "Configuration not found";

    public static final String NO_INIT = "DataImportHandler started. Not Initialized. No commands can be run";

    public static final String INVALID_CONFIG = "FATAL: Could not create importer. DataImporter config invalid";

    public static final String LOAD_EXP = "Exception while loading DataImporter";

    public static final String JMX_DESC = "Manage data import from databases to Solr";

    public static final String CMD_RUNNING = "A command is still running...";

    public static final String DEBUG_NOT_ENABLED = "Debug not enabled. Add a tag <str name=\"enableDebug\">true</str> in solrconfig.xml";

    public static final String CONFIG_RELOADED = "Configuration Re-loaded successfully";

    public static final String TOTAL_DOC_PROCESSED = "Total Documents Processed";

    public static final String TOTAL_QUERIES_EXECUTED = "Total Requests made to DataSource";

    public static final String TOTAL_ROWS_EXECUTED = "Total Rows Fetched";

    public static final String TOTAL_DOCS_DELETED = "Total Documents Deleted";

    public static final String TOTAL_DOCS_SKIPPED = "Total Documents Skipped";
  }

  static final class RequestParams {
    public String command = null;

    public boolean debug = false;

    public boolean verbose = false;

    public boolean commit = true;

    public boolean optimize = true;

    public int start = 0;

    public int rows = 10;

    public boolean clean = true;

    public List<String> entities;

    public Map<String, Object> requestParams;

    public String dataConfig;

    public RequestParams() {
    }

    public RequestParams(Map<String, Object> requestParams) {
      if (requestParams.containsKey("command"))
        command = (String) requestParams.get("command");

      if ("on".equals(requestParams.get("debug"))) {
        debug = true;
        // Set default values suitable for debug mode
        commit = false;
        clean = false;
        verbose = "true".equals(requestParams.get("verbose"))
                || "on".equals(requestParams.get("verbose"));
      }
      if (requestParams.containsKey("commit"))
        commit = Boolean.parseBoolean((String) requestParams.get("commit"));
      if (requestParams.containsKey("start"))
        start = Integer.parseInt((String) requestParams.get("start"));
      if (requestParams.containsKey("rows"))
        rows = Integer.parseInt((String) requestParams.get("rows"));
      if (requestParams.containsKey("clean"))
        clean = Boolean.parseBoolean((String) requestParams.get("clean"));
      if (requestParams.containsKey("optimize"))
        optimize = Boolean.parseBoolean((String) requestParams.get("optimize"));

      Object o = requestParams.get("entity");

      if (o instanceof String) {
        entities = new ArrayList<String>();
        entities.add((String) o);
      } else if (o instanceof List) {
        entities = (List<String>) requestParams.get("entity");
      }

      dataConfig = (String) requestParams.get("dataConfig");
      if (dataConfig != null && dataConfig.trim().length() == 0) {
        // An empty dataConfig parameter is not valid; change it to null
        dataConfig = null;
      }

      this.requestParams = requestParams;
    }
  }
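
  // RequestParams mirrors the handler's HTTP parameters one-to-one, so a typical
  // invocation looks like the following (host, port and handler path are the
  // usual conventions, not something this class mandates):
  //
  //   http://localhost:8983/solr/dataimport?command=full-import&clean=true&commit=true
  //   http://localhost:8983/solr/dataimport?command=delta-import
  //   http://localhost:8983/solr/dataimport?command=full-import&debug=on&verbose=true&start=0&rows=10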

  public SolrCore getCore() {
    return core;
  }

  public static final String COLUMN = "column";

  public static final String TYPE = "type";

  public static final String DATA_SRC = "dataSource";

  public static final String MULTI_VALUED = "multiValued";

  public static final String NAME = "name";

  public static final String STATUS_MSGS = "status-messages";

  public static final String FULL_IMPORT_CMD = "full-import";

  public static final String DELTA_IMPORT_CMD = "delta-import";

  public static final String ABORT_CMD = "abort";

  public static final String DEBUG_MODE = "debug";

  public static final String RELOAD_CONF_CMD = "reload-config";

  public static final String SHOW_CONF_CMD = "show-config";
}
@ -0,0 +1,72 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.handler.dataimport;

import java.util.Properties;

/**
 * <p>
 * Provides data from a source with a given query.
 * </p>
 * <p/>
 * <p>
 * Implementations of this interface must provide a default no-arg constructor.
 * </p>
 * <p/>
 * <p>
 * Refer to <a
 * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
 * for more details.
 * </p>
 * <p/>
 * <b>This API is experimental and may change in the future.</b>
 *
 * @version $Id$
 * @since solr 1.3
 */
public abstract class DataSource<T> {

  /**
   * Initializes the DataSource with the <code>Context</code> and
   * initialization properties.
   * <p/>
   * This is invoked by the <code>DataImporter</code> after creating an
   * instance of this class.
   *
   * @param context   the current context
   * @param initProps initialization properties for this data source
   */
  public abstract void init(Context context, Properties initProps);

  /**
   * Get records for the given query. The return type depends on the
   * implementation.
   *
   * @param query The query string. It can be SQL for a JdbcDataSource, a URL
   *              for an HttpDataSource, a file location for a FileDataSource, or a custom
   *              format for your own custom DataSource.
   * @return Depends on the implementation. For instance, JdbcDataSource returns
   *         an Iterator&lt;Map&lt;String, Object&gt;&gt;
   */
  public abstract T getData(String query);

  /**
   * Cleans up resources of this DataSource after use.
   */
  public abstract void close();
}
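
A minimal sketch of a custom implementation of this contract; the class name and the canned, query-keyed data are hypothetical, but the three overrides are exactly what the abstract class requires:

    package org.apache.solr.handler.dataimport;

    import java.util.HashMap;
    import java.util.Map;
    import java.util.Properties;

    /** Hypothetical example: serves pre-registered strings keyed by query. */
    public class InMemoryDataSource extends DataSource<String> {
      private final Map<String, String> data = new HashMap<String, String>();

      public void init(Context context, Properties initProps) {
        // Copy any init properties straight into the lookup table.
        for (Map.Entry<Object, Object> e : initProps.entrySet()) {
          data.put((String) e.getKey(), (String) e.getValue());
        }
      }

      public String getData(String query) {
        return data.get(query); // null means "no data for this query"
      }

      public void close() {
        data.clear();
      }
    }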
@ -0,0 +1,87 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.handler.dataimport;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * <p>
 * Transformer instance which creates Date instances out of Strings.
 * </p>
 * <p/>
 * <p>
 * Refer to <a
 * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
 * for more details.
 * </p>
 * <p/>
 * <b>This API is experimental and subject to change</b>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class DateFormatTransformer extends Transformer {
  private static final Logger LOG = Logger.getLogger(DateFormatTransformer.class.getName());

  @SuppressWarnings("unchecked")
  public Object transformRow(Map<String, Object> aRow, Context context) {
    for (Map<String, String> map : context.getAllEntityFields()) {
      String fmt = map.get(DATE_TIME_FMT);
      if (fmt == null)
        continue;
      String column = map.get(DataImporter.COLUMN);
      String srcCol = map.get(RegexTransformer.SRC_COL_NAME);
      if (srcCol == null)
        srcCol = column;
      try {
        Object o = aRow.get(srcCol);
        if (o instanceof List) {
          List<String> inputs = (List<String>) o;
          List<Date> results = new ArrayList<Date>();
          for (String input : inputs) {
            results.add(process(input, fmt));
          }
          aRow.put(column, results);
        } else {
          String value = (String) o;
          aRow.put(column, process(value, fmt));
        }
      } catch (ParseException e) {
        LOG.log(Level.WARNING, "Could not parse a Date field", e);
      }
    }
    return aRow;
  }

  private Date process(String value, String format) throws ParseException {
    if (value == null || value.trim().length() == 0)
      return null;

    return new SimpleDateFormat(format).parse(value);
  }

  public static final String DATE_TIME_FMT = "dateTimeFormat";
}
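
The transformer is driven entirely by field attributes in data-config.xml. A sketch of typical usage (entity, column and format are hypothetical):

    <entity name="item" transformer="DateFormatTransformer" query="select * from item">
      <field column="lastModified" dateTimeFormat="yyyy-MM-dd HH:mm:ss" />
    </entity>

Each value arriving in the lastModified column is then parsed with SimpleDateFormat before the document is written; list values are converted element by element, as transformRow above shows.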
@ -0,0 +1,274 @@
package org.apache.solr.handler.dataimport;

import org.apache.solr.common.util.NamedList;

import java.io.PrintWriter;
import java.io.StringWriter;
import java.text.MessageFormat;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Stack;

/**
 * <p>
 * Implements most of the interactive development functionality
 * </p>
 * <p/>
 * <p>
 * Refer to <a
 * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
 * for more details.
 * </p>
 * <p/>
 * <b>This API is experimental and subject to change</b>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class DebugLogger {
  private Stack<DebugInfo> debugStack;

  NamedList output;

  private static final String LINE = "---------------------------------------------";

  private MessageFormat fmt = new MessageFormat("----------- row #{0}-------------");

  boolean enabled = true;

  public DebugLogger() {
    output = new NamedList();
    debugStack = new Stack<DebugInfo>() {

      public DebugInfo pop() {
        if (size() == 1)
          throw new DataImportHandlerException(
                  DataImportHandlerException.SEVERE, "Stack is becoming empty");
        return super.pop();
      }
    };
    debugStack.push(new DebugInfo(null, -1, null));
    output = debugStack.peek().lst;
  }

  private DebugInfo peekStack() {
    return debugStack.isEmpty() ? null : debugStack.peek();
  }

  public void log(int event, String name, Object row) {
    if (event == SolrWriter.DISABLE_LOGGING) {
      enabled = false;
      return;
    } else if (event == SolrWriter.ENABLE_LOGGING) {
      enabled = true;
      return;
    }

    if (!enabled && event != SolrWriter.START_ENTITY && event != SolrWriter.END_ENTITY) {
      return;
    }

    if (event == SolrWriter.START_DOC) {
      debugStack.push(new DebugInfo(null, SolrWriter.START_DOC, peekStack()));
    } else if (SolrWriter.START_ENTITY == event) {
      debugStack.push(new DebugInfo(name, SolrWriter.START_ENTITY, peekStack()));
    } else if (SolrWriter.ENTITY_OUT == event || SolrWriter.PRE_TRANSFORMER_ROW == event) {
      if (debugStack.peek().type == SolrWriter.START_ENTITY
              || debugStack.peek().type == SolrWriter.START_DOC) {
        debugStack.peek().lst.add(null, fmt.format(new Object[]{++debugStack.peek().rowCount}));
        addToNamedList(debugStack.peek().lst, row);
        debugStack.peek().lst.add(null, LINE);
      }
    } else if (event == SolrWriter.ROW_END) {
      popAllTransformers();
    } else if (SolrWriter.END_ENTITY == event) {
      while (debugStack.pop().type != SolrWriter.START_ENTITY)
        ;
    } else if (SolrWriter.END_DOC == event) {
      while (debugStack.pop().type != SolrWriter.START_DOC)
        ;
    } else if (event == SolrWriter.TRANSFORMER_EXCEPTION) {
      debugStack.push(new DebugInfo(name, event, peekStack()));
      debugStack.peek().lst.add("EXCEPTION", getStacktraceString((Exception) row));
    } else if (SolrWriter.TRANSFORMED_ROW == event) {
      debugStack.push(new DebugInfo(name, event, peekStack()));
      debugStack.peek().lst.add(null, LINE);
      addToNamedList(debugStack.peek().lst, row);
      debugStack.peek().lst.add(null, LINE);
      if (row instanceof DataImportHandlerException) {
        DataImportHandlerException dataImportHandlerException = (DataImportHandlerException) row;
        dataImportHandlerException.debugged = true;
      }
    } else if (SolrWriter.ENTITY_META == event) {
      popAllTransformers();
      debugStack.peek().lst.add(name, row);
    } else if (SolrWriter.ENTITY_EXCEPTION == event) {
      if (row instanceof DataImportHandlerException) {
        DataImportHandlerException dihe = (DataImportHandlerException) row;
        if (dihe.debugged)
          return;
        dihe.debugged = true;
      }

      popAllTransformers();
      debugStack.peek().lst.add("EXCEPTION", getStacktraceString((Exception) row));
    }
  }

  private void popAllTransformers() {
    while (true) {
      int type = debugStack.peek().type;
      if (type == SolrWriter.START_DOC || type == SolrWriter.START_ENTITY)
        break;
      debugStack.pop();
    }
  }

  private void addToNamedList(NamedList nl, Object row) {
    if (row instanceof List) {
      List list = (List) row;
      NamedList l = new NamedList();
      nl.add(null, l);
      for (Object o : list) {
        Map<String, Object> map = (Map<String, Object>) o;
        for (Map.Entry<String, Object> entry : map.entrySet())
          nl.add(entry.getKey(), entry.getValue());
      }
    } else if (row instanceof Map) {
      Map<String, Object> map = (Map<String, Object>) row;
      for (Map.Entry<String, Object> entry : map.entrySet())
        nl.add(entry.getKey(), entry.getValue());
    }
  }

  static DataSource wrapDs(final DataSource ds) {
    final SolrWriter writer = DocBuilder.INSTANCE.get().writer;
    return new DataSource() {
      public void init(Context context, Properties initProps) {
        ds.init(context, initProps);
      }

      public void close() {
        ds.close();
      }

      public Object getData(String query) {
        writer.log(SolrWriter.ENTITY_META, "query", query);
        long start = System.currentTimeMillis();
        try {
          return ds.getData(query);
        } catch (DataImportHandlerException de) {
          DocBuilder.INSTANCE.get().writer.log(SolrWriter.ENTITY_EXCEPTION, null, de);
          throw de;
        } catch (Exception e) {
          DocBuilder.INSTANCE.get().writer.log(SolrWriter.ENTITY_EXCEPTION, null, e);
          DataImportHandlerException de = new DataImportHandlerException(
                  DataImportHandlerException.SEVERE, "", e);
          de.debugged = true;
          throw de;
        } finally {
          writer.log(SolrWriter.ENTITY_META, "time-taken", DocBuilder.getTimeElapsedSince(start));
        }
      }
    };
  }

  static Transformer wrapTransformer(final Transformer t) {
    if (DocBuilder.INSTANCE.get() != null && DocBuilder.INSTANCE.get().verboseDebug) {
      return new Transformer() {
        public Object transformRow(Map<String, Object> row, Context context) {
          DocBuilder.INSTANCE.get().writer.log(SolrWriter.PRE_TRANSFORMER_ROW, null, row);
          String tName = getTransformerName(t);
          Object result = null;
          try {
            result = t.transformRow(row, context);
            DocBuilder.INSTANCE.get().writer.log(SolrWriter.TRANSFORMED_ROW, tName, result);
          } catch (DataImportHandlerException de) {
            DocBuilder.INSTANCE.get().writer.log(SolrWriter.TRANSFORMER_EXCEPTION, tName, de);
            de.debugged = true;
            throw de;
          } catch (Exception e) {
            DocBuilder.INSTANCE.get().writer.log(SolrWriter.TRANSFORMER_EXCEPTION, tName, e);
            DataImportHandlerException de = new DataImportHandlerException(
                    DataImportHandlerException.SEVERE, "", e);
            de.debugged = true;
            throw de;
          }
          return result;
        }
      };
    } else {
      return t;
    }
  }

  public static String getStacktraceString(Exception e) {
    StringWriter sw = new StringWriter();
    e.printStackTrace(new PrintWriter(sw));
    return sw.toString();
  }

  static String getTransformerName(Transformer t) {
    Class transClass = t.getClass();
    if (t instanceof EntityProcessorBase.ReflectionTransformer) {
      return ((EntityProcessorBase.ReflectionTransformer) t).trans;
    }
    if (t instanceof ScriptTransformer) {
      ScriptTransformer scriptTransformer = (ScriptTransformer) t;
      return "script:" + scriptTransformer.getFunctionName();
    }
    if (transClass.getPackage().equals(DebugLogger.class.getPackage())) {
      return transClass.getSimpleName();
    } else {
      return transClass.getName();
    }
  }

  private static class DebugInfo {
    String name;

    int tCount, rowCount;

    NamedList lst;

    int type;

    DebugInfo parent;

    public DebugInfo(String name, int type, DebugInfo parent) {
      this.name = name;
      this.type = type;
      this.parent = parent;
      lst = new NamedList();
      if (parent != null) {
        String displayName = null;
        if (type == SolrWriter.START_ENTITY) {
          displayName = "entity:" + name;
        } else if (type == SolrWriter.TRANSFORMED_ROW || type == SolrWriter.TRANSFORMER_EXCEPTION) {
          displayName = "transformer:" + name;
        } else if (type == SolrWriter.START_DOC) {
          name = displayName = "document#" + SolrWriter.getDocCount();
        }
        parent.lst.add(displayName, lst);
      }
    }
  }

}
@ -0,0 +1,614 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.handler.dataimport;

import java.util.*;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * <p>
 * DocBuilder is responsible for creating Solr documents out of the given
 * configuration. It also maintains statistics information. It depends on the
 * EntityProcessor implementations to fetch data.
 * </p>
 * <p/>
 * <b>This API is experimental and subject to change</b>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class DocBuilder {
  public static final String DOC_BOOST = "$docBoost";

  private static final Logger LOG = Logger.getLogger(DocBuilder.class.getName());

  private DataImporter dataImporter;

  private DataConfig.Document document;

  private DataConfig.Entity root;

  @SuppressWarnings("unchecked")
  private Map statusMessages = new LinkedHashMap();

  public Statistics importStatistics = new Statistics();

  SolrWriter writer;

  DataImporter.RequestParams requestParameters;

  boolean verboseDebug = false;

  private Map<String, String> defaultVariables;

  private Map<String, Object> session = new HashMap<String, Object>();

  static final ThreadLocal<DocBuilder> INSTANCE = new ThreadLocal<DocBuilder>();

  public DocBuilder(DataImporter context, SolrWriter writer,
                    DataImporter.RequestParams reqParams, Map<String, String> variables) {
    INSTANCE.set(this);
    this.dataImporter = context;
    this.writer = writer;
    DataImporter.QUERY_COUNT.set(importStatistics.queryCount);
    requestParameters = reqParams;
    verboseDebug = requestParameters.debug && requestParameters.verbose;
    defaultVariables = Collections.unmodifiableMap(variables);
  }

  public VariableResolverImpl getVariableResolver(DataImporter context) {
    VariableResolverImpl resolver = new VariableResolverImpl();
    Map<String, Object> indexerNamespace = new HashMap<String, Object>();
    if (context.getLastIndexTime() != null)
      indexerNamespace.put(LAST_INDEX_TIME,
              DataImporter.DATE_TIME_FORMAT.format(context.getLastIndexTime()));
    indexerNamespace.put(INDEX_START_TIME, context.getIndexStartTime());
    indexerNamespace.put("request", requestParameters);
    indexerNamespace.put("defaults", defaultVariables);
    indexerNamespace.put("functions",
            EvaluatorBag.getFunctionsNamespace(resolver, dataImporter.getConfig().evaluators));
    if (context.getConfig().script != null) {
      indexerNamespace.put(DataConfig.SCRIPT, context.getConfig().script.script);
      indexerNamespace.put(DataConfig.SCRIPT_LANG, context.getConfig().script.language);
    }
    resolver.addNamespace(DataConfig.IMPORTER_NS, indexerNamespace);
    return resolver;
  }
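
  // Everything registered in the indexer namespace above is addressable from the
  // configuration as a dollar-variable. Assuming DataConfig.IMPORTER_NS resolves
  // to the "dataimporter" prefix used on the wiki, a delta query can reference
  // the previous run like this (table and column names hypothetical):
  //
  //   deltaQuery="select id from item where last_modified > '${dataimporter.last_index_time}'"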

  @SuppressWarnings("unchecked")
  public void execute(String docName) {
    dataImporter.store(DataImporter.STATUS_MSGS, statusMessages);
    document = dataImporter.getConfig().getDocumentByName(docName);
    if (document == null)
      return;
    final AtomicLong startTime = new AtomicLong(System.currentTimeMillis());
    statusMessages.put(TIME_ELAPSED, new Object() {
      public String toString() {
        return getTimeElapsedSince(startTime.get());
      }
    });

    statusMessages.put(DataImporter.MSG.TOTAL_QUERIES_EXECUTED, importStatistics.queryCount);
    statusMessages.put(DataImporter.MSG.TOTAL_ROWS_EXECUTED, importStatistics.rowsCount);
    statusMessages.put(DataImporter.MSG.TOTAL_DOC_PROCESSED, importStatistics.docCount);
    statusMessages.put(DataImporter.MSG.TOTAL_DOCS_SKIPPED, importStatistics.skipDocCount);

    List<String> entities = requestParameters.entities;

    for (DataConfig.Entity e : document.entities) {
      if (entities != null && !entities.contains(e.name))
        continue;

      root = e;
      if (dataImporter.getStatus() == DataImporter.Status.RUNNING_DELTA_DUMP
              && dataImporter.getLastIndexTime() != null) {
        doDelta();
      } else {
        doFullDump();
      }
      statusMessages.remove(DataImporter.MSG.TOTAL_DOC_PROCESSED);
    }

    if (stop.get()) {
      if (DataImporter.ABORT_CMD.equals(requestParameters.command)) {
        // Don't commit if aborted using command=abort
        statusMessages.put("Aborted", DataImporter.DATE_TIME_FORMAT.format(new Date()));
      } else if (requestParameters.commit) {
        // Debug mode, commit if commit=true was specified
        commit();
      }
    } else {
      // Finished operation normally, commit now
      commit();
    }

    statusMessages.remove(TIME_ELAPSED);
    statusMessages.put("Time taken", getTimeElapsedSince(startTime.get()));
    LOG.info("Time taken = " + getTimeElapsedSince(startTime.get()));
  }

  @SuppressWarnings("unchecked")
  private void commit() {
    if (requestParameters.commit)
      writer.persistIndexStartTime(dataImporter.getIndexStartTime());
    LOG.info("Import completed successfully");
    statusMessages.put("", "Indexing completed. Added/Updated: "
            + importStatistics.docCount + " documents. Deleted "
            + importStatistics.deletedDocCount + " documents.");
    writer.commit(requestParameters.optimize);
    addStatusMessage("Committed");
    if (requestParameters.optimize)
      addStatusMessage("Optimized");
  }

  @SuppressWarnings("unchecked")
  private void doFullDump() {
    addStatusMessage("Full Dump Started");
    buildDocument(getVariableResolver(dataImporter), null, null, root, true, null);
  }

  @SuppressWarnings("unchecked")
  private void doDelta() {
    addStatusMessage("Delta Dump started");
    VariableResolverImpl resolver = getVariableResolver(dataImporter);

    if (document.deleteQuery != null) {
      writer.deleteByQuery(document.deleteQuery);
    }

    addStatusMessage("Identifying Delta");
    LOG.info("Starting delta collection.");
    Set<Map<String, Object>> deletedKeys = new HashSet<Map<String, Object>>();
    Set<Map<String, Object>> allPks = collectDelta(root, null, resolver, dataImporter, deletedKeys);
    if (stop.get())
      return;
    addStatusMessage("Deltas Obtained");
    addStatusMessage("Building documents");
    if (!deletedKeys.isEmpty()) {
      deleteAll(deletedKeys);
      importStatistics.deletedDocCount.addAndGet(deletedKeys.size());
      // Make sure that documents are not re-created
      allPks.removeAll(deletedKeys);
    }

    statusMessages.put("Total Changed Documents", allPks.size());
    for (Map<String, Object> pk : allPks) {
      VariableResolverImpl vri = getVariableResolver(dataImporter);
      vri.addNamespace(DataConfig.IMPORTER_NS + ".delta", pk);
      buildDocument(vri, null, pk, root, true, null);
    }

    if (!stop.get()) {
      writer.persistIndexStartTime(dataImporter.getIndexStartTime());
      LOG.info("Delta Import completed successfully");
    }
  }

  private void deleteAll(Set<Map<String, Object>> deletedKeys) {
    LOG.info("Deleting stale documents");
    for (Map<String, Object> deletedKey : deletedKeys) {
      writer.deleteDoc(deletedKey.get(root.pk));
    }
  }

  @SuppressWarnings("unchecked")
  public void addStatusMessage(String msg) {
    statusMessages.put(msg, DataImporter.DATE_TIME_FORMAT.format(new Date()));
  }

  @SuppressWarnings("unchecked")
  private void buildDocument(VariableResolverImpl vr, SolrWriter.SolrDoc doc,
                             Map<String, Object> pk, DataConfig.Entity entity, boolean isRoot,
                             ContextImpl parentCtx) {

    EntityProcessor entityProcessor = getEntityProcessor(entity);
    DataSource ds = entity.dataSrc;
    if (verboseDebug) {
      ds = DebugLogger.wrapDs(ds);
    }
    ContextImpl ctx = new ContextImpl(entity, vr, ds,
            pk == null ? Context.FULL_DUMP : Context.DELTA_DUMP,
            requestParameters.requestParams, session, parentCtx, dataImporter);
    entityProcessor.init(ctx);

    if (requestParameters.start > 0) {
      writer.log(SolrWriter.DISABLE_LOGGING, null, null);
    }

    if (verboseDebug) {
      writer.log(SolrWriter.START_ENTITY, entity.name, null);
    }

    int seenDocCount = 0;

    try {
      while (true) {
        if (stop.get())
          return;
        try {
          seenDocCount++;

          if (seenDocCount > requestParameters.start) {
            writer.log(SolrWriter.ENABLE_LOGGING, null, null);
          }

          if (verboseDebug && entity.isDocRoot) {
            writer.log(SolrWriter.START_DOC, entity.name, null);
          }
          if (doc == null && entity.isDocRoot) {
            if (ctx.getDocSession() != null)
              ctx.getDocSession().clear();
            else
              ctx.setDocSession(new HashMap<String, Object>());
            doc = writer.getSolrDocInstance();
            DataConfig.Entity e = entity;
            while (e.parentEntity != null) {
              addFields(e.parentEntity, doc,
                      (Map<String, Object>) vr.resolve(e.parentEntity.name));
              e = e.parentEntity;
            }
          }

          Map<String, Object> arow = entityProcessor.nextRow();
          if (arow == null)
            break;

          if (arow.containsKey(DOC_BOOST)) {
            setDocumentBoost(doc, arow);
          }

          // Support for the start parameter in debug mode
          if (entity.isDocRoot) {
            if (seenDocCount <= requestParameters.start)
              continue;
          }

          if (verboseDebug) {
            writer.log(SolrWriter.ENTITY_OUT, entity.name, arow);
          }
          importStatistics.rowsCount.incrementAndGet();
          if (entity.fields != null && doc != null) {
            addFields(entity, doc, arow);
          }
          if (isRoot)
            vr.removeNamespace(null);
          if (entity.entities != null) {
            vr.addNamespace(entity.name, arow);
            for (DataConfig.Entity child : entity.entities) {
              buildDocument(vr, doc, null, child, false, ctx);
            }
            vr.removeNamespace(entity.name);
          }

          if (entity.isDocRoot) {
            if (stop.get())
              return;
            boolean result = writer.upload(doc);
            doc = null;
            if (result)
              importStatistics.docCount.incrementAndGet();
          }

        } catch (DataImportHandlerException e) {
          if (verboseDebug) {
            writer.log(SolrWriter.ENTITY_EXCEPTION, entity.name, e);
          }
          if (isRoot) {
            if (e.getErrCode() == DataImportHandlerException.SKIP) {
              importStatistics.skipDocCount.getAndIncrement();
            } else {
              LOG.log(Level.SEVERE, "Exception while processing: "
                      + entity.name + " document : " + doc, e);
            }
            if (e.getErrCode() == DataImportHandlerException.SEVERE)
              throw e;
          } else
            throw e;
        } finally {
          if (verboseDebug) {
            writer.log(SolrWriter.ROW_END, entity.name, null);
            if (entity.isDocRoot)
              writer.log(SolrWriter.END_DOC, null, null);
          }
        }
      }
    } finally {
      if (verboseDebug) {
        writer.log(SolrWriter.END_ENTITY, null, null);
      }
    }
  }
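
  // buildDocument streams rows from the entity's processor and recurses once per
  // child entity for every parent row. A nested sketch such as the following
  // (table and column names hypothetical) therefore yields one Solr document per
  // "item" row, enriched with the matching "feature" rows; the child query can
  // reference the parent row through the namespace registered above:
  //
  //   <entity name="item" query="select * from item">
  //     <field column="ID" name="id" />
  //     <entity name="feature" query="select description from feature where item_id='${item.ID}'">
  //       <field column="description" name="features" />
  //     </entity>
  //   </entity>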

  private void setDocumentBoost(SolrWriter.SolrDoc doc, Map<String, Object> arow) {
    Object v = arow.get(DOC_BOOST);
    float value = 1.0f;
    if (v instanceof Number) {
      value = ((Number) v).floatValue();
    } else {
      value = Float.parseFloat(v.toString());
    }
    doc.setDocumentBoost(value);
  }

  @SuppressWarnings("unchecked")
  private void addFields(DataConfig.Entity entity, SolrWriter.SolrDoc doc,
                         Map<String, Object> arow) {
    DataConfig.Entity parentMost = entity;
    while (parentMost.parentEntity != null)
      parentMost = parentMost.parentEntity;
    for (DataConfig.Field field : entity.fields) {
      addFieldValue(field, arow, null, doc);
    }
    if (parentMost.implicitFields != null) {
      Map<String, Object> lowerCaseMap = new HashMap<String, Object>();
      for (Map.Entry<String, Object> entry : arow.entrySet())
        lowerCaseMap.put(entry.getKey().toLowerCase(), entry.getValue());

      for (DataConfig.Field automaticField : parentMost.implicitFields) {
        addFieldValue(automaticField, arow, lowerCaseMap, doc);
      }
    }
  }

  private void addFieldValue(DataConfig.Field field, Map<String, Object> arow,
                             Map<String, Object> lowerCaseMap, SolrWriter.SolrDoc doc) {
    if (!field.toWrite)
      return;
    Object value = arow.get(field.column);
    if (value == null) {
      if (lowerCaseMap != null) {
        value = lowerCaseMap.get(field.column.toLowerCase());
      }
      if (value == null)
        return;
    }

    if (value instanceof Collection) {
      Collection collection = (Collection) value;
      if (field.multiValued) {
        for (Object o : collection) {
          doc.addField(field.nameOrColName, o, field.boost);
        }
      } else {
        if (doc.getField(field.nameOrColName) == null)
          for (Object o : collection) {
            doc.addField(field.nameOrColName, o, field.boost);
            break;
          }
      }
    } else if (field.multiValued) {
      doc.addField(field.nameOrColName, value, field.boost);
    } else {
      if (doc.getField(field.nameOrColName) == null)
        doc.addField(field.nameOrColName, value, field.boost);
    }
  }

  public static EntityProcessor getEntityProcessor(DataConfig.Entity entity) {
    if (entity.processor != null)
      return entity.processor;
    EntityProcessor entityProcessor;
    if (entity.proc == null) {
      entityProcessor = new SqlEntityProcessor();
    } else {
      try {
        entityProcessor = (EntityProcessor) loadClass(entity.proc).newInstance();
      } catch (Exception e) {
        throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
                "Unable to load EntityProcessor implementation for entity: " + entity.name, e);
      }
    }
    return entity.processor = entityProcessor;
  }

  /**
   * <p>
   * Collects the unique keys of all Solr documents for which one or more source
   * tables have been changed since the last indexed time.
   * </p>
   * <p>
   * Note: In our definition, the unique key of a Solr document is the primary
   * key of the top-level entity (unless skipped using docRoot=false) in
   * data-config.xml.
   * </p>
   *
   * @return an iterator to the list of keys for which Solr documents should be
   *         updated.
   */
  @SuppressWarnings("unchecked")
  public Set<Map<String, Object>> collectDelta(DataConfig.Entity entity,
                                               DataConfig.Entity parentEntity, VariableResolverImpl resolver,
                                               DataImporter context, Set<Map<String, Object>> deletedRows) {
    if (stop.get())
      return new HashSet();

    Set<Map<String, Object>> myModifiedPks = new HashSet<Map<String, Object>>();

    if (entity.entities != null) {

      for (DataConfig.Entity entity1 : entity.entities) {
        myModifiedPks.addAll(collectDelta(entity1, entity, resolver, context, deletedRows));
      }

    }
    // identify the modified rows for this entity

    Set<Map<String, Object>> deltaSet = new HashSet<Map<String, Object>>();
    resolver.addNamespace(null, (Map) entity.allAttributes);
    EntityProcessor entityProcessor = getEntityProcessor(entity);
    entityProcessor.init(new ContextImpl(entity, resolver, entity.dataSrc,
            Context.FIND_DELTA, requestParameters.requestParams, session, null,
            dataImporter));
    LOG.info("Running ModifiedRowKey() for Entity: " + entity.name);
    int count = 0;
    while (true) {
      Map<String, Object> row = entityProcessor.nextModifiedRowKey();

      if (row == null)
        break;

      deltaSet.add(row);
      count++;
      importStatistics.rowsCount.incrementAndGet();
    }
    LOG.info("Completed ModifiedRowKey for Entity: " + entity.name + " rows obtained: " + count);
    count = 0;
    // identify the deleted rows for this entity
    LOG.info("Running DeletedRowKey() for Entity: " + entity.name);
    Set<Map<String, Object>> deletedSet = new HashSet<Map<String, Object>>();
    while (true) {
      Map<String, Object> row = entityProcessor.nextDeletedRowKey();
      if (row == null)
        break;

      deletedSet.add(row);
      count++;
      importStatistics.rowsCount.incrementAndGet();
    }
    LOG.info("Completed DeletedRowKey for Entity: " + entity.name + " rows obtained: " + count);

    myModifiedPks.addAll(deltaSet);
    Set<Map<String, Object>> parentKeyList = new HashSet<Map<String, Object>>();
    if (parentEntity != null && parentEntity.isDocRoot) {
      EntityProcessor parentEntityProcessor = getEntityProcessor(parentEntity);
      parentEntityProcessor.init(new ContextImpl(parentEntity, resolver,
              parentEntity.dataSrc, Context.FIND_DELTA,
              requestParameters.requestParams, session, null, dataImporter));
      // find the parent rows affected by the modified child rows
      for (Map<String, Object> row : myModifiedPks)
        getModifiedParentRows(resolver.addNamespace(entity.name, row),
                entity.name, parentEntityProcessor, parentKeyList);
      // and do the same for deleted rows
      for (Map<String, Object> row : deletedSet) {
        getModifiedParentRows(resolver.addNamespace(entity.name, row),
                entity.name, parentEntityProcessor, parentKeyList);
      }
    }
    LOG.info("Completed parentDeltaQuery for Entity: " + entity.name);
    if (entity.isDocRoot)
      deletedRows.addAll(deletedSet);

    return entity.isDocRoot ? myModifiedPks : new HashSet<Map<String, Object>>(parentKeyList);
  }

  private void getModifiedParentRows(VariableResolverImpl resolver,
                                     String entity, EntityProcessor entityProcessor,
                                     Set<Map<String, Object>> parentKeyList) {
    try {
      while (true) {
        Map<String, Object> parentRow = entityProcessor.nextModifiedParentRowKey();
        if (parentRow == null)
          break;

        parentKeyList.add(parentRow);
        importStatistics.rowsCount.incrementAndGet();
      }

    } finally {
      resolver.removeNamespace(entity);
    }
  }

  public void abort() {
    stop.set(true);
  }

  private AtomicBoolean stop = new AtomicBoolean(false);

  public static final String TIME_ELAPSED = "Time Elapsed";

  public static void main(String[] args) throws InterruptedException {
    long l = System.currentTimeMillis();
    Thread.sleep(1050);
    System.out.println(getTimeElapsedSince(l));
  }

  static String getTimeElapsedSince(long l) {
    l = System.currentTimeMillis() - l;
    return (l / (60000 * 60)) % 60 + ":" + (l / 60000) % 60 + ":" + (l / 1000)
            % 60 + "." + l % 1000;
  }
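
  // The string built above is hours:minutes:seconds.millis, each component taken
  // modulo its unit. For example, an elapsed time of 1050 ms gives
  // (1050 / 3600000) % 60 = 0 hours, (1050 / 60000) % 60 = 0 minutes,
  // (1050 / 1000) % 60 = 1 second and 1050 % 1000 = 50 ms, i.e. "0:0:1.50",
  // which is roughly what main() prints after its 1050 ms sleep.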

  @SuppressWarnings("unchecked")
  static Class loadClass(String name) throws ClassNotFoundException {
    DocBuilder inst = INSTANCE.get();
    try {
      return inst != null ? inst.writer.loadClass(name) : Class.forName(name);
    } catch (ClassNotFoundException e) {
      try {
        String n = DocBuilder.class.getPackage().getName() + "." + name;
        return inst != null ? inst.writer.loadClass(n) : Class.forName(n);
      } catch (ClassNotFoundException e1) {
        throw e;
      }
    }
  }

  public static class Statistics {
    public AtomicInteger docCount = new AtomicInteger();

    public AtomicInteger deletedDocCount = new AtomicInteger();

    public AtomicLong rowsCount = new AtomicLong();

    public AtomicLong queryCount = new AtomicLong();

    public AtomicLong skipDocCount = new AtomicLong();

    public Statistics add(Statistics stats) {
      this.docCount.addAndGet(stats.docCount.get());
      this.deletedDocCount.addAndGet(stats.deletedDocCount.get());
      this.rowsCount.addAndGet(stats.rowsCount.get());
      this.queryCount.addAndGet(stats.queryCount.get());
      this.skipDocCount.addAndGet(stats.skipDocCount.get());

      return this;
    }
  }

  public static final String LAST_INDEX_TIME = "last_index_time";

  public static final String INDEX_START_TIME = "index_start_time";
}
@ -0,0 +1,96 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import java.util.Map;

/**
 * <p>
 * An instance of entity processor serves an entity. It is reused throughout the
 * import process.
 * </p>
 * <p/>
 * <p>
 * Implementations of this interface must provide a public no-args constructor.
 * </p>
 * <p/>
 * <p>
 * Refer to <a
 * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
 * for more details.
 * </p>
 * <p/>
 * <b>This API is experimental and may change in the future.</b>
 *
 * @version $Id$
 * @since solr 1.3
 */
public abstract class EntityProcessor {

  /**
   * This method is called when the processor starts processing an entity. When
   * processing comes back to the entity it is called again, so the
   * implementation can reset any state at that point. For a root entity this
   * is called only once during an ingestion; for sub-entities it is called
   * once for each row from the parent entity.
   *
   * @param context The current context
   */
  public abstract void init(Context context);

  /**
   * This method helps stream the data, one row at a time. The implementation
   * may fetch as many rows as needed internally but gives out one 'row' per
   * call. Only this method is used during a full import.
   *
   * @return A 'row'. The 'key' for the map is the column name and the 'value'
   *         is the value of that column. If there are no more rows to be
   *         returned, return 'null'
   */
  public abstract Map<String, Object> nextRow();

  /**
   * This is used for delta-import. It gives the primary keys of the rows
   * changed in this entity since the last import.
   *
   * @return the pk vs. value of a changed row
   */
  public abstract Map<String, Object> nextModifiedRowKey();

  /**
   * This is used during delta-import. It gives the primary keys of the rows
   * that are deleted from this entity. If this entity is the root entity, the
   * Solr document is deleted. If this is a sub-entity, the Solr document is
   * considered as 'changed' and will be recreated.
   *
   * @return the pk vs. value of a deleted row
   */
  public abstract Map<String, Object> nextDeletedRowKey();

  /**
   * This is used during delta-import. It gives the primary keys and their
   * values for all the rows changed in a parent entity due to changes in this
   * entity.
   *
   * @return the pk vs. value of a changed row in the parent entity
   */
  public abstract Map<String, Object> nextModifiedParentRowKey();

  /**
   * Invoked when the entity processor is destroyed, towards the end of an
   * ingestion. Called only once.
   */
  public abstract void destroy();
}
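
A minimal sketch of a full-import-only implementation of this contract; the class name and the fixed row are hypothetical, and the delta-related methods simply report that nothing changed:

    package org.apache.solr.handler.dataimport;

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.Iterator;
    import java.util.List;
    import java.util.Map;

    /** Hypothetical example: replays a fixed list of rows during a full import. */
    public class StaticRowsEntityProcessor extends EntityProcessor {
      private Iterator<Map<String, Object>> rows;

      public void init(Context context) {
        // A real processor would pull rows from the entity's data source instead.
        List<Map<String, Object>> fixed = new ArrayList<Map<String, Object>>();
        Map<String, Object> row = new HashMap<String, Object>();
        row.put("id", "1");
        fixed.add(row);
        rows = fixed.iterator();
      }

      public Map<String, Object> nextRow() {
        return rows.hasNext() ? rows.next() : null; // null ends the entity's stream
      }

      // Delta support is optional for a full-import-only processor: report no changes.
      public Map<String, Object> nextModifiedRowKey() { return null; }

      public Map<String, Object> nextDeletedRowKey() { return null; }

      public Map<String, Object> nextModifiedParentRowKey() { return null; }

      public void destroy() {
      }
    }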
@ -0,0 +1,423 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import java.lang.reflect.Method;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * <p>
 * Base class for all implementations of EntityProcessor
 * </p>
 * <p/>
 * <p>
 * Most implementations of EntityProcessor extend this base class which provides
 * common functionality.
 * </p>
 * <p/>
 * <b>This API is experimental and subject to change</b>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class EntityProcessorBase extends EntityProcessor {
  private static final Logger LOG = Logger.getLogger(EntityProcessorBase.class.getName());

  protected String entityName;

  protected Context context;

  protected VariableResolverImpl resolver;

  protected Iterator<Map<String, Object>> rowIterator;

  protected List<Transformer> transformers;

  protected List<Map<String, Object>> rowcache;

  protected String query;

  @SuppressWarnings("unchecked")
  private Map session;

  public void init(Context context) {
    rowIterator = null;
    rowcache = null;
    this.context = context;
    entityName = context.getEntityAttribute("name");
    resolver = (VariableResolverImpl) context.getVariableResolver();
    query = null;
    session = null;
  }

  @SuppressWarnings("unchecked")
  void loadTransformers() {
    String transClasses = context.getEntityAttribute(TRANSFORMER);

    if (transClasses == null) {
      transformers = Collections.EMPTY_LIST;
      return;
    }

    String[] transArr = transClasses.split(",");
    transformers = new ArrayList<Transformer>() {
      public boolean add(Transformer transformer) {
        return super.add(DebugLogger.wrapTransformer(transformer));
      }
    };
    for (String aTransArr : transArr) {
      String trans = aTransArr.trim();
      if (trans.startsWith("script:")) {
        String functionName = trans.substring("script:".length());
        ScriptTransformer scriptTransformer = new ScriptTransformer();
        scriptTransformer.setFunctionName(functionName);
        transformers.add(scriptTransformer);
        continue;
      }
      try {
        Class clazz = DocBuilder.loadClass(trans);
        Object instance = clazz.newInstance();
        if (instance instanceof Transformer) {
          transformers.add((Transformer) instance);
        } else {
          Method meth;
          try {
            meth = clazz.getMethod(TRANSFORM_ROW, Map.class);
          } catch (NoSuchMethodException nsme) {
            String msg = "Transformer: " + trans
                    + " does not implement the Transformer interface or a transformRow(Map m) method";
            LOG.log(Level.SEVERE, msg);
            throw new DataImportHandlerException(DataImportHandlerException.SEVERE, msg);
          }
          transformers.add(new ReflectionTransformer(meth, clazz, trans));
        }
      } catch (Exception e) {
        LOG.log(Level.SEVERE, "Unable to load Transformer: " + aTransArr, e);
        throw new DataImportHandlerException(DataImportHandlerException.SEVERE, e);
      }
    }
  }
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
static class ReflectionTransformer extends Transformer {
|
||||
final Method meth;
|
||||
|
||||
final Class clazz;
|
||||
|
||||
final String trans;
|
||||
|
||||
final Object o;
|
||||
|
||||
public ReflectionTransformer(Method meth, Class clazz, String trans)
|
||||
throws Exception {
|
||||
this.meth = meth;
|
||||
this.clazz = clazz;
|
||||
this.trans = trans;
|
||||
o = clazz.newInstance();
|
||||
}
|
||||
|
||||
public Object transformRow(Map<String, Object> aRow, Context context) {
|
||||
try {
|
||||
return meth.invoke(o, aRow);
|
||||
} catch (Exception e) {
|
||||
LOG.log(Level.WARNING, "method invocation failed on transformer : "
|
||||
+ trans, e);
|
||||
throw new DataImportHandlerException(DataImportHandlerException.WARN, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected Map<String, Object> getFromRowCache() {
|
||||
Map<String, Object> r = rowcache.remove(0);
|
||||
if (rowcache.isEmpty())
|
||||
rowcache = null;
|
||||
return r;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
protected Map<String, Object> applyTransformer(Map<String, Object> row) {
|
||||
if (transformers == null)
|
||||
loadTransformers();
|
||||
if (transformers == Collections.EMPTY_LIST)
|
||||
return row;
|
||||
Map<String, Object> transformedRow = row;
|
||||
List<Map<String, Object>> rows = null;
|
||||
for (Transformer t : transformers) {
|
||||
try {
|
||||
if (rows != null) {
|
||||
List<Map<String, Object>> tmpRows = new ArrayList<Map<String, Object>>();
|
||||
for (Map<String, Object> map : rows) {
|
||||
Object o = t.transformRow(map, context);
|
||||
if (o == null)
|
||||
continue;
|
||||
if (o instanceof Map) {
|
||||
Map oMap = (Map) o;
|
||||
checkSkipDoc(oMap, t);
|
||||
tmpRows.add((Map) o);
|
||||
} else if (o instanceof List) {
|
||||
tmpRows.addAll((List) o);
|
||||
} else {
|
||||
LOG
|
||||
.log(Level.SEVERE,
|
||||
"Transformer must return Map<String, Object> or a List<Map<String, Object>>");
|
||||
}
|
||||
}
|
||||
rows = tmpRows;
|
||||
} else {
|
||||
Object o = t.transformRow(transformedRow, context);
|
||||
if (o == null)
|
||||
return null;
|
||||
if (o instanceof Map) {
|
||||
Map oMap = (Map) o;
|
||||
checkSkipDoc(oMap, t);
|
||||
transformedRow = (Map) o;
|
||||
} else if (o instanceof List) {
|
||||
rows = (List) o;
|
||||
} else {
|
||||
LOG
|
||||
.log(Level.SEVERE,
|
||||
"Transformer must return Map<String, Object> or a List<Map<String, Object>>");
|
||||
}
|
||||
}
|
||||
|
||||
} catch (DataImportHandlerException e) {
|
||||
throw e;
|
||||
} catch (Exception e) {
|
||||
LOG.log(Level.WARNING, "transformer threw error", e);
|
||||
throw new DataImportHandlerException(DataImportHandlerException.WARN, e);
|
||||
}
|
||||
}
|
||||
if (rows == null) {
|
||||
return transformedRow;
|
||||
} else {
|
||||
rowcache = rows;
|
||||
return getFromRowCache();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void checkSkipDoc(Map oMap, Transformer t) {
|
||||
if (oMap.get(SKIP_DOC) != null
|
||||
&& Boolean.parseBoolean(oMap.get(SKIP_DOC).toString()))
|
||||
throw new DataImportHandlerException(DataImportHandlerException.SKIP,
|
||||
"Document skipped by: " + DebugLogger.getTransformerName(t));
|
||||
}
|
||||
|
||||
protected Map<String, Object> getNext() {
|
||||
try {
|
||||
if (rowIterator == null)
|
||||
return null;
|
||||
if (rowIterator.hasNext())
|
||||
return rowIterator.next();
|
||||
rowIterator = null;
|
||||
query = null;
|
||||
return null;
|
||||
} catch (Exception e) {
|
||||
LOG.log(Level.SEVERE, "getNext() failed for query '" + query + "'", e);
|
||||
rowIterator = null;
|
||||
query = null;
|
||||
throw new DataImportHandlerException(DataImportHandlerException.WARN, e);
|
||||
}
|
||||
}
|
||||
|
||||
public Map<String, Object> nextModifiedRowKey() {
|
||||
return null;
|
||||
}
|
||||
|
||||
public Map<String, Object> nextDeletedRowKey() {
|
||||
return null;
|
||||
}
|
||||
|
||||
public Map<String, Object> nextModifiedParentRowKey() {
|
||||
return null;
|
||||
}
|
||||
|
||||
public void setSessionAttribute(Object key, Object val) {
|
||||
if (session == null) {
|
||||
session = new HashMap();
|
||||
}
|
||||
session.put(key, val);
|
||||
}
|
||||
|
||||
public Object getSessionAttribute(Object key) {
|
||||
if (session == null)
|
||||
return null;
|
||||
return session.get(key);
|
||||
}
|
||||
|
||||
/**
|
||||
* For a simple implementation, this is the only method that the sub-class
|
||||
* should implement. This is intended to stream rows one-by-one. Return null
|
||||
* to signal end of rows
|
||||
*
|
||||
* @return a row where the key is the name of the field and value can be any
|
||||
* Object or a Collection of objects. Return null to signal end of
|
||||
* rows
|
||||
*/
|
||||
public Map<String, Object> nextRow() {
|
||||
return null;// do not do anything
|
||||
}
|
||||
|
||||
|
||||
public void destroy() {
|
||||
/*no op*/
|
||||
}
|
||||
|
||||
/**
|
||||
* Clears the internal session maintained by this EntityProcessor
|
||||
*/
|
||||
public void clearSession() {
|
||||
if (session != null)
|
||||
session.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* Only used by cache implementations
|
||||
*/
|
||||
protected String cachePk;
|
||||
|
||||
/**
|
||||
* Only used by cache implementations
|
||||
*/
|
||||
protected String cacheVariableName;
|
||||
|
||||
/**
|
||||
* Only used by cache implementations
|
||||
*/
|
||||
protected Map<String, List<Map<String, Object>>> simpleCache;
|
||||
|
||||
/**
|
||||
* Only used by cache implementations
|
||||
*/
|
||||
protected Map<String, Map<Object, List<Map<String, Object>>>> cacheWithWhereClause;
|
||||
|
||||
protected List<Map<String, Object>> dataSourceRowCache;
|
||||
|
||||
/**
|
||||
* Only used by cache implementations
|
||||
*/
|
||||
protected void cacheInit() {
|
||||
if (simpleCache != null || cacheWithWhereClause != null)
|
||||
return;
|
||||
String where = context.getEntityAttribute("where");
|
||||
if (where == null) {
|
||||
simpleCache = new HashMap<String, List<Map<String, Object>>>();
|
||||
} else {
|
||||
String[] splits = where.split("=");
|
||||
cachePk = splits[0];
|
||||
cacheVariableName = splits[1].trim();
|
||||
cacheWithWhereClause = new HashMap<String, Map<Object, List<Map<String, Object>>>>();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* If the where clause is present the cache is sql Vs Map of key Vs List of
|
||||
* Rows. Only used by cache implementations.
|
||||
*
|
||||
* @param query
|
||||
* @return
|
||||
*/
|
||||
protected Map<String, Object> getIdCacheData(String query) {
|
||||
Map<Object, List<Map<String, Object>>> rowIdVsRows = cacheWithWhereClause
|
||||
.get(query);
|
||||
List<Map<String, Object>> rows = null;
|
||||
Object key = resolver.resolve(cacheVariableName);
|
||||
if (rowIdVsRows != null) {
|
||||
rows = rowIdVsRows.get(key);
|
||||
if (rows == null)
|
||||
return null;
|
||||
dataSourceRowCache = new ArrayList<Map<String, Object>>(rows);
|
||||
return getFromRowCacheTransformed();
|
||||
} else {
|
||||
rows = getAllNonCachedRows();
|
||||
if (rows.isEmpty()) {
|
||||
return null;
|
||||
} else {
|
||||
rowIdVsRows = new HashMap<Object, List<Map<String, Object>>>();
|
||||
for (Map<String, Object> row : rows) {
|
||||
Object k = row.get(cachePk);
|
||||
if (rowIdVsRows.get(k) == null)
|
||||
rowIdVsRows.put(k, new ArrayList<Map<String, Object>>());
|
||||
rowIdVsRows.get(k).add(row);
|
||||
}
|
||||
cacheWithWhereClause.put(query, rowIdVsRows);
|
||||
if (!rowIdVsRows.containsKey(key))
|
||||
return null;
|
||||
dataSourceRowCache = new ArrayList<Map<String, Object>>(rowIdVsRows.get(key));
|
||||
if (dataSourceRowCache.isEmpty()) {
|
||||
dataSourceRowCache = null;
|
||||
return null;
|
||||
}
|
||||
return getFromRowCacheTransformed();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all the rows from the the datasource for the given query. Only used by
|
||||
* cache implementations.
|
||||
* <p/>
|
||||
* This <b>must</b> be implemented by sub-classes which intend to provide a
|
||||
* cached implementation
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
protected List<Map<String, Object>> getAllNonCachedRows() {
|
||||
return Collections.EMPTY_LIST;
|
||||
}
|
||||
|
||||
/**
|
||||
* If where clause is not present the cache is a Map of query vs List of Rows.
|
||||
* Only used by cache implementations.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
protected Map<String, Object> getSimplCacheData(String query) {
|
||||
List<Map<String, Object>> rows = simpleCache.get(query);
|
||||
if (rows != null) {
|
||||
dataSourceRowCache = new ArrayList<Map<String, Object>>(rows);
|
||||
return getFromRowCacheTransformed();
|
||||
} else {
|
||||
rows = getAllNonCachedRows();
|
||||
if (rows.isEmpty()) {
|
||||
return null;
|
||||
} else {
|
||||
dataSourceRowCache = new ArrayList<Map<String, Object>>(rows);
|
||||
simpleCache.put(query, rows);
|
||||
return getFromRowCacheTransformed();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected Map<String, Object> getFromRowCacheTransformed() {
|
||||
Map<String, Object> r = dataSourceRowCache.remove(0);
|
||||
if (dataSourceRowCache.isEmpty())
|
||||
dataSourceRowCache = null;
|
||||
return r == null ? null : applyTransformer(r);
|
||||
}
|
||||
|
||||
public static final String TRANSFORMER = "transformer";
|
||||
|
||||
public static final String TRANSFORM_ROW = "transformRow";
|
||||
|
||||
public static final String SKIP_DOC = "$skipDoc";
|
||||
}
|
|
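Besides Transformer subclasses and "script:" functions, loadTransformers() accepts any class that declares a public transformRow(Map) method, found via reflection. A sketch of such a class (the name is illustrative; note also that Class.getMethod throws NoSuchMethodException rather than returning null, so the null check above is defensive):

// Hypothetical reflection-path transformer: it does not extend Transformer,
// it only declares the method that loadTransformers() looks up by name.
public class TrimTransformer {
  public Object transformRow(Map<String, Object> row) {
    for (Map.Entry<String, Object> e : row.entrySet()) {
      if (e.getValue() instanceof String)
        e.setValue(((String) e.getValue()).trim());
    }
    return row;
  }
}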
@@ -0,0 +1,49 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

/**
 * <p>
 * Pluggable functions for resolving variables
 * </p>
 * <p>
 * Implementations of this interface must provide a public no-arg constructor.
 * </p>
 * <p>
 * Refer to <a
 * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
 * for more details.
 * </p>
 * <b>This API is experimental and may change in the future.</b>
 *
 * @version $Id$
 * @since solr 1.3
 */
public abstract class Evaluator {

  /**
   * Return a String after processing an expression and a VariableResolver
   *
   * @param resolver   the <code>VariableResolver</code> used to resolve variables
   * @param expression the expression string to be evaluated
   * @return the result of the evaluation
   */
  public abstract String evaluate(VariableResolver resolver, String expression);
}
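A sketch of a custom implementation (the class name is hypothetical); registered evaluators are addressed as ${dataimporter.functions....} expressions, per the error message in EvaluatorBag below:

// Resolves the expression through the VariableResolver and lower-cases it.
public class LowerCaseEvaluator extends Evaluator {
  public String evaluate(VariableResolver resolver, String expression) {
    Object o = resolver.resolve(expression);
    return o == null ? null : o.toString().toLowerCase();
  }
}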
@@ -0,0 +1,212 @@
package org.apache.solr.handler.dataimport;

import org.apache.solr.util.DateMathParser;

import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * <p>
 * Holds definitions for evaluators provided by DataImportHandler
 * </p>
 * <p>
 * Refer to <a
 * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
 * for more details.
 * </p>
 * <b>This API is experimental and may change in the future.</b>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class EvaluatorBag {

  public static final String DATE_FORMAT_EVALUATOR = "formatDate";

  public static final String URL_ENCODE_EVALUATOR = "encodeUrl";

  public static final String SQL_ESCAPE_EVALUATOR = "escapeSql";

  static final Pattern FORMAT_METHOD = Pattern
          .compile("^(\\w*?)\\((.*?)\\)$");

  /**
   * <p>
   * Returns an <code>Evaluator</code> instance meant to be used for escaping
   * values in SQL queries.
   * </p>
   * <p>
   * It escapes the value of the given expression by replacing all occurrences
   * of single-quotes with two single-quotes, and similarly for double-quotes.
   * </p>
   *
   * @return an <code>Evaluator</code> instance which escapes SQL special
   *         characters
   */
  public static Evaluator getSqlEscapingEvaluator() {
    return new Evaluator() {
      public String evaluate(VariableResolver resolver, String expression) {
        Object o = resolver.resolve(expression);

        if (o == null)
          return null;

        return o.toString().replaceAll("'", "''").replaceAll("\"", "\"\"");
      }
    };
  }

  /**
   * <p>
   * Returns an <code>Evaluator</code> instance capable of URL-encoding
   * expressions. The expressions are evaluated using a
   * <code>VariableResolver</code>
   * </p>
   *
   * @return an <code>Evaluator</code> instance capable of URL-encoding
   *         expressions.
   */
  public static Evaluator getUrlEvaluator() {
    return new Evaluator() {
      public String evaluate(VariableResolver resolver, String expression) {
        Object value = null;
        try {
          value = resolver.resolve(expression);
          if (value == null)
            return null;

          return URLEncoder.encode(value.toString(), "UTF-8");
        } catch (UnsupportedEncodingException e) {
          throw new DataImportHandlerException(
                  DataImportHandlerException.SEVERE,
                  "Unable to encode expression: " + expression + " with value: "
                          + value, e);
        }
      }
    };
  }

  /**
   * <p>
   * Returns an <code>Evaluator</code> instance capable of formatting values
   * using a given date format.
   * </p>
   * <p>
   * The value to be formatted can be an entity.field or a date expression
   * parsed with the <code>DateMathParser</code> class. If the value is in
   * single quotes, it is assumed to be a datemath expression, otherwise it is
   * resolved using a <code>VariableResolver</code> instance.
   * </p>
   *
   * @return an Evaluator instance capable of formatting values to a given date
   *         format
   * @see DateMathParser
   */
  public static Evaluator getDateFormatEvaluator() {
    return new Evaluator() {
      public String evaluate(VariableResolver resolver, String expression) {
        CacheEntry e = getCachedData(expression);
        String expr = e.key;
        SimpleDateFormat fmt = e.format;
        Matcher m = IN_SINGLE_QUOTES.matcher(expr);
        if (m.find()) {
          String datemathExpr = m.group(1);
          try {
            Date date = dateMathParser.parseMath(datemathExpr);
            return fmt.format(date);
          } catch (ParseException exp) {
            throw new DataImportHandlerException(
                    DataImportHandlerException.SEVERE,
                    "Invalid expression for date", exp);
          }
        } else {
          Object o = resolver.resolve(expr);
          if (o == null)
            return "";
          Date date = null;
          if (o instanceof Date) {
            date = (Date) o;
          } else {
            String s = o.toString();
            try {
              date = DataImporter.DATE_TIME_FORMAT.parse(s);
            } catch (ParseException exp) {
              throw new DataImportHandlerException(
                      DataImportHandlerException.SEVERE,
                      "Invalid expression for date", exp);
            }
          }
          return fmt.format(date);
        }
      }

      private CacheEntry getCachedData(String str) {
        CacheEntry result = cache.get(str);
        if (result != null)
          return result;
        Matcher m = FORMAT_METHOD.matcher(str);
        String expr, pattern;
        if (m.find()) {
          expr = m.group(1).trim();
          if (IN_SINGLE_QUOTES.matcher(expr).find()) {
            expr = expr.replaceAll("NOW", "");
          }
          pattern = m.group(2).trim();
          cache.put(str, new CacheEntry(expr, new SimpleDateFormat(pattern)));
          return cache.get(str);
        } else {
          throw new DataImportHandlerException(
                  DataImportHandlerException.SEVERE, "Invalid format String : "
                          + "${dataimporter.functions." + str + "}");
        }
      }

      Map<String, CacheEntry> cache = new HashMap<String, CacheEntry>();

      Pattern FORMAT_METHOD = Pattern.compile("^(.*?),(.*?)$");
    };
  }

  static Map<String, Object> getFunctionsNamespace(
          final VariableResolver resolver, final Map<String, Evaluator> evaluators) {

    return new HashMap<String, Object>() {
      @Override
      public String get(Object key) {
        if (key == null)
          return null;
        Matcher m = FORMAT_METHOD.matcher((String) key);
        if (!m.find())
          return null;
        String fname = m.group(1);
        Evaluator evaluator = evaluators.get(fname);
        if (evaluator == null)
          return null;
        return evaluator.evaluate(resolver, m.group(2));
      }
    };
  }

  static class CacheEntry {
    public String key;

    public SimpleDateFormat format;

    public CacheEntry(String key, SimpleDateFormat format) {
      this.key = key;
      this.format = format;
    }
  }

  static Pattern IN_SINGLE_QUOTES = Pattern.compile("^'(.*?)'$");

  static DateMathParser dateMathParser = new DateMathParser(TimeZone
          .getDefault(), Locale.getDefault());

}
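A sketch of how the functions namespace dispatches a call; the resolver is assumed to come from the running import, and the expression string is illustrative. FORMAT_METHOD splits the key into the evaluator name and its argument:

Map<String, Evaluator> evaluators = new HashMap<String, Evaluator>();
evaluators.put(EvaluatorBag.URL_ENCODE_EVALUATOR, EvaluatorBag.getUrlEvaluator());
Map<String, Object> fns = EvaluatorBag.getFunctionsNamespace(resolver, evaluators);
// "encodeUrl(item.id)" -> name "encodeUrl", argument "item.id"
Object encoded = fns.get("encodeUrl(item.id)"); // URL-encoded value of item.id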
@@ -0,0 +1,103 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import java.io.*;
import java.util.Properties;

/**
 * <p>
 * A DataSource which reads from local files
 * </p>
 * <p>
 * The file is read with the default platform encoding. It can be overridden by
 * specifying the encoding in solrconfig.xml
 * </p>
 * <p>
 * Refer to <a
 * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
 * for more details.
 * </p>
 * <b>This API is experimental and may change in the future.</b>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class FileDataSource extends DataSource<Reader> {
  public static final String BASE_PATH = "basePath";

  private String basePath;

  private String encoding = null;

  public void init(Context context, Properties initProps) {
    basePath = initProps.getProperty(BASE_PATH);
    if (initProps.get(HttpDataSource.ENCODING) != null)
      encoding = initProps.getProperty(HttpDataSource.ENCODING);
  }

  /**
   * <p>
   * Returns a reader for the given file.
   * </p>
   * <p>
   * If the given file is not absolute, we try to construct an absolute path
   * using the basePath configuration. If that fails, the relative path is
   * tried. If the file is not found, a RuntimeException is thrown.
   * </p>
   * <p>
   * <b>It is the responsibility of the calling method to properly close the
   * returned Reader</b>
   * </p>
   */
  public Reader getData(String query) {
    try {
      File file0 = new File(query);
      File file = file0;

      if (!file.isAbsolute())
        file = new File(basePath + query);

      if (file.isFile() && file.canRead()) {
        return openStream(file);
      } else if (file != file0)
        if (file0.isFile() && file0.canRead())
          return openStream(file0);

      throw new FileNotFoundException("Could not find file: " + query);
    } catch (UnsupportedEncodingException e) {
      throw new RuntimeException(e);
    } catch (FileNotFoundException e) {
      throw new RuntimeException(e);
    }
  }

  private InputStreamReader openStream(File file) throws FileNotFoundException,
          UnsupportedEncodingException {
    if (encoding == null) {
      return new InputStreamReader(new FileInputStream(file));
    } else {
      return new InputStreamReader(new FileInputStream(file), encoding);
    }
  }

  public void close() {

  }
}
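A usage sketch with illustrative paths; note that this implementation ignores the Context in init(), and per the javadoc above the caller owns the returned Reader:

Properties p = new Properties();
p.setProperty(FileDataSource.BASE_PATH, "/data/feeds/");
p.setProperty(HttpDataSource.ENCODING, "UTF-8");
FileDataSource ds = new FileDataSource();
ds.init(null, p);
Reader reader = ds.getData("products.xml"); // tries basePath + query first
// ... consume the reader ...
reader.close(); // caller's responsibility; handle IOException in real code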
@@ -0,0 +1,225 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import java.io.File;
import java.io.FilenameFilter;
import java.text.ParseException;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * <p>
 * An EntityProcessor instance which can stream file names found in a given
 * base directory matching patterns, returning rows containing file
 * information.
 * </p>
 * <p>
 * It supports querying a given base directory by matching:
 * <ul>
 * <li>file names against a regular expression</li>
 * <li>excluding certain files based on a regular expression</li>
 * <li>last modification date (newer or older than a given date or time)</li>
 * <li>size (bigger or smaller than size given in bytes)</li>
 * <li>recursively iterating through sub-directories</li>
 * </ul>
 * Its output can be used along with FileDataSource to read from files in file
 * systems.
 * </p>
 * <p>
 * Refer to <a
 * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
 * for more details.
 * </p>
 * <b>This API is experimental and may change in the future.</b>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class FileListEntityProcessor extends EntityProcessorBase {
  private String fileName, baseDir, excludes;

  private Date newerThan, olderThan;

  private long biggerThan = -1, smallerThan = -1;

  private boolean recursive = false;

  private Pattern fileNamePattern, excludesPattern;

  public void init(Context context) {
    super.init(context);
    fileName = context.getEntityAttribute(FILE_NAME);
    if (fileName != null) {
      fileName = resolver.replaceTokens(fileName);
      fileNamePattern = Pattern.compile(fileName);
    }
    baseDir = context.getEntityAttribute(BASE_DIR);
    if (baseDir == null)
      throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
              "'baseDir' is a required attribute");
    baseDir = resolver.replaceTokens(baseDir);
    File dir = new File(baseDir);
    if (!dir.isDirectory())
      throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
              "'baseDir' should point to a directory");
    String r = context.getEntityAttribute(RECURSIVE);
    if (r != null)
      recursive = Boolean.parseBoolean(r);
    excludes = context.getEntityAttribute(EXCLUDES);
    if (excludes != null)
      excludes = resolver.replaceTokens(excludes);
    if (excludes != null)
      excludesPattern = Pattern.compile(excludes);
  }

  private Date getDate(String dateStr) {
    if (dateStr == null)
      return null;

    Matcher m = PLACE_HOLDER_PATTERN.matcher(dateStr);
    if (m.find()) {
      return (Date) resolver.resolve(dateStr);
    }
    m = EvaluatorBag.IN_SINGLE_QUOTES.matcher(dateStr);
    if (m.find()) {
      String expr = null;
      expr = m.group(1).replaceAll("NOW", "");
      try {
        return EvaluatorBag.dateMathParser.parseMath(expr);
      } catch (ParseException exp) {
        throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
                "Invalid expression for date", exp);
      }
    }
    try {
      return DataImporter.DATE_TIME_FORMAT.parse(dateStr);
    } catch (ParseException exp) {
      throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
              "Invalid expression for date", exp);
    }
  }

  public Map<String, Object> nextRow() {
    if (rowIterator != null)
      return getAndApplyTrans();
    List<Map<String, Object>> fileDetails = new ArrayList<Map<String, Object>>();
    File dir = new File(baseDir);

    String dateStr = context.getEntityAttribute(NEWER_THAN);
    newerThan = getDate(dateStr);
    dateStr = context.getEntityAttribute(OLDER_THAN);
    olderThan = getDate(dateStr);

    getFolderFiles(dir, fileDetails);
    rowIterator = fileDetails.iterator();
    return getAndApplyTrans();
  }

  private Map<String, Object> getAndApplyTrans() {
    if (rowcache != null)
      return getFromRowCache();
    while (true) {
      Map<String, Object> r = getNext();
      if (r == null)
        return null;
      r = applyTransformer(r);
      if (r != null)
        return r;
    }
  }

  private void getFolderFiles(File dir,
                              final List<Map<String, Object>> fileDetails) {
    dir.list(new FilenameFilter() {
      public boolean accept(File dir, String name) {
        if (fileNamePattern == null) {
          addDetails(fileDetails, dir, name);
          return false;
        }
        if (fileNamePattern.matcher(name).find()) {
          if (excludesPattern != null && excludesPattern.matcher(name).find())
            return false;
          addDetails(fileDetails, dir, name);
        }

        return false;
      }
    });
  }

  private void addDetails(List<Map<String, Object>> files, File dir, String name) {
    Map<String, Object> details = new HashMap<String, Object>();
    File aFile = new File(dir, name);
    if (aFile.isDirectory()) {
      if (!recursive)
        return;
      getFolderFiles(aFile, files);
      return;
    }
    long sz = aFile.length();
    Date lastModified = new Date(aFile.lastModified());
    if (biggerThan != -1 && sz <= biggerThan)
      return;
    if (smallerThan != -1 && sz >= smallerThan)
      return;
    if (olderThan != null && lastModified.after(olderThan))
      return;
    if (newerThan != null && lastModified.before(newerThan))
      return;
    details.put(DIR, dir.getAbsolutePath());
    details.put(FILE, name);
    details.put(ABSOLUTE_FILE, aFile.getAbsolutePath());
    details.put(SIZE, sz);
    details.put(LAST_MODIFIED, lastModified);
    files.add(details);
  }

  public static final Pattern PLACE_HOLDER_PATTERN = Pattern
          .compile("\\$\\{.*?\\}");

  public static final String DIR = "fileDir";

  public static final String FILE = "file";

  public static final String ABSOLUTE_FILE = "fileAbsolutePath";

  public static final String SIZE = "fileSize";

  public static final String LAST_MODIFIED = "fileLastModified";

  public static final String FILE_NAME = "fileName";

  public static final String BASE_DIR = "baseDir";

  public static final String EXCLUDES = "excludes";

  public static final String NEWER_THAN = "newerThan";

  public static final String OLDER_THAN = "olderThan";

  public static final String BIGGER_THAN = "biggerThan";

  public static final String SMALLER_THAN = "smallerThan";

  public static final String RECURSIVE = "recursive";

}
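How a quoted newerThan value such as "'NOW-3DAYS'" is resolved by getDate(): the single quotes mark a date-math expression, "NOW" is stripped, and the remainder goes to Solr's DateMathParser. A sketch with an illustrative expression:

DateMathParser dmp = new DateMathParser(TimeZone.getDefault(), Locale.getDefault());
try {
  Date cutoff = dmp.parseMath("-3DAYS"); // three days before the current time
} catch (ParseException e) {
  // invalid date-math expression
}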
@@ -0,0 +1,139 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.net.URLConnection;
import java.util.Properties;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * <p>
 * A data source implementation which can be used to read character files using
 * HTTP.
 * </p>
 * <p>
 * Refer to <a
 * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
 * for more details.
 * </p>
 * <b>This API is experimental and may change in the future.</b>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class HttpDataSource extends DataSource<Reader> {
  Logger LOG = Logger.getLogger(HttpDataSource.class.getName());

  private String baseUrl;

  private String encoding;

  private int connectionTimeout = CONNECTION_TIMEOUT;

  private int readTimeout = READ_TIMEOUT;

  public HttpDataSource() {
  }

  public void init(Context context, Properties initProps) {
    baseUrl = initProps.getProperty(BASE_URL);
    if (initProps.get(ENCODING) != null)
      encoding = initProps.getProperty(ENCODING);
    String cTimeout = initProps.getProperty(CONNECTION_TIMEOUT_FIELD_NAME);
    String rTimeout = initProps.getProperty(READ_TIMEOUT_FIELD_NAME);
    if (cTimeout != null) {
      try {
        connectionTimeout = Integer.parseInt(cTimeout);
      } catch (NumberFormatException e) {
        LOG.log(Level.WARNING, "Invalid connection timeout: " + cTimeout);
      }
    }
    if (rTimeout != null) {
      try {
        readTimeout = Integer.parseInt(rTimeout);
      } catch (NumberFormatException e) {
        LOG.log(Level.WARNING, "Invalid read timeout: " + rTimeout);
      }
    }
  }

  public Reader getData(String query) {
    URL url = null;
    try {
      if (query.startsWith("http:")) {
        url = new URL(query);
      } else {
        url = new URL(baseUrl + query);
      }

      LOG.info("Created URL to: " + url.toString());

      URLConnection conn = url.openConnection();
      conn.setConnectTimeout(connectionTimeout);
      conn.setReadTimeout(readTimeout);
      InputStream in = conn.getInputStream();
      String enc = encoding;
      if (enc == null) {
        String cType = conn.getContentType();
        if (cType != null) {
          Matcher m = CHARSET_PATTERN.matcher(cType);
          if (m.find()) {
            enc = m.group(1);
          }
        }
      }
      if (enc == null)
        enc = UTF_8;
      DataImporter.QUERY_COUNT.get().incrementAndGet();
      return new InputStreamReader(in, enc);
    } catch (Exception e) {
      LOG.log(Level.SEVERE, "Exception thrown while getting data", e);
      throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
              "Exception in invoking url " + url, e);
    }
  }

  public void close() {
  }

  private static final Pattern CHARSET_PATTERN = Pattern.compile(
          ".*?charset=(.*)$", Pattern.CASE_INSENSITIVE);

  public static final String ENCODING = "encoding";

  public static final String BASE_URL = "baseUrl";

  public static final String UTF_8 = "UTF-8";

  public static final String CONNECTION_TIMEOUT_FIELD_NAME = "connectionTimeout";

  public static final String READ_TIMEOUT_FIELD_NAME = "readTimeout";

  public static final int CONNECTION_TIMEOUT = 5000;

  public static final int READ_TIMEOUT = 10000;
}
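A usage sketch with an illustrative URL; it assumes it runs inside an import, since getData() bumps the importer's thread-local query counter. Relative queries are resolved against baseUrl, and the charset is taken from the Content-Type header, falling back to UTF-8:

Properties p = new Properties();
p.setProperty(HttpDataSource.BASE_URL, "http://localhost:8983/feeds/");
p.setProperty(HttpDataSource.CONNECTION_TIMEOUT_FIELD_NAME, "2000");
HttpDataSource ds = new HttpDataSource();
ds.init(null, p); // this implementation reads only the Properties
Reader reader = ds.getData("products.xml"); // fetches baseUrl + "products.xml"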
@@ -0,0 +1,325 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import java.sql.*;
import java.util.*;
import java.util.concurrent.Callable;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * <p>
 * A DataSource implementation which can fetch data using JDBC.
 * </p>
 * <p>
 * Refer to <a
 * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
 * for more details.
 * </p>
 * <b>This API is experimental and may change in the future.</b>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class JdbcDataSource extends
        DataSource<Iterator<Map<String, Object>>> {
  private static final Logger LOG = Logger.getLogger(JdbcDataSource.class
          .getName());

  private Callable<Connection> factory;

  private long connLastUsed = System.currentTimeMillis();

  private Connection conn;

  private Map<String, Integer> fieldNameVsType = new HashMap<String, Integer>();

  private boolean convertType = false;

  private int batchSize = FETCH_SIZE;

  public void init(Context context, Properties initProps) {
    Object o = initProps.get(CONVERT_TYPE);
    if (o != null)
      convertType = Boolean.parseBoolean(o.toString());

    createConnectionFactory(context, initProps);
    try {
      conn = factory.call();
    } catch (Exception e) {
      throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
              "Unable to create database connection", e);
    }

    String bsz = initProps.getProperty("batchSize");
    if (bsz != null) {
      try {
        batchSize = Integer.parseInt(bsz);
        if (batchSize == -1)
          batchSize = Integer.MIN_VALUE;
      } catch (NumberFormatException e) {
        LOG.log(Level.WARNING, "Invalid batch size: " + bsz);
      }
    }

    for (Map<String, String> map : context.getAllEntityFields()) {
      String n = map.get(DataImporter.COLUMN);
      String t = map.get(DataImporter.TYPE);
      if ("sint".equals(t) || "integer".equals(t))
        fieldNameVsType.put(n, Types.INTEGER);
      else if ("slong".equals(t) || "long".equals(t))
        fieldNameVsType.put(n, Types.BIGINT);
      else if ("float".equals(t) || "sfloat".equals(t))
        fieldNameVsType.put(n, Types.FLOAT);
      else if ("double".equals(t) || "sdouble".equals(t))
        fieldNameVsType.put(n, Types.DOUBLE);
      else if ("date".equals(t))
        fieldNameVsType.put(n, Types.DATE);
      else if ("boolean".equals(t))
        fieldNameVsType.put(n, Types.BOOLEAN);
      else
        fieldNameVsType.put(n, Types.VARCHAR);
    }
  }

  private void createConnectionFactory(final Context context,
                                       final Properties initProps) {

    final String url = initProps.getProperty(URL);
    String driver = initProps.getProperty(DRIVER);

    if (url == null)
      throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
              "JDBC URL cannot be null");

    try {
      if (driver != null)
        Class.forName(driver);
    } catch (ClassNotFoundException e) {
      throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
              "driver could not be loaded");
    }
    factory = new Callable<Connection>() {
      public Connection call() throws Exception {
        LOG.info("Creating a connection for entity "
                + context.getEntityAttribute(DataImporter.NAME) + " with URL: "
                + url);
        long start = System.currentTimeMillis();
        Connection c = DriverManager.getConnection(url, initProps);
        LOG.info("Time taken for getConnection(): "
                + (System.currentTimeMillis() - start));
        return c;
      }
    };
  }

  public Iterator<Map<String, Object>> getData(String query) {
    ResultSetIterator r = new ResultSetIterator(query);
    return r.getIterator();
  }

  private void logError(String msg, Exception e) {
    LOG.log(Level.WARNING, msg, e);
  }

  private List<String> readFieldNames(ResultSetMetaData metaData)
          throws SQLException {
    List<String> colNames = new ArrayList<String>();
    int count = metaData.getColumnCount();
    for (int i = 0; i < count; i++) {
      colNames.add(metaData.getColumnLabel(i + 1));
    }
    return colNames;
  }

  private class ResultSetIterator {
    ResultSet resultSet;

    Statement stmt = null;

    List<String> colNames;

    Iterator<Map<String, Object>> rSetIterator;

    public ResultSetIterator(String query) {

      try {
        Connection c = getConnection();
        stmt = c.createStatement(ResultSet.TYPE_FORWARD_ONLY,
                ResultSet.CONCUR_READ_ONLY);
        stmt.setFetchSize(batchSize);
        LOG.finer("Executing SQL: " + query);
        long start = System.currentTimeMillis();
        if (stmt.execute(query)) {
          resultSet = stmt.getResultSet();
        }
        LOG.finest("Time taken for sql: "
                + (System.currentTimeMillis() - start));
        colNames = readFieldNames(resultSet.getMetaData());
      } catch (Exception e) {
        throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
                "Unable to execute query: " + query, e);
      }
      if (resultSet == null) {
        rSetIterator = new ArrayList<Map<String, Object>>().iterator();
        return;
      }

      rSetIterator = new Iterator<Map<String, Object>>() {
        public boolean hasNext() {
          return hasnext();
        }

        public Map<String, Object> next() {
          return getARow();
        }

        public void remove() {/* do nothing */
        }
      };
    }

    private Iterator<Map<String, Object>> getIterator() {
      return rSetIterator;
    }

    private Map<String, Object> getARow() {
      if (resultSet == null)
        return null;
      Map<String, Object> result = new HashMap<String, Object>();
      for (String colName : colNames) {
        try {
          if (!convertType) {
            // Use the underlying database's type information
            result.put(colName, resultSet.getObject(colName));
            continue;
          }

          Integer type = fieldNameVsType.get(colName);
          if (type == null)
            type = Types.VARCHAR;
          switch (type) {
            case Types.INTEGER:
              result.put(colName, resultSet.getInt(colName));
              break;
            case Types.FLOAT:
              result.put(colName, resultSet.getFloat(colName));
              break;
            case Types.BIGINT:
              result.put(colName, resultSet.getLong(colName));
              break;
            case Types.DOUBLE:
              result.put(colName, resultSet.getDouble(colName));
              break;
            case Types.DATE:
              result.put(colName, resultSet.getDate(colName));
              break;
            case Types.BOOLEAN:
              result.put(colName, resultSet.getBoolean(colName));
              break;
            default:
              result.put(colName, resultSet.getString(colName));
              break;
          }
        } catch (SQLException e) {
          logError("Error reading data ", e);
          throw new DataImportHandlerException(
                  DataImportHandlerException.SEVERE,
                  "Error reading data from database", e);
        }
      }
      return result;
    }

    private boolean hasnext() {
      if (resultSet == null)
        return false;
      try {
        if (resultSet.next()) {
          return true;
        } else {
          close();
          return false;
        }
      } catch (SQLException e) {
        logError("Error reading data ", e);
        close();
        return false;
      }
    }

    private void close() {
      try {
        if (resultSet != null)
          resultSet.close();
        if (stmt != null)
          stmt.close();

      } catch (Exception e) {
        logError("Exception while closing result set", e);
      } finally {
        resultSet = null;
        stmt = null;
      }
    }
  }

  private Connection getConnection() throws Exception {
    long currTime = System.currentTimeMillis();
    if (currTime - connLastUsed > CONN_TIME_OUT) {
      synchronized (this) {
        Connection tmpConn = factory.call();
        finalize();
        connLastUsed = System.currentTimeMillis();
        return conn = tmpConn;
      }
    } else {
      connLastUsed = currTime;
      return conn;
    }
  }

  protected void finalize() {
    try {
      conn.close();
    } catch (Exception e) {
    }
  }

  public void close() {
    try {
      conn.close();
    } catch (Exception e) {
    }
  }

  private static final long CONN_TIME_OUT = 10 * 1000; // 10 seconds

  private static final int FETCH_SIZE = 500;

  public static final String URL = "url";

  public static final String DRIVER = "driver";

  public static final String CONVERT_TYPE = "convertType";
}
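Illustrative JDBC properties (the driver and URL are assumptions, not part of this commit). All properties are forwarded to DriverManager.getConnection(url, initProps), so credentials can be supplied the same way; batchSize=-1 is rewritten to Integer.MIN_VALUE, the fetch size MySQL's driver treats as row-by-row streaming:

Properties p = new Properties();
p.setProperty(JdbcDataSource.DRIVER, "com.mysql.jdbc.Driver");
p.setProperty(JdbcDataSource.URL, "jdbc:mysql://localhost/feeds");
p.setProperty("user", "solr");
p.setProperty("password", "secret");
p.setProperty("batchSize", "-1");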
@@ -0,0 +1,59 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Properties;

/**
 * <p>
 * A mock DataSource implementation which can be used for testing.
 * </p>
 * <b>This API is experimental and may change in the future.</b>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class MockDataSource extends
        DataSource<Iterator<Map<String, Object>>> {

  private static Map<String, Iterator<Map<String, Object>>> cache = new HashMap<String, Iterator<Map<String, Object>>>();

  public static void setIterator(String query,
                                 Iterator<Map<String, Object>> iter) {
    cache.put(query, iter);
  }

  public static void clearCache() {
    cache.clear();
  }

  public void init(Context context, Properties initProps) {
  }

  public Iterator<Map<String, Object>> getData(String query) {
    return cache.get(query);
  }

  public void close() {
    cache.clear();

  }
}
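Typical test setup: seed rows for a query, and the entity under test then consumes them instead of hitting a real database. The query string and field names are illustrative:

List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
Map<String, Object> row = new HashMap<String, Object>();
row.put("id", "1");
row.put("desc", "one");
rows.add(row);
MockDataSource.setIterator("select * from x", rows.iterator());
// ... run the import under test ...
MockDataSource.clearCache();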
@@ -0,0 +1,97 @@
package org.apache.solr.handler.dataimport;

import java.text.NumberFormat;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * <p>
 * A Transformer instance which can extract numbers out of strings. It uses the
 * <code>java.text.NumberFormat</code> class to parse strings, and supports the
 * Number, Integer, Currency and Percent styles as supported by
 * <code>java.text.NumberFormat</code>.
 * </p>
 * <p>
 * Refer to <a
 * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
 * for more details.
 * </p>
 * <b>This API is experimental and may change in the future.</b>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class NumberFormatTransformer extends Transformer {

  @SuppressWarnings("unchecked")
  public Object transformRow(Map<String, Object> row, Context context) {
    for (Map<String, String> fld : context.getAllEntityFields()) {
      String style = fld.get(FORMAT_STYLE);
      if (style != null) {
        String column = fld.get(DataImporter.COLUMN);
        String srcCol = fld.get(RegexTransformer.SRC_COL_NAME);
        if (srcCol == null)
          srcCol = column;

        Object val = row.get(srcCol);
        String styleSmall = style.toLowerCase();

        if (val instanceof List) {
          List<String> inputs = (List) val;
          List results = new ArrayList();
          for (String input : inputs) {
            try {
              results.add(process(input, styleSmall));
            } catch (ParseException e) {
              throw new DataImportHandlerException(
                      DataImportHandlerException.SEVERE,
                      "Failed to apply NumberFormat on column: " + column, e);
            }
          }
          row.put(column, results);
        } else {
          if (val == null || val.toString().trim().equals(""))
            continue;
          try {
            row.put(column, process(val.toString(), styleSmall));
          } catch (ParseException e) {
            throw new DataImportHandlerException(
                    DataImportHandlerException.SEVERE,
                    "Failed to apply NumberFormat on column: " + column, e);
          }
        }
      }
    }
    return row;
  }

  private Number process(String val, String style) throws ParseException {
    if (INTEGER.equals(style)) {
      return NumberFormat.getIntegerInstance().parse(val);
    } else if (NUMBER.equals(style)) {
      return NumberFormat.getNumberInstance().parse(val);
    } else if (CURRENCY.equals(style)) {
      return NumberFormat.getCurrencyInstance().parse(val);
    } else if (PERCENT.equals(style)) {
      return NumberFormat.getPercentInstance().parse(val);
    }

    return null;
  }

  public static final String FORMAT_STYLE = "formatStyle";

  public static final String LOCALE = "locale";

  public static final String NUMBER = "number";

  public static final String PERCENT = "percent";

  public static final String INTEGER = "integer";

  public static final String CURRENCY = "currency";
}
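The styles map onto java.text.NumberFormat factories; for example, with an explicit US locale (note that process() above uses default-locale instances, and the LOCALE constant is declared but not used in the code shown):

try {
  Number n = NumberFormat.getNumberInstance(Locale.US).parse("1,234,567"); // -> 1234567
  Number pct = NumberFormat.getPercentInstance(Locale.US).parse("95%");    // -> 0.95
} catch (ParseException e) {
  // malformed input for the chosen style
}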
@ -0,0 +1,146 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
 * A Transformer implementation which uses Regular Expressions to extract, split
 * and replace data in fields.
 * </p>
 * <p/>
 * <p>
 * Refer to <a
 * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
 * for more details.
 * </p>
 * <p/>
 * <b>This API is experimental and may change in the future.</b>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class RegexTransformer extends Transformer {
  private static final Logger LOG = Logger.getLogger(RegexTransformer.class.getName());

  @SuppressWarnings("unchecked")
  public Map<String, Object> transformRow(Map<String, Object> row,
                                          Context context) {
    List<Map<String, String>> fields = context.getAllEntityFields();
    for (Map<String, String> field : fields) {
      String col = field.get(DataImporter.COLUMN);
      String reStr = field.get(REGEX);
      String splitBy = field.get(SPLIT_BY);
      String replaceWith = field.get(REPLACE_WITH);
      if (reStr != null || splitBy != null) {
        String srcColName = field.get(SRC_COL_NAME);
        if (srcColName == null) {
          srcColName = col;
        }
        Object tmpVal = row.get(srcColName);
        if (tmpVal == null)
          continue;

        if (tmpVal instanceof List) {
          List<String> inputs = (List<String>) tmpVal;
          List results = new ArrayList();
          for (String input : inputs) {
            Object o = process(col, reStr, splitBy, replaceWith, input);
            if (o != null)
              results.add(o);
          }
          row.put(col, results);
        } else {
          String value = tmpVal.toString();
          Object o = process(col, reStr, splitBy, replaceWith, value);
          if (o != null)
            row.put(col, o);
        }
      }
    }
    return row;
  }

  private Object process(String col, String reStr, String splitBy,
                         String replaceWith, String value) {
    if (splitBy != null) {
      return readBySplit(splitBy, value);
    } else if (replaceWith != null) {
      Pattern p = getPattern(reStr);
      return p.matcher(value).replaceAll(replaceWith);
    } else {
      return readfromRegExp(reStr, value, col);
    }
  }

  @SuppressWarnings("unchecked")
  private List<String> readBySplit(String splitBy, String value) {
    String[] vals = value.split(splitBy);
    List<String> l = new ArrayList<String>();
    l.addAll(Arrays.asList(vals));
    return l;
  }

  @SuppressWarnings("unchecked")
  private Object readfromRegExp(String reStr, String value, String columnName) {
    Pattern regexp = getPattern(reStr);
    Matcher m = regexp.matcher(value);
    if (m.find() && m.groupCount() > 0) {
      if (m.groupCount() > 1) {
        List l = new ArrayList();
        for (int i = 1; i <= m.groupCount(); i++) {
          try {
            l.add(m.group(i));
          } catch (Exception e) {
            LOG.log(Level.WARNING, "Parsing failed for field : " + columnName, e);
          }
        }
        return l;
      } else {
        return m.group(1);
      }
    }

    return null;
  }

  private Pattern getPattern(String reStr) {
    Pattern result = PATTERN_CACHE.get(reStr);
    if (result == null) {
      PATTERN_CACHE.put(reStr, result = Pattern.compile(reStr));
    }
    return result;
  }

  private HashMap<String, Pattern> PATTERN_CACHE = new HashMap<String, Pattern>();

  public static final String REGEX = "regex";

  public static final String REPLACE_WITH = "replaceWith";

  public static final String SPLIT_BY = "splitBy";

  public static final String SRC_COL_NAME = "sourceColName";

}
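
// Usage sketch (illustrative only, not part of this patch; entity and field
// names are hypothetical). The attributes read above map onto data-config.xml
// like this:
//
//   <entity name="e" transformer="RegexTransformer" ...>
//     <field column="emails" splitBy="," sourceColName="mailIds"/>
//     <field column="firstName" regex="Mr(\w*)\b.*" sourceColName="name"/>
//     <field column="phone" regex="-" replaceWith="" sourceColName="phone"/>
//   </entity>
//
// splitBy yields a List<String>, regex alone extracts group(1) (or a List when
// the pattern has several groups), and regex with replaceWith rewrites the value.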
@@ -0,0 +1,102 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.Map;

/**
 * <p>
 * A Transformer implementation capable of executing functions written in
 * scripting languages on each row of data.
 * </p>
 * <p/>
 * <p>
 * Refer to <a
 * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
 * for more details.
 * </p>
 * <p/>
 * <b>This API is experimental and may change in the future.</b>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class ScriptTransformer extends Transformer {
  private Object engine;

  private Method invokeFunctionMethod;

  private String functionName;

  public Object transformRow(Map<String, Object> row, Context context) {
    try {
      if (engine == null)
        initEngine(context);
      if (engine == null)
        return row;
      return invokeFunctionMethod.invoke(engine, functionName, new Object[]{
              row, context});
    } catch (DataImportHandlerException e) {
      throw e;
    } catch (InvocationTargetException e) {
      throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
              "Could not invoke method: "
                      + functionName
                      + "\n <script>\n"
                      + context.getVariableResolver().resolve(
                      DataConfig.IMPORTER_NS + "." + DataConfig.SCRIPT)
                      + "</script>", e);
    } catch (Exception e) {
      throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
              "Error invoking script for entity "
                      + context.getEntityAttribute("name"), e);
    }
  }

  private void initEngine(Context context) {
    try {
      String scriptText = (String) context.getVariableResolver().resolve(
              DataConfig.IMPORTER_NS + "." + DataConfig.SCRIPT);
      String scriptLang = (String) context.getVariableResolver().resolve(
              DataConfig.IMPORTER_NS + "." + DataConfig.SCRIPT_LANG);
      Object scriptEngineMgr = Class
              .forName("javax.script.ScriptEngineManager").newInstance();
      // create a Script engine
      Method getEngineMethod = scriptEngineMgr.getClass().getMethod(
              "getEngineByName", String.class);
      engine = getEngineMethod.invoke(scriptEngineMgr, scriptLang);
      Method evalMethod = engine.getClass().getMethod("eval", String.class);
      invokeFunctionMethod = engine.getClass().getMethod("invokeFunction",
              String.class, Object[].class);
      evalMethod.invoke(engine, scriptText);
    } catch (Exception e) {
      throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
              "<script> can be used only in Java 6 or above", e);
    }
  }

  public void setFunctionName(String methodName) {
    this.functionName = methodName;
  }

  public String getFunctionName() {
    return functionName;
  }

}
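
// Usage sketch (illustrative only, not part of this patch). The script text is
// resolved from DataConfig.SCRIPT, so it would normally live in a <script>
// element of data-config.xml, with an entity naming the function to call; the
// function name below is hypothetical:
//
//   <script><![CDATA[
//     function addExtraField(row) {
//       row.put('extra', 'value');
//       return row;
//     }
//   ]]></script>
//
// initEngine() loads javax.script reflectively, which is why this transformer
// requires Java 6 or above at runtime while the module itself can still be
// compiled without javax.script on the classpath.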
@@ -0,0 +1,289 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import org.apache.lucene.document.Document;
import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.update.CommitUpdateCommand;
import org.apache.solr.update.DeleteUpdateCommand;
import org.apache.solr.update.UpdateHandler;

import java.io.*;
import java.text.ParseException;
import java.util.Date;
import java.util.Properties;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * <p>
 * Writes documents to SOLR as well as provides methods for loading and
 * persisting last index time.
 * </p>
 * <p/>
 * <b>This API is experimental and may change in the future.</b>
 *
 * @version $Id$
 * @since solr 1.3
 */
public abstract class SolrWriter {
  private static final Logger LOG = Logger.getLogger(SolrWriter.class.getName());

  static final String IMPORTER_PROPERTIES = "dataimport.properties";

  static final String LAST_INDEX_KEY = "last_index_time";

  private final UpdateHandler updater;

  private final String configDir;

  public SolrWriter(UpdateHandler updater, String confDir) {
    this.updater = updater;
    configDir = confDir;
  }

  public boolean upload(Document d) {
    try {
      AddUpdateCommand command = new AddUpdateCommand();
      command.doc = d;
      command.allowDups = false;
      command.overwritePending = true;
      command.overwriteCommitted = true;
      updater.addDoc(command);
    } catch (IOException e) {
      LOG.log(Level.SEVERE, "Exception while adding: " + d, e);
      return false;
    } catch (Exception e) {
      LOG.log(Level.WARNING, "Error creating document : " + d);
      return false;
    }

    return true;
  }

  public void deleteDoc(Object id) {
    try {
      LOG.info("Deleting document from Solr: " + id);
      DeleteUpdateCommand delCmd = new DeleteUpdateCommand();
      delCmd.id = id.toString();
      delCmd.fromPending = true;
      delCmd.fromCommitted = true;
      updater.delete(delCmd);
    } catch (IOException e) {
      LOG.log(Level.SEVERE, "Exception while deleting: " + id, e);
    }
  }

  Date getStartTime() {
    Properties props = readIndexerProperties();
    String result = props.getProperty(SolrWriter.LAST_INDEX_KEY);

    try {
      if (result != null)
        return DataImporter.DATE_TIME_FORMAT.parse(result);
    } catch (ParseException e) {
      throw new DataImportHandlerException(DataImportHandlerException.WARN,
              "Unable to read last indexed time from: "
                      + SolrWriter.IMPORTER_PROPERTIES, e);
    }
    return null;
  }

  private void persistStartTime(Date date) {
    OutputStream propOutput = null;

    Properties props = readIndexerProperties();

    try {
      props.put(SolrWriter.LAST_INDEX_KEY, DataImporter.DATE_TIME_FORMAT.format(date));
      String filePath = configDir;
      if (configDir != null && !configDir.endsWith(File.separator))
        filePath += File.separator;
      filePath += SolrWriter.IMPORTER_PROPERTIES;
      propOutput = new FileOutputStream(filePath);
      props.store(propOutput, null);
      LOG.info("Wrote last indexed time to " + SolrWriter.IMPORTER_PROPERTIES);
    } catch (FileNotFoundException e) {
      throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
              "Unable to persist Index Start Time", e);
    } catch (IOException e) {
      throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
              "Unable to persist Index Start Time", e);
    } finally {
      try {
        if (propOutput != null)
          propOutput.close();
      } catch (IOException e) {
        propOutput = null;
      }
    }
  }

  private Properties readIndexerProperties() {
    Properties props = new Properties();
    InputStream propInput = null;

    try {
      // mirror the separator handling in persistStartTime() so read and write
      // always target the same file
      String filePath = configDir;
      if (configDir != null && !configDir.endsWith(File.separator))
        filePath += File.separator;
      filePath += SolrWriter.IMPORTER_PROPERTIES;
      propInput = new FileInputStream(filePath);
      props.load(propInput);
      LOG.info("Read " + SolrWriter.IMPORTER_PROPERTIES);
    } catch (Exception e) {
      LOG.log(Level.WARNING, "Unable to read: " + SolrWriter.IMPORTER_PROPERTIES);
    } finally {
      try {
        if (propInput != null)
          propInput.close();
      } catch (IOException e) {
        propInput = null;
      }
    }

    return props;
  }

  public void deleteByQuery(String query) {
    try {
      LOG.info("Deleting documents from Solr with query: " + query);
      DeleteUpdateCommand delCmd = new DeleteUpdateCommand();
      delCmd.query = query;
      delCmd.fromCommitted = true;
      delCmd.fromPending = true;
      updater.deleteByQuery(delCmd);
    } catch (IOException e) {
      LOG.log(Level.SEVERE, "Exception while deleting by query: " + query, e);
    }
  }

  public void commit(boolean optimize) {
    try {
      CommitUpdateCommand commit = new CommitUpdateCommand(optimize);
      updater.commit(commit);
    } catch (Exception e) {
      LOG.log(Level.SEVERE, "Exception during Solr commit.", e);
    }
  }

  public void doDeleteAll() {
    try {
      DeleteUpdateCommand deleteCommand = new DeleteUpdateCommand();
      deleteCommand.query = "*:*";
      deleteCommand.fromCommitted = true;
      deleteCommand.fromPending = true;
      updater.deleteByQuery(deleteCommand);
    } catch (IOException e) {
      throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
              "Exception in full dump while deleting all documents.", e);
    }
  }

  static String getResourceAsString(InputStream in) throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
    byte[] buf = new byte[1024];
    int sz;
    try {
      // Read until end-of-stream. The original loop wrote before checking for
      // EOF and stopped on any short read, which could fail or drop data.
      while ((sz = in.read(buf)) != -1) {
        baos.write(buf, 0, sz);
      }
    } finally {
      try {
        in.close();
      } catch (Exception e) {
        // ignore
      }
    }
    return new String(baos.toByteArray());
  }

  static String getDocCount() {
    if (DocBuilder.INSTANCE.get() != null) {
      return "" + (DocBuilder.INSTANCE.get().importStatistics.docCount.get() + 1);
    } else {
      return "";
    }
  }

  public Date loadIndexStartTime() {
    return this.getStartTime();
  }

  public Class loadClass(String name) throws ClassNotFoundException {
    return Class.forName(name);
  }

  /**
   * <p>
   * Stores the last indexed time into the <code>IMPORTER_PROPERTIES</code>
   * file. If any properties are already defined in the file, then they are
   * preserved.
   * </p>
   *
   * @param date the time at which the current indexing run started
   */
  public void persistIndexStartTime(Date date) {
    this.persistStartTime(date);
  }

  public abstract SolrDoc getSolrDocInstance();

  /**
   * <p>
   * Write the document to the index
   * </p>
   *
   * @param d the document wrapper object
   * @return true if the document was added successfully
   */
  public abstract boolean upload(SolrDoc d);

  /**
   * This method is used for verbose debugging
   *
   * @param event the event name: start.entity, end.entity, transformer.row etc.
   * @param name  name of the entity/transformer
   * @param row   the actual data; can be a Map&lt;String, Object&gt; or a
   *              List&lt;Map&lt;String, Object&gt;&gt;
   */
  public abstract void log(int event, String name, Object row);

  /**
   * The purpose of this interface is to provide pluggable implementations for
   * Solr 1.2 &amp; 1.3. The implementation can choose to wrap appropriate
   * objects based on the version.
   */
  public static interface SolrDoc {

    public void addField(String name, Object value, float boost);

    public Object getField(String field);

    public void setDocumentBoost(float boost);
  }

  public static final int START_ENTITY = 1, END_ENTITY = 2,
          TRANSFORMED_ROW = 3, ENTITY_META = 4, PRE_TRANSFORMER_ROW = 5,
          START_DOC = 6, END_DOC = 7, ENTITY_OUT = 8, ROW_END = 9,
          TRANSFORMER_EXCEPTION = 10, ENTITY_EXCEPTION = 11, DISABLE_LOGGING = 12,
          ENABLE_LOGGING = 13;
}
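
// Sketch of the properties file this class maintains (contents illustrative;
// the exact timestamp layout is whatever DataImporter.DATE_TIME_FORMAT
// produces). persistStartTime() writes it into the conf directory and
// getStartTime() reads it back so delta imports can select only rows modified
// since the previous run:
//
//   # conf/dataimport.properties
//   last_index_time=2008-07-30 21\:15\:23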
@@ -0,0 +1,171 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import java.util.Iterator;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * <p>
 * An EntityProcessor instance which provides support for reading from
 * databases. It is used in conjunction with JdbcDataSource. This is the default
 * EntityProcessor if none is specified explicitly in data-config.xml
 * </p>
 * <p/>
 * <p>
 * Refer to <a
 * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
 * for more details.
 * </p>
 * <p/>
 * <b>This API is experimental and may change in the future.</b>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class SqlEntityProcessor extends EntityProcessorBase {
  private static final Logger LOG = Logger.getLogger(SqlEntityProcessor.class.getName());

  protected DataSource<Iterator<Map<String, Object>>> dataSource;

  @SuppressWarnings("unchecked")
  public void init(Context context) {
    super.init(context);
    dataSource = context.getDataSource();
  }

  protected void initQuery(String q) {
    try {
      DataImporter.QUERY_COUNT.get().incrementAndGet();
      rowIterator = dataSource.getData(q);
      this.query = q;
    } catch (DataImportHandlerException e) {
      throw e;
    } catch (Exception e) {
      LOG.log(Level.SEVERE, "The query failed '" + q + "'", e);
      throw new DataImportHandlerException(DataImportHandlerException.SEVERE, e);
    }
  }

  public Map<String, Object> nextRow() {
    if (rowcache != null)
      return getFromRowCache();
    if (rowIterator == null) {
      String q = getQuery();
      initQuery(resolver.replaceTokens(q));
    }
    while (true) {
      Map<String, Object> r = getNext();
      if (r == null)
        return null;
      r = applyTransformer(r);
      if (r != null)
        return r;
    }
  }

  public Map<String, Object> nextModifiedRowKey() {
    if (rowIterator == null) {
      String deltaQuery = context.getEntityAttribute(DELTA_QUERY);
      if (deltaQuery == null)
        return null;
      initQuery(resolver.replaceTokens(deltaQuery));
    }
    return getNext();
  }

  public Map<String, Object> nextDeletedRowKey() {
    if (rowIterator == null) {
      String deletedPkQuery = context.getEntityAttribute(DEL_PK_QUERY);
      if (deletedPkQuery == null)
        return null;
      initQuery(resolver.replaceTokens(deletedPkQuery));
    }
    return getNext();
  }

  public Map<String, Object> nextModifiedParentRowKey() {
    if (rowIterator == null) {
      String parentDeltaQuery = context.getEntityAttribute(PARENT_DELTA_QUERY);
      if (parentDeltaQuery == null)
        return null;
      LOG.info("Running parentDeltaQuery for Entity: "
              + context.getEntityAttribute("name"));
      initQuery(resolver.replaceTokens(parentDeltaQuery));
    }
    return getNext();
  }

  public String getQuery() {
    String queryString = context.getEntityAttribute(QUERY);
    if (context.currentProcess() == Context.FULL_DUMP
            || !context.isRootEntity()) {
      return queryString;
    }
    return getDeltaImportQuery(queryString);
  }

  public String getDeltaImportQuery(String queryString) {
    StringBuffer sb = new StringBuffer(queryString);
    if (SELECT_WHERE_PATTERN.matcher(queryString).find()) {
      sb.append(" and ");
    } else {
      sb.append(" where ");
    }
    boolean first = true;
    String[] primaryKeys = context.getEntityAttribute("pk").split(",");
    for (String primaryKey : primaryKeys) {
      if (!first) {
        sb.append(" and ");
      }
      first = false;
      Object val = resolver.resolve("dataimporter.delta." + primaryKey);
      if (val == null) {
        Matcher m = DOT_PATTERN.matcher(primaryKey);
        if (m.find()) {
          val = resolver.resolve("dataimporter.delta." + m.group(1));
        }
      }
      sb.append(primaryKey).append(" = ");
      if (val instanceof Number) {
        sb.append(val.toString());
      } else {
        sb.append("'").append(val.toString()).append("'");
      }
    }
    return sb.toString();
  }

  private static Pattern SELECT_WHERE_PATTERN = Pattern.compile(
          "^\\s*(select\\b.*?\\b)(where).*", Pattern.CASE_INSENSITIVE);

  public static final String QUERY = "query";

  public static final String DELTA_QUERY = "deltaQuery";

  public static final String PARENT_DELTA_QUERY = "parentDeltaQuery";

  public static final String DEL_PK_QUERY = "deletedPkQuery";

  public static final Pattern DOT_PATTERN = Pattern.compile(".*?\\.(.*)$");
}
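
// Worked example of getDeltaImportQuery() (illustrative table and values): for
// an entity with pk="ID" and query="select * from item", a delta import
// resolves ${dataimporter.delta.ID} for each modified key and executes
//
//   select * from item where ID = '123'
//
// When the query already contains a WHERE clause (SELECT_WHERE_PATTERN
// matches), " and " is appended instead of " where ".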
@@ -0,0 +1,115 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * <p>
 * Provides functionality for replacing variables in a templatized string. It
 * can also be used to get the place-holders (variables) in a templatized
 * string.
 * </p>
 * <p/>
 * <b>This API is experimental and may change in the future.</b>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class TemplateString {
  private List<String> variables = new ArrayList<String>();

  private List<String> pcs = new ArrayList<String>();

  private Map<String, TemplateString> cache;

  public TemplateString() {
    cache = new HashMap<String, TemplateString>();
  }

  private TemplateString(String s) {
    Matcher m = WORD_PATTERN.matcher(s);
    int idx = 0;
    while (m.find()) {
      String aparam = s.substring(m.start() + 2, m.end() - 1);
      variables.add(aparam);
      pcs.add(s.substring(idx, m.start()));
      idx = m.end();
    }
    pcs.add(s.substring(idx));
  }

  /**
   * Returns a string with all variables replaced by the known values. An
   * unknown variable is replaced by an empty string.
   *
   * @param string   the templatized string
   * @param resolver the VariableResolver used to resolve variables
   * @return the string with all variables replaced by their resolved values
   */
  public String replaceTokens(String string, VariableResolver resolver) {
    TemplateString ts = cache.get(string);
    if (ts == null) {
      ts = new TemplateString(string);
      cache.put(string, ts);
    }
    return ts.fillTokens(resolver);
  }

  private String fillTokens(VariableResolver resolver) {
    String[] s = new String[variables.size()];
    for (int i = 0; i < variables.size(); i++) {
      Object val = resolver.resolve(variables.get(i));
      s[i] = val == null ? "" : getObjectAsString(val);
    }

    StringBuffer sb = new StringBuffer();
    for (int i = 0; i < pcs.size(); i++) {
      sb.append(pcs.get(i));
      if (i < s.length) {
        sb.append(s[i]);
      }
    }

    return sb.toString();
  }

  private String getObjectAsString(Object val) {
    if (val instanceof java.sql.Date) {
      java.sql.Date d = (java.sql.Date) val;
      return DataImporter.DATE_TIME_FORMAT.format(d);
    }
    return val.toString();
  }

  /**
   * Returns the variables in the given string.
   *
   * @param s the templatized string
   * @return the list of variables (strings) in the given templatized string.
   */
  public static List<String> getVariables(String s) {
    return new TemplateString(s).variables;
  }

  static final Pattern WORD_PATTERN = Pattern.compile("(\\$\\{.*?\\})");
}
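
// Usage sketch (illustrative values): with a resolver that maps
// "e.firstName" -> "John" and "e.lastName" -> "Doe",
//
//   new TemplateString().replaceTokens("${e.lastName}, ${e.firstName}", resolver)
//
// returns "Doe, John"; an unresolvable variable becomes the empty string. The
// parsed form of each template is cached per instance, so reusing one
// TemplateString avoids re-parsing repeated templates.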
@@ -0,0 +1,108 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;

/**
 * <p>
 * A Transformer which can put values into a column by resolving an expression
 * containing other columns
 * </p>
 * <p/>
 * <p>
 * For example:<br />
 * &lt;field column="name" template="${e.lastName}, ${e.firstName}
 * ${e.middleName}" /&gt; will produce the name by combining values from
 * lastName, firstName and middleName fields as given in the template attribute.
 * </p>
 * <p/>
 * <p>
 * Refer to <a
 * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
 * for more details.
 * </p>
 * <p/>
 * <b>This API is experimental and may change in the future.</b>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class TemplateTransformer extends Transformer {

  private static final Logger LOG = Logger.getLogger(TemplateTransformer.class.getName());

  @SuppressWarnings("unchecked")
  public Object transformRow(Map<String, Object> row, Context context) {

    String entityName = context.getEntityAttribute(DataImporter.NAME);

    VariableResolverImpl resolver = (VariableResolverImpl) context
            .getVariableResolver();
    Map<String, Object> resolverMap = (Map<String, Object>) resolver
            .resolve(entityName);

    // Clone the resolver map because it contains common fields or any others
    // that the entity processor chooses to keep.
    Map<String, Object> resolverMapCopy = new HashMap<String, Object>();
    if (resolverMap != null) {
      for (Map.Entry<String, Object> entry : resolverMap.entrySet())
        resolverMapCopy.put(entry.getKey(), entry.getValue());
    }
    // Add the current row to the copy of the resolver map
    for (Map.Entry<String, Object> entry : row.entrySet())
      resolverMapCopy.put(entry.getKey(), entry.getValue());
    // Add this copy to the namespace of the current entity in the resolver
    resolver.addNamespace(entityName, resolverMapCopy);

    for (Map<String, String> map : context.getAllEntityFields()) {
      String expr = map.get(TEMPLATE);
      if (expr == null)
        continue;

      String column = map.get(DataImporter.COLUMN);

      // Verify that all variables can be resolved
      boolean resolvable = true;
      List<String> variables = TemplateString.getVariables(expr);
      for (String v : variables) {
        if (resolver.resolve(v) == null) {
          LOG.warning("Unable to resolve variable: " + v
                  + " while parsing expression: " + expr);
          resolvable = false;
        }
      }

      if (!resolvable)
        continue;

      row.put(column, resolver.replaceTokens(expr));
    }

    // Restore the original resolver map
    resolver.addNamespace(entityName, resolverMap);

    return row;
  }

  public static final String TEMPLATE = "template";
}
@@ -0,0 +1,52 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import java.util.Map;

/**
 * <p>
 * Use this API to implement a custom transformer for any given entity
 * </p>
 * <p/>
 * <p>
 * Implementations of this interface must provide a public no-args constructor.
 * </p>
 * <p/>
 * <p>
 * Refer to <a
 * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
 * for more details.
 * </p>
 * <p/>
 * <b>This API is experimental and may change in the future.</b>
 *
 * @version $Id$
 * @since solr 1.3
 */
public abstract class Transformer {
  /**
   * The input is a row of data and the output has to be a new row.
   *
   * @param context the current context
   * @param row     a row of data
   * @return the changed data: a Map&lt;String, Object&gt; if a single row is
   *         returned, or a List&lt;Map&lt;String, Object&gt;&gt; if multiple
   *         rows are returned
   */
  public abstract Object transformRow(Map<String, Object> row, Context context);
}
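
// A hypothetical example implementation (not part of this patch): lower-cases
// a single column in place. The column name "name" is illustrative only.
class LowerCaseTransformer extends Transformer {
  public Object transformRow(Map<String, Object> row, Context context) {
    Object name = row.get("name");
    if (name != null)
      row.put("name", name.toString().toLowerCase());
    return row; // one row; return a List<Map<String, Object>> to emit several
  }
}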
@@ -0,0 +1,53 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

/**
 * <p>
 * This class is more or less like a Map, but has more intelligence to resolve
 * namespaces. Namespaces are delimited with '.' (period).
 * </p>
 * <p/>
 * <p>
 * Refer to <a
 * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
 * for more details.
 * </p>
 * <p/>
 * <b>This API is experimental and may change in the future.</b>
 *
 * @version $Id$
 * @since solr 1.3
 */
public abstract class VariableResolver {

  /**
   * Resolves the value for the given name.
   *
   * @param name the (possibly namespaced) name to look up
   * @return the resolved value, or null if the name cannot be resolved
   */
  public abstract Object resolve(String name);

  /**
   * Given a String with place holders, replace them with the value tokens.
   *
   * @param template the templatized string
   * @return the string with the placeholders replaced with their values
   */
  public abstract String replaceTokens(String template);
}
@@ -0,0 +1,120 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;

/**
 * <p>
 * The default implementation of VariableResolver interface
 * </p>
 * <p/>
 * <b>This API is experimental and may change in the future.</b>
 *
 * @version $Id$
 * @see VariableResolver
 * @since solr 1.3
 */
public class VariableResolverImpl extends VariableResolver {
  private Map<String, Object> container = new HashMap<String, Object>();

  private static final TemplateString TEMPLATE_STRING = new TemplateString();

  public VariableResolverImpl() {
  }

  @SuppressWarnings("unchecked")
  public VariableResolverImpl addNamespace(String name, Map<String, Object> map) {
    if (name != null) {
      String[] parts = DOT_SPLIT.split(name, 0);
      Map ns = container;
      for (int i = 0; i < parts.length; i++) {
        if (i == parts.length - 1) {
          ns.put(parts[i], map);
        }
        if (ns.get(parts[i]) == null) {
          ns.put(parts[i], new HashMap());
          ns = (Map) ns.get(parts[i]);
        } else {
          if (ns.get(parts[i]) instanceof Map) {
            ns = (Map) ns.get(parts[i]);
          } else {
            ns.put(parts[i], new HashMap());
            ns = (Map) ns.get(parts[i]);
          }
        }
      }
    } else {
      container.putAll(map);
    }
    return this;
  }

  public void removeNamespace(String name) {
    if (name != null)
      container.remove(name);
  }

  public String replaceTokens(String template) {
    return TEMPLATE_STRING.replaceTokens(template, this);
  }

  @SuppressWarnings("unchecked")
  public Object resolve(String name) {
    if (name == null)
      return container;
    if ("".equals(name))
      return null;
    String[] parts = DOT_SPLIT.split(name, 0);
    Map<String, Object> namespace = container;
    for (int i = 0; i < parts.length; i++) {
      String thePart = parts[i];
      if (i == parts.length - 1) {
        return namespace.get(thePart);
      }
      Object temp = namespace.get(thePart);
      if (temp == null) {
        return namespace.get(mergeAll(parts, i));
      } else {
        if (temp instanceof Map) {
          namespace = (Map) temp;
        } else {
          return null;
        }
      }
    }
    return null;
  }

  private String mergeAll(String[] parts, int i) {
    if (i == parts.length - 1)
      return parts[parts.length - 1];
    StringBuffer sb = new StringBuffer();
    for (int j = i; j < parts.length; j++) {
      sb.append(parts[j]);
      if (j < parts.length - 1)
        sb.append(".");
    }
    return sb.toString();
  }

  static final Pattern DOT_SPLIT = Pattern.compile("\\.");
}
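
// Usage sketch (illustrative): namespaces nest on '.' boundaries, so
//
//   VariableResolverImpl vr = new VariableResolverImpl();
//   Map<String, Object> m = new HashMap<String, Object>();
//   m.put("id", 5);
//   vr.addNamespace("dataimporter.delta", m);
//   vr.resolve("dataimporter.delta.id");                     // -> 5
//   vr.replaceTokens("where id = ${dataimporter.delta.id}"); // -> "where id = 5"
//
// resolve(null) returns the whole container and resolve("") returns null.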
@@ -0,0 +1,329 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import javax.xml.transform.Source;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import java.io.CharArrayReader;
import java.io.CharArrayWriter;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;

/**
 * <p>
 * An implementation of EntityProcessor which uses a streaming xpath parser to
 * extract values out of XML documents. It is typically used in conjunction with
 * HttpDataSource or FileDataSource.
 * </p>
 * <p/>
 * <p>
 * Refer to <a
 * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
 * for more details.
 * </p>
 * <p/>
 * <b>This API is experimental and may change in the future.</b>
 *
 * @version $Id$
 * @see XPathRecordReader
 * @since solr 1.3
 */
public class XPathEntityProcessor extends EntityProcessorBase {
  private static final Logger LOG = Logger.getLogger(XPathEntityProcessor.class.getName());

  protected List<String> placeHolderVariables;

  protected List<String> commonFields;

  private String pk;

  private XPathRecordReader xpathReader;

  protected DataSource<Reader> dataSource;

  protected javax.xml.transform.Transformer xslTransformer;

  @SuppressWarnings("unchecked")
  public void init(Context context) {
    super.init(context);
    if (xpathReader == null)
      initXpathReader();
    pk = context.getEntityAttribute("pk");
    dataSource = context.getDataSource();
  }

  private void initXpathReader() {
    boolean useSolrAddXml = Boolean.parseBoolean(context
            .getEntityAttribute(USE_SOLR_ADD_SCHEMA));
    String xslt = context.getEntityAttribute(XSL);
    if (xslt != null) {
      xslt = resolver.replaceTokens(xslt);
      try {
        Source xsltSource = new StreamSource(xslt);
        // create an instance of TransformerFactory
        TransformerFactory transFact = TransformerFactory.newInstance();
        xslTransformer = transFact.newTransformer(xsltSource);
        LOG.info("Using xslTransformer: " + xslTransformer.getClass().getName());
      } catch (Exception e) {
        throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
                "Error initializing XSL ", e);
      }
    }

    if (useSolrAddXml) {
      // Support solr add documents
      xpathReader = new XPathRecordReader("/add/doc");
      xpathReader.addField("name", "/add/doc/field/@name", true);
      xpathReader.addField("value", "/add/doc/field", true);
    } else {
      String forEachXpath = context.getEntityAttribute(FOR_EACH);
      if (forEachXpath == null)
        throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
                "Entity : " + context.getEntityAttribute("name")
                        + " must have a 'forEach' attribute");

      try {
        xpathReader = new XPathRecordReader(forEachXpath);
        for (Map<String, String> field : context.getAllEntityFields()) {
          if (field.get(XPATH) == null)
            continue;
          xpathReader.addField(field.get(DataImporter.COLUMN),
                  field.get(XPATH), Boolean.parseBoolean(field
                          .get(DataImporter.MULTI_VALUED)));
        }
      } catch (RuntimeException e) {
        throw new DataImportHandlerException(DataImportHandlerException.SEVERE,
                "Exception while reading xpaths for fields", e);
      }
    }

    List<String> l = TemplateString.getVariables(context.getEntityAttribute(URL));
    for (String s : l) {
      if (s.startsWith(entityName + ".")) {
        if (placeHolderVariables == null)
          placeHolderVariables = new ArrayList<String>();
        placeHolderVariables.add(s.substring(entityName.length() + 1));
      }
    }
    for (Map<String, String> fld : context.getAllEntityFields()) {
      if (fld.get(COMMON_FIELD) != null && "true".equals(fld.get(COMMON_FIELD))) {
        if (commonFields == null)
          commonFields = new ArrayList<String>();
        commonFields.add(fld.get(DataImporter.COLUMN));
      }
    }
  }

  public Map<String, Object> nextRow() {
    Map<String, Object> result;

    if (!context.isRootEntity())
      return fetchNextRow();

    while (true) {
      result = fetchNextRow();

      if (result == null)
        return null;

      if (pk == null || result.get(pk) != null)
        return result;
    }
  }

  @SuppressWarnings("unchecked")
  private Map<String, Object> fetchNextRow() {
    Map<String, Object> r = null;
    while (true) {
      if (rowcache != null)
        return getFromRowCache();
      if (rowIterator == null)
        initQuery(resolver.replaceTokens(context.getEntityAttribute(URL)));
      r = getNext();
      if (r == null) {
        Object hasMore = getSessionAttribute(HAS_MORE);
        if ("true".equals(hasMore) || Boolean.TRUE.equals(hasMore)) {
          String url = (String) getSessionAttribute(NEXT_URL);
          if (url == null)
            url = context.getEntityAttribute(URL);
          Map namespace = (Map) getSessionAttribute(entityName);
          if (namespace != null)
            resolver.addNamespace(entityName, namespace);
          clearSession();
          initQuery(resolver.replaceTokens(url));
          r = getNext();
          if (r == null)
            return null;
        } else {
          return null;
        }
      }
      r = applyTransformer(r);
      if (r != null)
        return readUsefulVars(r);
    }
  }

  private void initQuery(String s) {
    Reader data = null;
    try {
      final List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
      data = dataSource.getData(s);
      if (xslTransformer != null) {
        try {
          SimpleCharArrayReader caw = new SimpleCharArrayReader();
          xslTransformer.transform(new StreamSource(data),
                  new StreamResult(caw));
          data = caw.getReader();
        } catch (TransformerException e) {
          throw new DataImportHandlerException(
                  DataImportHandlerException.SEVERE,
                  "Exception while applying XSL transformation", e);
        }
      }
      final List<Map<String, Object>> solrDocs = new ArrayList<Map<String, Object>>();
      final boolean useSolrAddXml = Boolean.parseBoolean(context
              .getEntityAttribute(USE_SOLR_ADD_SCHEMA));
      xpathReader.streamRecords(data, new XPathRecordReader.Handler() {
        @SuppressWarnings("unchecked")
        public void handle(Map<String, Object> record, String xpath) {
          if (useSolrAddXml) {
            List<String> names = (List<String>) record.get("name");
            List<String> values = (List<String>) record.get("value");

            Map<String, Object> row = new HashMap<String, Object>();

            for (int i = 0; i < names.size(); i++) {
              if (row.containsKey(names.get(i))) {
                Object existing = row.get(names.get(i));
                if (existing instanceof List) {
                  List list = (List) existing;
                  list.add(values.get(i));
                } else {
                  List list = new ArrayList();
                  list.add(existing);
                  list.add(values.get(i));
                  row.put(names.get(i), list);
                }
              } else {
                row.put(names.get(i), values.get(i));
              }
            }

            solrDocs.add(row);
          } else {
            record.put(XPATH_FIELD_NAME, xpath);
            rows.add(record);
          }
        }
      });

      if (useSolrAddXml) {
        rowIterator = solrDocs.iterator();
      } else {
        rowIterator = rows.iterator();
      }
    } finally {
      try {
        if (data != null) // getData() may have thrown before data was assigned
          data.close();
      } catch (Exception e) { /* Ignore */
      }
    }
  }

  private static class SimpleCharArrayReader extends CharArrayWriter {
    public Reader getReader() {
      return new CharArrayReader(super.buf, 0, super.count);
    }

  }

  @SuppressWarnings("unchecked")
  private Map<String, Object> readUsefulVars(Map<String, Object> r) {
    Object val = r.get(HAS_MORE);
    if (val != null)
      setSessionAttribute(HAS_MORE, val);
    val = r.get(NEXT_URL);
    if (val != null)
      setSessionAttribute(NEXT_URL, val);
    if (placeHolderVariables != null) {
      Map namespace = getNameSpace();
      for (String s : placeHolderVariables) {
        val = r.get(s);
        if (val != null)
          namespace.put(s, val);
      }
    }
    if (commonFields != null) {
      for (String s : commonFields) {
        Object commonVal = r.get(s);
        if (commonVal != null) {
          setSessionAttribute(s, commonVal);
          getNameSpace().put(s, commonVal);
        } else {
          commonVal = getSessionAttribute(s);
          if (commonVal != null)
            r.put(s, commonVal);
        }
      }
    }
    return r;
  }

  @SuppressWarnings("unchecked")
  private Map getNameSpace() {
    Map namespace = (Map) getSessionAttribute(entityName);
    if (namespace == null) {
      namespace = new HashMap();
      setSessionAttribute(entityName, namespace);
    }
    return namespace;
  }

  public static final String URL = "url";

  public static final String HAS_MORE = "$hasMore";

  public static final String NEXT_URL = "$nextUrl";

  public static final String XPATH_FIELD_NAME = "$forEach";

  public static final String FOR_EACH = "forEach";

  public static final String XPATH = "xpath";

  public static final String COMMON_FIELD = "commonField";

  public static final String USE_SOLR_ADD_SCHEMA = "useSolrAddSchema";

  public static final String XSL = "xsl";

}
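
// Usage sketch (illustrative feed URL and column names). The attributes
// consumed above correspond to a data-config.xml entity such as:
//
//   <entity name="news" processor="XPathEntityProcessor"
//           url="http://example.com/feed.xml"
//           forEach="/RDF/item" dataSource="http">
//     <field column="title" xpath="/RDF/item/title"/>
//     <field column="link"  xpath="/RDF/item/link"/>
//   </entity>
//
// Each element matched by forEach becomes one row; a transformer that puts
// $hasMore/$nextUrl into a row makes fetchNextRow() follow a further URL
// before finishing.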
@@ -0,0 +1,327 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import javax.xml.stream.XMLInputFactory;
import static javax.xml.stream.XMLStreamConstants.*;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import java.io.IOException;
import java.io.Reader;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * <p>
 * A streaming xpath parser which uses StAX for XML parsing. It supports only a
 * subset of xpath syntax.
 * </p>
 * <p/>
 * <b>This API is experimental and may change in the future.</b>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class XPathRecordReader {
  private Node rootNode = new Node("/", null);

  public XPathRecordReader(String forEachXpath) {
    String[] splits = forEachXpath.split("\\|");
    for (String split : splits) {
      split = split.trim();
      if (split.length() == 0)
        continue;
      addField0(split, split, false, true);
    }
  }

  public synchronized XPathRecordReader addField(String name, String xpath,
                                                 boolean multiValued) {
    if (!xpath.startsWith("/"))
      throw new RuntimeException("xpath must start with '/' : " + xpath);
    addField0(xpath, name, multiValued, false);
    return this;
  }

  private void addField0(String xpath, String name, boolean multiValued,
                         boolean isRecord) {
    List<String> paths = new LinkedList<String>(Arrays.asList(xpath.split("/")));
    if ("".equals(paths.get(0).trim()))
      paths.remove(0);
    rootNode.build(paths, name, multiValued, isRecord);
  }

  public List<Map<String, Object>> getAllRecords(Reader r) {
    final List<Map<String, Object>> results = new ArrayList<Map<String, Object>>();
    streamRecords(r, new Handler() {
      public void handle(Map<String, Object> record, String s) {
        results.add(record);
      }
    });
    return results;
  }

  public void streamRecords(Reader r, Handler handler) {
    try {
      XMLStreamReader parser = factory.createXMLStreamReader(r);
      rootNode.parse(parser, handler, new HashMap<String, Object>(),
              new Stack<Set<String>>(), false);
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }

  private class Node {
    String name, fieldName, xpathName, forEachPath;

    List<Node> attributes, childNodes;

    List<Map.Entry<String, String>> attribAndValues;

    Node parent;

    boolean hasText = false, multiValued = false, isRecord = false;

    public Node(String name, Node p) {
      xpathName = this.name = name;
      parent = p;
    }

    public Node(String name, String fieldName, boolean multiValued) {
      this.name = name;
      this.fieldName = fieldName;
      this.multiValued = multiValued;
    }

    private void parse(XMLStreamReader parser, Handler handler,
                       Map<String, Object> values, Stack<Set<String>> stack,
                       boolean recordStarted) throws IOException, XMLStreamException {
      Set<String> valuesAddedinThisFrame = null;
      if (isRecord) {
        recordStarted = true;
        valuesAddedinThisFrame = new HashSet<String>();
        stack.push(valuesAddedinThisFrame);
      } else if (recordStarted) {
        valuesAddedinThisFrame = stack.peek();
      } else {
        if (attributes != null || hasText)
          valuesAddedinThisFrame = new HashSet<String>();
        stack.push(valuesAddedinThisFrame);
      }
      try {
        if (attributes != null) {
          for (Node node : attributes) {
            String value = parser.getAttributeValue(null, node.name);
            if (value != null || (recordStarted && !isRecord)) {
              putText(values, value, node.fieldName, node.multiValued);
              valuesAddedinThisFrame.add(node.fieldName);
            }
          }
        }
        Set<Node> childrenFound = new HashSet<Node>();
        boolean skipNextEvent = false;
        int event = -1;
        while (true) {
          if (skipNextEvent) {
            // the text-collecting branch below has already read one event ahead
            skipNextEvent = false;
          } else {
            event = parser.next();
          }
          if (event == END_DOCUMENT) {
            return;
          }
          if (event == END_ELEMENT) {
            if (isRecord)
              handler.handle(new HashMap<String, Object>(values), forEachPath);
            if (recordStarted && !isRecord
                    && !childrenFound.containsAll(childNodes)) {
              for (Node n : childNodes) {
                if (!childrenFound.contains(n))
                  n.putNulls(values);
              }
            }
            return;
          }
          if ((event == CDATA || event == CHARACTERS || event == SPACE)
                  && hasText) {
            valuesAddedinThisFrame.add(fieldName);
            skipNextEvent = true;
            String text = parser.getText();
            event = parser.next();
            while (event == CDATA || event == CHARACTERS || event == SPACE) {
              text = text + parser.getText();
              event = parser.next();
            }
            putText(values, text, fieldName, multiValued);
          } else if (event == START_ELEMENT) {
            Node n = getMatchingChild(parser);
            if (n != null) {
              childrenFound.add(n);
              n.parse(parser, handler, values, stack, recordStarted);
            } else {
              skipTag(parser);
            }
          }
        }
      } finally {
        Set<String> cleanThis = null;
        if (isRecord || !recordStarted) {
          cleanThis = stack.pop();
        } else {
          return;
        }
        if (cleanThis != null) {
          for (String fld : cleanThis) {
            values.remove(fld);
          }
        }
      }
    }

    private Node getMatchingChild(XMLStreamReader parser) {
      if (childNodes == null)
        return null;
      String localName = parser.getLocalName();
      for (Node n : childNodes) {
        if (n.name.equals(localName)) {
          if (n.attribAndValues == null)
            return n;
          if (checkForAttributes(parser, n.attribAndValues))
            return n;
        }
      }
      return null;
    }

    private boolean checkForAttributes(XMLStreamReader parser,
                                       List<Map.Entry<String, String>> attrs) {
      for (Map.Entry<String, String> e : attrs) {
        String val = parser.getAttributeValue(null, e.getKey());
        if (val == null)
          return false;
        if (e.getValue() != null && !e.getValue().equals(val))
          return false;
      }
      return true;
    }

    private void putNulls(Map<String, Object> values) {
      if (attributes != null) {
        for (Node n : attributes) {
          if (n.multiValued)
            putText(values, null, n.fieldName, true);
        }
      }
      if (hasText && multiValued)
        putText(values, null, fieldName, true);
      if (childNodes != null) {
        for (Node childNode : childNodes)
          childNode.putNulls(values);
      }
    }

    @SuppressWarnings("unchecked")
    private void putText(Map<String, Object> values, String value,
                         String fieldName, boolean multiValued) {
      if (multiValued) {
        List<String> v = (List<String>) values.get(fieldName);
        if (v == null) {
          v = new ArrayList<String>();
          values.put(fieldName, v);
        }
        v.add(value);
      } else {
        values.put(fieldName, value);
      }
    }

    private void skipTag(XMLStreamReader parser) throws IOException,
            XMLStreamException {
      int type;
      while ((type = parser.next()) != END_ELEMENT) {
        if (type == START_ELEMENT)
          skipTag(parser);
      }
    }

    public void build(List<String> paths, String fieldName,
                      boolean multiValued, boolean record) {
      String name = paths.remove(0);
      if (paths.isEmpty() && name.startsWith("@")) {
        if (attributes == null) {
          attributes = new ArrayList<Node>();
        }
        name = name.substring(1);
        attributes.add(new Node(name, fieldName, multiValued));

      } else {
        if (childNodes == null)
          childNodes = new ArrayList<Node>();
        Node n = getOrAddChildNode(name);
        if (paths.isEmpty()) {
          if (record) {
            n.isRecord = true;
            n.forEachPath = fieldName;
          } else {
            n.hasText = true;
            n.fieldName = fieldName;
            n.multiValued = multiValued;
          }
        } else {
          n.build(paths, fieldName, multiValued, record);
        }
      }
    }

    private Node getOrAddChildNode(String xpathName) {
      for (Node n : childNodes)
        if (n.xpathName.equals(xpathName))
          return n;

      Node n = new Node(xpathName, this);
      Matcher m = ATTRIB_PRESENT_WITHVAL.matcher(xpathName);
      if (m.find()) {
        n.name = m.group(1);
        int start = m.start(2);
        while (true) {
          HashMap<String, String> attribs = new HashMap<String, String>();
          if (!m.find(start))
            break;
          attribs.put(m.group(3), m.group(5));
          start = m.end(6);
          if (n.attribAndValues == null)
            n.attribAndValues = new ArrayList<Map.Entry<String, String>>();
          n.attribAndValues.addAll(attribs.entrySet());
        }
      }
      childNodes.add(n);
      return n;
    }
  }

  static XMLInputFactory factory = XMLInputFactory.newInstance();

  public static interface Handler {
    public void handle(Map<String, Object> record, String xpath);
  }

  private static final Pattern ATTRIB_PRESENT_WITHVAL = Pattern
          .compile("(\\S*?)?(\\[@)(\\S*?)(='(.*?)')?(\\])");
}
|
|
@ -0,0 +1,24 @@
|
|||
<%@ page contentType="text/html; charset=utf-8" pageEncoding="UTF-8"%>
<%--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements. See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
--%>
<%-- do a verbatim include so we can use the local vars --%>
<html>
<frameset cols="50%, 50%">
  <frame src="debug.jsp" />
  <frame src="../dataimport?command=full-import&amp;debug=on&amp;verbose=true" name="result"/>
</frameset>
</html>

@@ -0,0 +1,73 @@
<%@ page contentType="text/html; charset=utf-8" pageEncoding="UTF-8"%>
<%--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements. See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
--%>
<%-- do a verbatim include so we can use the local vars --%>
<%@include file="_info.jsp"%>
<html>
<head>
  <title>DataImportHandler Interactive Development</title>
  <link rel="stylesheet" type="text/css" href="solr-admin.css">
  <link rel="icon" href="favicon.ico" type="image/ico"></link>
  <link rel="shortcut icon" href="favicon.ico" type="image/ico"></link>
  <script src="jquery-1.2.3.min.js"></script>
</head>
<body>
<h1>DataImportHandler Development Console</h1>
<br />
<form action="../dataimport" target="result" method="post">
  <input type="hidden" name="debug" value="on">
  <table>
    <tr>
      <td colspan="2">
        <table width="100%">
          <tr>
            <td>
              <select name="command">
                <option value="full-import" selected="selected">full-import</option>
                <option value="delta-import">delta-import</option>
              </select>
            </td>
            <td><strong>Verbose</strong> <input name="verbose" type="checkbox"></td>
            <td><strong>Commit</strong> <input name="commit" type="checkbox"></td>
            <td><strong>Clean</strong> <input name="clean" type="checkbox"></td>
            <td><strong>Start Row</strong> <input name="start" size="4" type="text" value="0"></td>
            <td><strong>No. of Rows</strong> <input name="rows" type="text" size="4" value="10"></td>
          </tr>
        </table>
      </td>
    </tr>
    <tr>
      <td><strong>data config xml</strong></td>
      <td><input class="stdbutton" type="submit" value="debug now"></td>
    </tr>
    <tr>
      <td colspan="2"><textarea id="txtDataConfig" rows="30" cols="80" name="dataConfig"></textarea></td>
      <script type="text/javascript" language="Javascript">
        $.get('../dataimport?command=show-config', function(data){
          $('#txtDataConfig').attr('value', data);
        });
      </script>
    </tr>
  </table>
</form>
</body>
</html>

@@ -0,0 +1,250 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import org.junit.Assert;
import org.junit.Test;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * <p>
 * Test for CachedSqlEntityProcessor
 * </p>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class TestCachedSqlEntityProcessor {

  @Test
  public void withoutWhereClause() {
    List fields = new ArrayList();
    fields.add(AbstractDataImportHandlerTest.createMap("column", "id"));
    fields.add(AbstractDataImportHandlerTest.createMap("column", "desc"));
    String q = "select * from x where id=${x.id}";
    Map<String, String> entityAttrs = AbstractDataImportHandlerTest.createMap(
            "query", q);
    MockDataSource ds = new MockDataSource();
    VariableResolverImpl vr = new VariableResolverImpl();

    vr.addNamespace("x", AbstractDataImportHandlerTest.createMap("id", 1));
    Context context = AbstractDataImportHandlerTest.getContext(null, vr, ds, 0,
            fields, entityAttrs);
    List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
    rows.add(AbstractDataImportHandlerTest.createMap("id", 1, "desc", "one"));
    rows.add(AbstractDataImportHandlerTest.createMap("id", 1, "desc",
            "another one"));
    MockDataSource.setIterator(vr.replaceTokens(q), rows.iterator());
    CachedSqlEntityProcessor csep = new CachedSqlEntityProcessor();
    csep.init(context);
    rows = new ArrayList<Map<String, Object>>();
    while (true) {
      Map<String, Object> r = csep.nextRow();
      if (r == null)
        break;
      rows.add(r);
    }
    Assert.assertEquals(2, rows.size());
    ds.close();
    csep.init(context);
    rows = new ArrayList<Map<String, Object>>();
    while (true) {
      Map<String, Object> r = csep.nextRow();
      if (r == null)
        break;
      rows.add(r);
    }
    Assert.assertEquals(2, rows.size());
    Assert.assertEquals(2, rows.get(0).size());
    Assert.assertEquals(2, rows.get(1).size());
  }

  @Test
  public void withoutWhereClauseWithTransformers() {
    List fields = new ArrayList();
    fields.add(AbstractDataImportHandlerTest.createMap("column", "id"));
    fields.add(AbstractDataImportHandlerTest.createMap("column", "desc"));
    String q = "select * from x where id=${x.id}";
    Map<String, String> entityAttrs = AbstractDataImportHandlerTest.createMap(
            "query", q, "transformer", UppercaseTransformer.class.getName());
    MockDataSource ds = new MockDataSource();
    VariableResolverImpl vr = new VariableResolverImpl();

    vr.addNamespace("x", AbstractDataImportHandlerTest.createMap("id", 1));
    Context context = AbstractDataImportHandlerTest.getContext(null, vr, ds, 0,
            fields, entityAttrs);
    List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
    rows.add(AbstractDataImportHandlerTest.createMap("id", 1, "desc", "one"));
    rows.add(AbstractDataImportHandlerTest.createMap("id", 1, "desc",
            "another one"));
    MockDataSource.setIterator(vr.replaceTokens(q), rows.iterator());
    CachedSqlEntityProcessor csep = new CachedSqlEntityProcessor();
    csep.init(context);
    rows = new ArrayList<Map<String, Object>>();
    while (true) {
      Map<String, Object> r = csep.nextRow();
      if (r == null)
        break;
      rows.add(r);
    }
    Assert.assertEquals(2, rows.size());
    ds.close();
    csep.init(context);
    rows = new ArrayList<Map<String, Object>>();
    while (true) {
      Map<String, Object> r = csep.nextRow();
      if (r == null)
        break;
      rows.add(r);
      Assert.assertEquals(r.get("desc").toString().toUpperCase(), r.get("desc"));
    }
    Assert.assertEquals(2, rows.size());
    Assert.assertEquals(2, rows.get(0).size());
    Assert.assertEquals(2, rows.get(1).size());
  }

  @Test
  public void withoutWhereClauseWithMultiRowTransformer() {
    List fields = new ArrayList();
    fields.add(AbstractDataImportHandlerTest.createMap("column", "id"));
    fields.add(AbstractDataImportHandlerTest.createMap("column", "desc"));
    String q = "select * from x where id=${x.id}";
    Map<String, String> entityAttrs = AbstractDataImportHandlerTest.createMap(
            "query", q, "transformer", DoubleTransformer.class.getName());
    MockDataSource ds = new MockDataSource();
    VariableResolverImpl vr = new VariableResolverImpl();

    vr.addNamespace("x", AbstractDataImportHandlerTest.createMap("id", 1));
    Context context = AbstractDataImportHandlerTest.getContext(null, vr, ds, 0,
            fields, entityAttrs);
    List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
    rows.add(AbstractDataImportHandlerTest.createMap("id", 1, "desc", "one"));
    rows.add(AbstractDataImportHandlerTest.createMap("id", 1, "desc",
            "another one"));
    MockDataSource.setIterator(vr.replaceTokens(q), rows.iterator());
    CachedSqlEntityProcessor csep = new CachedSqlEntityProcessor();
    csep.init(context);
    rows = new ArrayList<Map<String, Object>>();
    while (true) {
      Map<String, Object> r = csep.nextRow();
      if (r == null)
        break;
      rows.add(r);
    }
    Assert.assertEquals(4, rows.size());
    ds.close();
    csep.init(context);
    rows = new ArrayList<Map<String, Object>>();
    while (true) {
      Map<String, Object> r = csep.nextRow();
      if (r == null)
        break;
      rows.add(r);
    }
    Assert.assertEquals(4, rows.size());
    Assert.assertEquals(2, rows.get(0).size());
    Assert.assertEquals(2, rows.get(1).size());
  }

  public static class DoubleTransformer extends Transformer {

    public Object transformRow(Map<String, Object> row, Context context) {
      List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
      rows.add(row);
      rows.add(row);

      return rows;
    }
  }

  public static class UppercaseTransformer extends Transformer {

    public Object transformRow(Map<String, Object> row, Context context) {
      for (Map.Entry<String, Object> entry : row.entrySet()) {
        Object val = entry.getValue();
        if (val instanceof String) {
          String s = (String) val;
          entry.setValue(s.toUpperCase());
        }
      }
      return row;
    }
  }

  @Test
  public void withWhereClause() {
    List fields = new ArrayList();
    fields.add(AbstractDataImportHandlerTest.createMap("column", "id"));
    fields.add(AbstractDataImportHandlerTest.createMap("column", "desc"));
    String q = "select * from x";
    Map<String, String> entityAttrs = AbstractDataImportHandlerTest.createMap(
            "query", q, "where", "id=x.id");
    MockDataSource ds = new MockDataSource();
    VariableResolverImpl vr = new VariableResolverImpl();
    Map xNamespace = AbstractDataImportHandlerTest.createMap("id", 0);
    vr.addNamespace("x", xNamespace);
    Context context = AbstractDataImportHandlerTest.getContext(null, vr, ds, 0,
            fields, entityAttrs);
    List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
    rows.add(AbstractDataImportHandlerTest.createMap("id", 1, "desc", "one"));
    rows.add(AbstractDataImportHandlerTest.createMap("id", 2, "desc", "two"));
    rows.add(AbstractDataImportHandlerTest.createMap("id", 2, "desc",
            "another two"));
    rows.add(AbstractDataImportHandlerTest.createMap("id", 3, "desc", "three"));
    rows.add(AbstractDataImportHandlerTest.createMap("id", 3, "desc",
            "another three"));
    rows.add(AbstractDataImportHandlerTest.createMap("id", 3, "desc",
            "another another three"));
    MockDataSource.setIterator(q, rows.iterator());
    CachedSqlEntityProcessor csep = new CachedSqlEntityProcessor();
    csep.init(context);
    rows = new ArrayList<Map<String, Object>>();
    while (true) {
      Map<String, Object> r = csep.nextRow();
      if (r == null)
        break;
      rows.add(r);
    }
    Assert.assertEquals(0, rows.size());
    ds.close();

    csep.init(context);
    rows = new ArrayList<Map<String, Object>>();
    xNamespace.put("id", 2);
    while (true) {
      Map<String, Object> r = csep.nextRow();
      if (r == null)
        break;
      rows.add(r);
    }
    Assert.assertEquals(2, rows.size());

    csep.init(context);
    rows = new ArrayList<Map<String, Object>>();
    xNamespace.put("id", 3);
    while (true) {
      Map<String, Object> r = csep.nextRow();
      if (r == null)
        break;
      rows.add(r);
    }
    Assert.assertEquals(3, rows.size());
  }
}

@@ -0,0 +1,92 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import junit.framework.Assert;
import org.junit.Test;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import java.io.ByteArrayInputStream;
import java.util.ArrayList;
import java.util.List;

/**
 * <p>
 * Test for DataConfig
 * </p>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class TestDataConfig extends AbstractDataImportHandlerTest {

  @Override
  public void setUp() throws Exception {
    super.setUp();
  }

  @Override
  public void tearDown() throws Exception {
    super.tearDown();
  }

  @Override
  public String getSchemaFile() {
    return "dataimport-schema.xml";
  }

  @Override
  public String getSolrConfigFile() {
    return "dataimport-nodatasource-solrconfig.xml";
  }

  @Test
  @SuppressWarnings("unchecked")
  public void testDataConfigWithDataSource() throws Exception {
    List rows = new ArrayList();
    rows.add(createMap("id", "1", "desc", "one"));
    MockDataSource.setIterator("select * from x", rows.iterator());

    super.runFullImport(loadDataConfig("data-config-with-datasource.xml"));

    assertQ(req("id:1"), "//*[@numFound='1']");
  }

  @Test
  public void basic() throws Exception {
    javax.xml.parsers.DocumentBuilder builder = DocumentBuilderFactory
            .newInstance().newDocumentBuilder();
    Document doc = builder.parse(new ByteArrayInputStream(xml.getBytes()));

    DataConfig dc = new DataConfig();
    dc.readFromXml(doc.getDocumentElement());
    Assert.assertEquals("atrimlisting",
            dc.documents.get(0).entities.get(0).name);
  }

  private static final String xml = "<dataConfig>\n"
          + "\t<document name=\"autos\" >\n"
          + "\t\t<entity name=\"atrimlisting\" pk=\"acode\"\n"
          + "\t\t\tquery=\"select acode,make,model,year,msrp,category,image,izmo_image_url,price_range_low,price_range_high,invoice_range_low,invoice_range_high from atrimlisting\"\n"
          + "\t\t\tdeltaQuery=\"select acode from atrimlisting where last_modified > '${indexer.last_index_time}'\">\n"
          + "\t\t</entity>\n"
          + "\t</document>\n"
          + "</dataConfig>";
}

@@ -0,0 +1,92 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import org.junit.Assert;
import org.junit.Test;

import java.text.SimpleDateFormat;
import java.util.*;

/**
 * <p>
 * Test for DateFormatTransformer
 * </p>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class TestDateFormatTransformer {

  @Test
  @SuppressWarnings("unchecked")
  public void testTransformRow_SingleRow() throws Exception {
    List fields = new ArrayList();
    fields.add(AbstractDataImportHandlerTest.createMap(DataImporter.COLUMN,
            "lastModified"));
    fields.add(AbstractDataImportHandlerTest.createMap(DataImporter.COLUMN,
            "dateAdded", RegexTransformer.SRC_COL_NAME, "lastModified",
            DateFormatTransformer.DATE_TIME_FMT, "MM/dd/yyyy"));

    SimpleDateFormat format = new SimpleDateFormat("MM/dd/yyyy");
    Date now = format.parse(format.format(new Date()));

    Map row = AbstractDataImportHandlerTest.createMap("lastModified", format
            .format(now));

    VariableResolverImpl resolver = new VariableResolverImpl();
    resolver.addNamespace("e", row);

    Context context = AbstractDataImportHandlerTest.getContext(null, resolver,
            null, 0, fields, null);
    new DateFormatTransformer().transformRow(row, context);
    Assert.assertEquals(now, row.get("dateAdded"));
  }

  @Test
  @SuppressWarnings("unchecked")
  public void testTransformRow_MultipleRows() throws Exception {
    List fields = new ArrayList();
    fields.add(AbstractDataImportHandlerTest.createMap(DataImporter.COLUMN,
            "lastModified"));
    fields.add(AbstractDataImportHandlerTest.createMap(DataImporter.COLUMN,
            "dateAdded", RegexTransformer.SRC_COL_NAME, "lastModified",
            DateFormatTransformer.DATE_TIME_FMT, "MM/dd/yyyy hh:mm:ss.SSS"));

    SimpleDateFormat format = new SimpleDateFormat("MM/dd/yyyy hh:mm:ss.SSS");
    Date now1 = format.parse(format.format(new Date()));
    Date now2 = format.parse(format.format(new Date()));

    Map row = new HashMap();
    List list = new ArrayList();
    list.add(format.format(now1));
    list.add(format.format(now2));
    row.put("lastModified", list);

    VariableResolverImpl resolver = new VariableResolverImpl();
    resolver.addNamespace("e", row);

    Context context = AbstractDataImportHandlerTest.getContext(null, resolver,
            null, 0, fields, null);
    new DateFormatTransformer().transformRow(row, context);
    List output = new ArrayList();
    output.add(now1);
    output.add(now2);
    Assert.assertEquals(output, row.get("dateAdded"));
  }

}

@@ -0,0 +1,201 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import org.apache.solr.common.SolrInputDocument;
import static org.apache.solr.handler.dataimport.AbstractDataImportHandlerTest.createMap;
import org.junit.Assert;
import org.junit.Test;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

/**
 * <p>
 * Test for DocBuilder
 * </p>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class TestDocBuilder {

  @Test
  public void loadClass() throws Exception {
    Class clz = DocBuilder.loadClass("RegexTransformer");
    Assert.assertNotNull(clz);
  }

  @Test
  public void singleEntityNoRows() {
    try {
      DataImporter di = new DataImporter();
      di.loadDataConfig(dc_singleEntity);
      DataConfig cfg = di.getConfig();
      DataConfig.Entity ent = cfg.documents.get(0).entities.get(0);
      for (DataConfig.Field field : ent.fields) {
        field.nameOrColName = field.name = field.column;
      }
      MockDataSource.setIterator("select * from x", new ArrayList().iterator());
      ent.dataSrc = new MockDataSource();
      ent.isDocRoot = true;
      DataImporter.RequestParams rp = new DataImporter.RequestParams();
      rp.command = "full-import";
      SolrWriterImpl swi = new SolrWriterImpl();
      di.runCmd(rp, swi, Collections.EMPTY_MAP);
      Assert.assertEquals(Boolean.TRUE, swi.deleteAllCalled);
      Assert.assertEquals(Boolean.TRUE, swi.commitCalled);
      Assert.assertEquals(0, swi.docs.size());
      Assert.assertEquals(1, di.getDocBuilder().importStatistics.queryCount.get());
      Assert.assertEquals(0, di.getDocBuilder().importStatistics.docCount.get());
      Assert.assertEquals(0, di.getDocBuilder().importStatistics.rowsCount.get());
    } finally {
      MockDataSource.clearCache();
    }
  }

  @Test
  public void singleEntityOneRow() {
    try {
      DataImporter di = new DataImporter();
      di.loadDataConfig(dc_singleEntity);
      DataConfig cfg = di.getConfig();
      DataConfig.Entity ent = cfg.documents.get(0).entities.get(0);
      for (DataConfig.Field field : ent.fields) {
        field.nameOrColName = field.name = field.column;
      }
      List l = new ArrayList();
      l.add(createMap("id", 1, "desc", "one"));
      MockDataSource.setIterator("select * from x", l.iterator());
      ent.dataSrc = new MockDataSource();
      ent.isDocRoot = true;
      DataImporter.RequestParams rp = new DataImporter.RequestParams();
      rp.command = "full-import";
      SolrWriterImpl swi = new SolrWriterImpl();
      di.runCmd(rp, swi, Collections.EMPTY_MAP);
      Assert.assertEquals(Boolean.TRUE, swi.deleteAllCalled);
      Assert.assertEquals(Boolean.TRUE, swi.commitCalled);
      Assert.assertEquals(1, swi.docs.size());
      Assert.assertEquals(1, di.getDocBuilder().importStatistics.queryCount.get());
      Assert.assertEquals(1, di.getDocBuilder().importStatistics.docCount.get());
      Assert.assertEquals(1, di.getDocBuilder().importStatistics.rowsCount.get());

      for (int i = 0; i < l.size(); i++) {
        Map<String, Object> map = (Map<String, Object>) l.get(i);
        SolrInputDocument doc = swi.docs.get(i);
        for (Map.Entry<String, Object> entry : map.entrySet()) {
          Assert.assertEquals(entry.getValue(), doc.getFieldValue(entry.getKey()));
        }
      }
    } finally {
      MockDataSource.clearCache();
    }
  }

  @Test
  public void singleEntityMultipleRows() {
    try {
      DataImporter di = new DataImporter();
      di.loadDataConfig(dc_singleEntity);
      DataConfig cfg = di.getConfig();
      DataConfig.Entity ent = cfg.documents.get(0).entities.get(0);
      ent.isDocRoot = true;
      DataImporter.RequestParams rp = new DataImporter.RequestParams();
      rp.command = "full-import";
      for (DataConfig.Field field : ent.fields) {
        field.nameOrColName = field.name = field.column;
      }
      List l = new ArrayList();
      l.add(createMap("id", 1, "desc", "one"));
      l.add(createMap("id", 2, "desc", "two"));
      l.add(createMap("id", 3, "desc", "three"));

      MockDataSource.setIterator("select * from x", l.iterator());
      ent.dataSrc = new MockDataSource();
      SolrWriterImpl swi = new SolrWriterImpl();
      di.runCmd(rp, swi, Collections.EMPTY_MAP);
      Assert.assertEquals(Boolean.TRUE, swi.deleteAllCalled);
      Assert.assertEquals(Boolean.TRUE, swi.commitCalled);
      Assert.assertEquals(3, swi.docs.size());
      for (int i = 0; i < l.size(); i++) {
        Map<String, Object> map = (Map<String, Object>) l.get(i);
        SolrInputDocument doc = swi.docs.get(i);
        for (Map.Entry<String, Object> entry : map.entrySet()) {
          Assert.assertEquals(entry.getValue(), doc.getFieldValue(entry.getKey()));
        }
      }
      Assert.assertEquals(1, di.getDocBuilder().importStatistics.queryCount.get());
      Assert.assertEquals(3, di.getDocBuilder().importStatistics.docCount.get());
      Assert.assertEquals(3, di.getDocBuilder().importStatistics.rowsCount.get());
    } finally {
      MockDataSource.clearCache();
    }
  }

  static class SolrWriterImpl extends SolrWriter {
    List<SolrInputDocument> docs = new ArrayList<SolrInputDocument>();

    Boolean deleteAllCalled;

    Boolean commitCalled;

    public SolrWriterImpl() {
      super(null, ".");
    }

    public SolrDoc getSolrDocInstance() {
      return new DataImportHandler.SolrDocumentWrapper();
    }

    public boolean upload(SolrDoc d) {
      return docs.add(((DataImportHandler.SolrDocumentWrapper) d).doc);
    }

    public void log(int event, String name, Object row) {
      // Do nothing
    }

    public void doDeleteAll() {
      deleteAllCalled = Boolean.TRUE;
    }

    public void commit(boolean b) {
      commitCalled = Boolean.TRUE;
    }
  }

  public static final String dc_singleEntity = "<dataConfig>\n"
          + "    <document name=\"X\" >\n"
          + "        <entity name=\"x\" query=\"select * from x\">\n"
          + "          <field column=\"id\"/>\n"
          + "          <field column=\"desc\"/>\n"
          + "        </entity>\n"
          + "    </document>\n"
          + "</dataConfig>";

}

@@ -0,0 +1,68 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.util.ArrayList;
import java.util.List;

/**
 * <p>
 * Test for DocBuilder using the test harness
 * </p>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class TestDocBuilder2 extends AbstractDataImportHandlerTest {

  @Before
  public void setUp() throws Exception {
    super.setUp();
  }

  @After
  public void tearDown() throws Exception {
    super.tearDown();
  }

  @Override
  public String getSchemaFile() {
    return "dataimport-schema.xml";
  }

  @Override
  public String getSolrConfigFile() {
    return "dataimport-solrconfig.xml";
  }

  @Test
  @SuppressWarnings("unchecked")
  public void testSingleEntity() throws Exception {
    List rows = new ArrayList();
    rows.add(createMap("id", "1", "desc", "one"));
    MockDataSource.setIterator("select * from x", rows.iterator());

    super.runFullImport(loadDataConfig("single-entity-data-config.xml"));

    assertQ(req("id:1"), "//*[@numFound='1']");
  }

}

@@ -0,0 +1,83 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import org.junit.Assert;
import org.junit.Test;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * <p>
 * Test for EntityProcessorBase
 * </p>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class TestEntityProcessorBase {

  @Test
  public void multiTransformer() {
    List<Map<String, String>> fields = new ArrayList<Map<String, String>>();
    Map<String, String> entity = new HashMap<String, String>();
    entity.put("transformer", T1.class.getName() + "," + T2.class.getName()
            + "," + T3.class.getName());
    fields.add(TestRegexTransformer.getField("A", null, null, null, null));
    fields.add(TestRegexTransformer.getField("B", null, null, null, null));

    Context context = AbstractDataImportHandlerTest.getContext(null, null,
            null, 0, fields, entity);
    Map<String, Object> src = new HashMap<String, Object>();
    src.put("A", "NA");
    src.put("B", "NA");
    SqlEntityProcessor sep = new SqlEntityProcessor();
    sep.init(context);
    Map<String, Object> res = sep.applyTransformer(src);
    Assert.assertNotNull(res.get("T1"));
    Assert.assertNotNull(res.get("T2"));
    Assert.assertNotNull(res.get("T3"));
  }

  static class T1 extends Transformer {

    public Object transformRow(Map<String, Object> aRow, Context context) {
      aRow.put("T1", "T1 called");
      return aRow;
    }
  }

  static class T2 extends Transformer {

    public Object transformRow(Map<String, Object> aRow, Context context) {
      aRow.put("T2", "T2 called");
      return aRow;
    }
  }

  // T3 deliberately does not extend Transformer: the test verifies that a
  // plain class exposing a transformRow(Map) method is invoked reflectively.
  static class T3 {

    public Object transformRow(Map<String, Object> aRow) {
      aRow.put("T3", "T3 called");
      return aRow;
    }
  }
}

@@ -0,0 +1,118 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import static org.junit.Assert.assertEquals;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;

import java.net.URLEncoder;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;

/**
 * <p>
 * Test for EvaluatorBag
 * </p>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class TestEvaluatorBag {
  private static final String ENCODING = "UTF-8";

  VariableResolverImpl resolver;

  Map<String, String> sqlTests;

  Map<String, String> urlTests;

  @Before
  public void setUp() throws Exception {
    resolver = new VariableResolverImpl();

    sqlTests = new HashMap<String, String>();

    sqlTests.put("foo\"", "foo\"\"");
    sqlTests.put("foo'", "foo''");
    sqlTests.put("foo''", "foo''''");
    sqlTests.put("'foo\"", "''foo\"\"");
    sqlTests.put("\"Albert D'souza\"", "\"\"Albert D''souza\"\"");

    urlTests = new HashMap<String, String>();

    urlTests.put("*:*", URLEncoder.encode("*:*", ENCODING));
    urlTests.put("price:[* TO 200]", URLEncoder.encode("price:[* TO 200]",
            ENCODING));
    urlTests.put("review:\"hybrid sedan\"", URLEncoder.encode(
            "review:\"hybrid sedan\"", ENCODING));
  }

  /**
   * Test method for
   * {@link EvaluatorBag#getSqlEscapingEvaluator()}.
   */
  @Test
  public void testGetSqlEscapingEvaluator() {
    Evaluator sqlEscaper = EvaluatorBag.getSqlEscapingEvaluator();
    runTests(sqlTests, sqlEscaper);
  }

  /**
   * Test method for
   * {@link EvaluatorBag#getUrlEvaluator()}.
   */
  @Test
  public void testGetUrlEvaluator() throws Exception {
    Evaluator urlEvaluator = EvaluatorBag.getUrlEvaluator();
    runTests(urlTests, urlEvaluator);
  }

  /**
   * Test method for
   * {@link EvaluatorBag#getDateFormatEvaluator()}.
   */
  @Test
  @Ignore
  public void testGetDateFormatEvaluator() {
    Evaluator dateFormatEval = EvaluatorBag.getDateFormatEvaluator();
    assertEquals(new SimpleDateFormat("yyyy-MM-dd HH:mm").format(new Date()),
            dateFormatEval.evaluate(resolver, "'NOW',yyyy-MM-dd HH:mm"));

    Map<String, Object> map = new HashMap<String, Object>();
    map.put("key", new Date());
    resolver.addNamespace("A", map);

    assertEquals(new SimpleDateFormat("yyyy-MM-dd HH:mm").format(new Date()),
            dateFormatEval.evaluate(resolver, "A.key, yyyy-MM-dd HH:mm"));
  }

  private void runTests(Map<String, String> tests, Evaluator evaluator) {
    for (Map.Entry<String, String> entry : tests.entrySet()) {
      Map<String, Object> values = new HashMap<String, Object>();
      values.put("key", entry.getKey());
      resolver.addNamespace("A", values);

      String expected = entry.getValue();
      String actual = evaluator.evaluate(resolver, "A.key");
      assertEquals(expected, actual);
    }
  }
}

@@ -0,0 +1,108 @@
package org.apache.solr.handler.dataimport;

import org.junit.Assert;
import org.junit.Test;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

/**
 * <p>
 * Test for FileListEntityProcessor
 * </p>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class TestFileListEntityProcessor {

  @Test
  @SuppressWarnings("unchecked")
  public void testSimple() throws IOException {
    long time = System.currentTimeMillis();
    File tmpdir = new File("." + time);
    tmpdir.mkdir();
    tmpdir.deleteOnExit();
    createFile(tmpdir, "a.xml", "a.xml".getBytes(), false);
    createFile(tmpdir, "b.xml", "b.xml".getBytes(), false);
    createFile(tmpdir, "c.props", "c.props".getBytes(), false);
    Map attrs = AbstractDataImportHandlerTest.createMap(
            FileListEntityProcessor.FILE_NAME, "xml$",
            FileListEntityProcessor.BASE_DIR, tmpdir.getAbsolutePath());
    Context c = AbstractDataImportHandlerTest.getContext(null,
            new VariableResolverImpl(), null, 0, Collections.EMPTY_LIST, attrs);
    FileListEntityProcessor fileListEntityProcessor = new FileListEntityProcessor();
    fileListEntityProcessor.init(c);
    List<String> fList = new ArrayList<String>();
    while (true) {
      Map<String, Object> f = fileListEntityProcessor.nextRow();
      if (f == null)
        break;
      fList.add((String) f.get(FileListEntityProcessor.ABSOLUTE_FILE));
    }
    Assert.assertEquals(2, fList.size());
  }

  @Test
  public void testNTOT() throws IOException {
    long time = System.currentTimeMillis();
    File tmpdir = new File("." + time);
    tmpdir.mkdir();
    tmpdir.deleteOnExit();
    createFile(tmpdir, "a.xml", "a.xml".getBytes(), true);
    createFile(tmpdir, "b.xml", "b.xml".getBytes(), true);
    createFile(tmpdir, "c.props", "c.props".getBytes(), true);
    Map attrs = AbstractDataImportHandlerTest.createMap(
            FileListEntityProcessor.FILE_NAME, "xml$",
            FileListEntityProcessor.BASE_DIR, tmpdir.getAbsolutePath(),
            FileListEntityProcessor.OLDER_THAN, "'NOW'");
    Context c = AbstractDataImportHandlerTest.getContext(null,
            new VariableResolverImpl(), null, 0, Collections.EMPTY_LIST, attrs);
    FileListEntityProcessor fileListEntityProcessor = new FileListEntityProcessor();
    fileListEntityProcessor.init(c);
    List<String> fList = new ArrayList<String>();
    while (true) {
      Map<String, Object> f = fileListEntityProcessor.nextRow();
      if (f == null)
        break;
      fList.add((String) f.get(FileListEntityProcessor.ABSOLUTE_FILE));
    }
    System.out.println("List of files when given OLDER_THAN -- " + fList);
    Assert.assertEquals(2, fList.size());
    attrs = AbstractDataImportHandlerTest.createMap(
            FileListEntityProcessor.FILE_NAME, "xml$",
            FileListEntityProcessor.BASE_DIR, tmpdir.getAbsolutePath(),
            FileListEntityProcessor.NEWER_THAN, "'NOW-2HOURS'");
    c = AbstractDataImportHandlerTest.getContext(null,
            new VariableResolverImpl(), null, 0, Collections.EMPTY_LIST, attrs);
    fileListEntityProcessor.init(c);
    fList.clear();
    while (true) {
      Map<String, Object> f = fileListEntityProcessor.nextRow();
      if (f == null)
        break;
      fList.add((String) f.get(FileListEntityProcessor.ABSOLUTE_FILE));
    }
    System.out.println("List of files when given NEWER_THAN -- " + fList);
    Assert.assertEquals(2, fList.size());
  }

  public static File createFile(File tmpdir, String name, byte[] content,
                                boolean changeModifiedTime) throws IOException {
    File file = new File(tmpdir.getAbsolutePath() + File.separator + name);
    file.deleteOnExit();
    FileOutputStream f = new FileOutputStream(file);
    f.write(content);
    f.close();
    // System.out.println("before "+file.lastModified());
    if (changeModifiedTime)
      file.setLastModified(System.currentTimeMillis() - 3600000);
    // System.out.println("after "+file.lastModified());
    return file;
  }
}

@@ -0,0 +1,78 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;

import java.util.*;

/**
 * <p>
 * Test for JdbcDataSource
 * </p>
 * <p/>
 * <p>
 * Note: The tests are ignored because no database is available for testing
 * </p>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class TestJdbcDataSource {

  @Test
  @Ignore
  public void basic() throws Exception {
    JdbcDataSource dataSource = new JdbcDataSource();
    Properties p = new Properties();
    p.put("driver", "com.mysql.jdbc.Driver");
    p.put("url", "jdbc:mysql://localhost/autos");
    p.put("user", "root");
    p.put("password", "");

    List<Map<String, String>> flds = new ArrayList<Map<String, String>>();
    Map<String, String> f = new HashMap<String, String>();
    f.put("column", "trim_id");
    f.put("type", "long");
    flds.add(f);
    f = new HashMap<String, String>();
    f.put("column", "msrp");
    f.put("type", "float");
    flds.add(f);

    Context c = AbstractDataImportHandlerTest.getContext(null, null,
            dataSource, 0, flds, null);
    dataSource.init(c, p);
    Iterator<Map<String, Object>> i = dataSource
            .getData("select make,model,year,msrp,trim_id from atrimlisting where make='Acura'");
    int count = 0;
    Object msrp = null;
    Object trim_id = null;
    while (i.hasNext()) {
      Map<String, Object> map = i.next();
      msrp = map.get("msrp");
      trim_id = map.get("trim_id");
      count++;
    }
    Assert.assertEquals(5, count);
    Assert.assertEquals(Float.class, msrp.getClass());
    Assert.assertEquals(Long.class, trim_id.getClass());
  }

}

@@ -0,0 +1,78 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import org.junit.Assert;
import org.junit.Test;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * <p>
 * Test for NumberFormatTransformer
 * </p>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class TestNumberFormatTransformer {

  @Test
  @SuppressWarnings("unchecked")
  public void testTransformRow_SingleNumber() {
    List l = new ArrayList();
    l.add(AbstractDataImportHandlerTest.createMap("column", "num",
            NumberFormatTransformer.FORMAT_STYLE, NumberFormatTransformer.NUMBER));
    Context c = AbstractDataImportHandlerTest.getContext(null, null, null, 0,
            l, null);
    Map m = AbstractDataImportHandlerTest.createMap("num", "123,567");
    new NumberFormatTransformer().transformRow(m, c);
    Assert.assertEquals(new Long(123567), m.get("num"));
  }

  @Test
  @SuppressWarnings("unchecked")
  public void testTransformRow_MultipleNumbers() throws Exception {
    List fields = new ArrayList();
    fields.add(AbstractDataImportHandlerTest.createMap(DataImporter.COLUMN,
            "inputs"));
    fields.add(AbstractDataImportHandlerTest.createMap(DataImporter.COLUMN,
            "outputs", RegexTransformer.SRC_COL_NAME, "inputs",
            NumberFormatTransformer.FORMAT_STYLE, NumberFormatTransformer.NUMBER));

    List inputs = new ArrayList();
    inputs.add("123,567");
    inputs.add("245,678");
    Map row = AbstractDataImportHandlerTest.createMap("inputs", inputs);

    VariableResolverImpl resolver = new VariableResolverImpl();
    resolver.addNamespace("e", row);

    Context context = AbstractDataImportHandlerTest.getContext(null, resolver,
            null, 0, fields, null);
    new NumberFormatTransformer().transformRow(row, context);

    List output = new ArrayList();
    output.add(new Long(123567));
    output.add(new Long(245678));
    Map outputRow = AbstractDataImportHandlerTest.createMap("inputs", inputs,
            "outputs", output);

    Assert.assertEquals(outputRow, row);
  }
}

@@ -0,0 +1,112 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import org.junit.Assert;
import org.junit.Test;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * <p>
 * Test for RegexTransformer
 * </p>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class TestRegexTransformer {

  @Test
  public void commaSeparated() {
    List<Map<String, String>> fields = new ArrayList<Map<String, String>>();
    fields.add(getField("col1", "string", null, "a", ","));
    Context context = AbstractDataImportHandlerTest.getContext(null, null,
            null, 0, fields, null);
    Map<String, Object> src = new HashMap<String, Object>();
    String s = "a,bb,cc,d";
    src.put("a", s);
    Map<String, Object> result = new RegexTransformer().transformRow(src,
            context);
    Assert.assertEquals(2, result.size());
    Assert.assertEquals(4, ((List) result.get("col1")).size());
  }

  @Test
  public void replaceWith() {
    List<Map<String, String>> fields = new ArrayList<Map<String, String>>();
    Map<String, String> fld = getField("name", "string", "'", null, null);
    fld.put("replaceWith", "''");
    fields.add(fld);
    Context context = AbstractDataImportHandlerTest.getContext(null, null,
            null, 0, fields, null);
    Map<String, Object> src = new HashMap<String, Object>();
    String s = "D'souza";
    src.put("name", s);
    Map<String, Object> result = new RegexTransformer().transformRow(src,
            context);
    Assert.assertEquals("D''souza", result.get("name"));
  }

  @Test
  public void mileage() {
    Context context = AbstractDataImportHandlerTest.getContext(null, null,
            null, 0, getFields(), null);

    Map<String, Object> src = new HashMap<String, Object>();
    String s = "Fuel Economy Range: 26 mpg Hwy, 19 mpg City";
    src.put("rowdata", s);
    Map<String, Object> result = new RegexTransformer().transformRow(src,
            context);
    Assert.assertEquals(3, result.size());
    Assert.assertEquals(s, result.get("rowdata"));
    Assert.assertEquals("26", result.get("highway_mileage"));
    Assert.assertEquals("19", result.get("city_mileage"));
  }

  public static List<Map<String, String>> getFields() {
    List<Map<String, String>> fields = new ArrayList<Map<String, String>>();
    fields.add(getField("city_mileage", "sint",
            "Fuel Economy Range:\\s*?\\d*?\\s*?mpg Hwy,\\s*?(\\d*?)\\s*?mpg City",
            "rowdata", null));
    fields.add(getField("highway_mileage", "sint",
            "Fuel Economy Range:\\s*?(\\d*?)\\s*?mpg Hwy,\\s*?\\d*?\\s*?mpg City",
            "rowdata", null));
    fields.add(getField("seating_capacity", "sint", "Seating capacity:(.*)",
            "rowdata", null));
    fields.add(getField("warranty", "string", "Warranty:(.*)", "rowdata", null));
    fields.add(getField("rowdata", "string", null, "rowdata", null));
    return fields;
  }

  public static Map<String, String> getField(String col, String type,
                                             String re, String srcCol, String splitBy) {
    HashMap<String, String> vals = new HashMap<String, String>();
    vals.put("column", col);
    vals.put("type", type);
    vals.put("regex", re);
    vals.put("sourceColName", srcCol);
    vals.put("splitBy", splitBy);
    return vals;
  }
}

@@ -0,0 +1,146 @@
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import org.junit.Assert;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Element;
|
||||
import org.xml.sax.InputSource;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilder;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import java.io.StringReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Test for ScriptTransformer
|
||||
* </p>
|
||||
* <p/>
|
||||
* All tests in this have been ignored because script support is only available
|
||||
* in Java 1.6+
|
||||
*
|
||||
* @version $Id$
|
||||
* @since solr 1.3
|
||||
*/
|
||||
public class TestScriptTransformer {
|
||||
|
||||
@Test
|
||||
@Ignore
|
||||
public void basic() {
|
||||
String script = "function f1(row,context){"
|
||||
+ "row.put('name','Hello ' + row.get('name'));" + "return row;\n" + "}";
|
||||
Context context = getContext("f1", script);
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put("name", "Scott");
|
||||
SqlEntityProcessor sep = new SqlEntityProcessor();
|
||||
sep.init(context);
|
||||
sep.applyTransformer(map);
|
||||
Assert.assertEquals(map.get("name"), "Hello Scott");
|
||||
|
||||
}
|
||||
|
||||
private Context getContext(String funcName, String script) {
|
||||
List<Map<String, String>> fields = new ArrayList<Map<String, String>>();
|
||||
Map<String, String> entity = new HashMap<String, String>();
|
||||
entity.put("name", "hello");
|
||||
entity.put("transformer", "script:" + funcName);
|
||||
Map<String, Object> dataImporterNs = new HashMap<String, Object>();
|
||||
dataImporterNs.put(DataConfig.SCRIPT_LANG, "JavaScript");
|
||||
dataImporterNs.put(DataConfig.SCRIPT, script);
|
||||
VariableResolverImpl vr = new VariableResolverImpl();
|
||||
vr.addNamespace(DataConfig.IMPORTER_NS, dataImporterNs);
|
||||
|
||||
Context context = AbstractDataImportHandlerTest.getContext(null, vr, null,
|
||||
0, fields, entity);
|
||||
return context;
|
||||
}
|
||||
|
||||
@Test
|
||||
@Ignore
|
||||
public void oneparam() {
|
||||
|
||||
String script = "function f1(row){"
|
||||
+ "row.put('name','Hello ' + row.get('name'));" + "return row;\n" + "}";
|
||||
|
||||
Context context = getContext("f1", script);
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put("name", "Scott");
|
||||
SqlEntityProcessor sep = new SqlEntityProcessor();
|
||||
sep.init(context);
|
||||
sep.applyTransformer(map);
|
||||
Assert.assertEquals(map.get("name"), "Hello Scott");
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
@Ignore
|
||||
public void readScriptTag() throws Exception {
|
||||
DocumentBuilder builder = DocumentBuilderFactory.newInstance()
|
||||
.newDocumentBuilder();
|
||||
Document document = builder.parse(new InputSource(new StringReader(xml)));
|
||||
DataConfig config = new DataConfig();
|
||||
config.readFromXml((Element) document.getElementsByTagName("dataConfig")
|
||||
.item(0));
|
||||
Assert.assertTrue(config.script.script.indexOf("checkNextToken") > -1);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Ignore
|
||||
public void checkScript() throws Exception {
|
||||
DocumentBuilder builder = DocumentBuilderFactory.newInstance()
|
||||
.newDocumentBuilder();
|
||||
Document document = builder.parse(new InputSource(new StringReader(xml)));
|
||||
DataConfig config = new DataConfig();
|
||||
config.readFromXml((Element) document.getElementsByTagName("dataConfig")
|
||||
.item(0));
|
||||
|
||||
Context c = getContext("checkNextToken", config.script.script);
|
||||
|
||||
Map map = new HashMap();
|
||||
map.put("nextToken", "hello");
|
||||
SqlEntityProcessor sep = new SqlEntityProcessor();
|
||||
sep.init(c);
|
||||
sep.applyTransformer(map);
|
||||
Assert.assertEquals("true", map.get("$hasMore"));
|
||||
map = new HashMap();
|
||||
map.put("nextToken", "");
|
||||
sep.applyTransformer(map);
|
||||
Assert.assertNull(map.get("$hasMore"));
|
||||
|
||||
}
|
||||
|
||||
static String xml = "<dataConfig>\n"
|
||||
+ "<script><![CDATA[\n"
|
||||
+ "function checkNextToken(row)\t{\n"
|
||||
+ " var nt = row.get('nextToken');"
|
||||
+ " if (nt && nt !='' ){ "
|
||||
+ " row.put('$hasMore', 'true');}\n"
|
||||
+ " return row;\n"
|
||||
+ "}]]></script>\t<document>\n"
|
||||
+ "\t\t<entity name=\"mbx\" pk=\"articleNumber\" processor=\"XPathEntityProcessor\"\n"
|
||||
+ "\t\t\turl=\"?boardId=${dataimporter.defaults.boardId}&maxRecords=20&includeBody=true&startDate=${dataimporter.defaults.startDate}&guid=:autosearch001&reqId=1&transactionId=stringfortracing&listPos=${mbx.nextToken}\"\n"
|
||||
+ "\t\t\tforEach=\"/mbmessage/articles/navigation | /mbmessage/articles/article\" transformer=\"script:checkNextToken\">\n"
|
||||
+ "\n" + "\t\t\t<field column=\"nextToken\"\n"
|
||||
+ "\t\t\t\txpath=\"/mbmessage/articles/navigation/nextToken\" />\n"
|
||||
+ "\n" + "\t\t</entity>\n" + "\t</document>\n" + "</dataConfig>";
|
||||
}
|
|
@@ -0,0 +1,179 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import org.junit.Assert;
import org.junit.Test;

import java.util.*;

/**
 * <p>
 * Test for SqlEntityProcessor
 * </p>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class TestSqlEntityProcessor {
  private static ThreadLocal<Integer> local = new ThreadLocal<Integer>();

  @Test
  public void singleBatch() {
    SqlEntityProcessor sep = new SqlEntityProcessor();
    List<Map<String, Object>> rows = getRows(3);
    VariableResolverImpl vr = new VariableResolverImpl();
    HashMap<String, String> ea = new HashMap<String, String>();
    ea.put("query", "SELECT * FROM A");
    Context c = AbstractDataImportHandlerTest.getContext(null, vr, getDs(rows),
        Context.FULL_DUMP, null, ea);
    sep.init(c);
    int count = 0;
    while (true) {
      Map<String, Object> r = sep.nextRow();
      if (r == null)
        break;
      count++;
    }

    Assert.assertEquals(3, count);
  }

  @Test
  public void transformer() {
    SqlEntityProcessor sep = new SqlEntityProcessor();
    List<Map<String, Object>> rows = getRows(2);
    VariableResolverImpl vr = new VariableResolverImpl();
    HashMap<String, String> ea = new HashMap<String, String>();
    ea.put("query", "SELECT * FROM A");
    ea.put("transformer", T.class.getName());

    sep.init(AbstractDataImportHandlerTest.getContext(null, vr, getDs(rows),
        Context.FULL_DUMP, null, ea));
    List<Map<String, Object>> rs = new ArrayList<Map<String, Object>>();
    Map<String, Object> r = null;
    while (true) {
      r = sep.nextRow();
      if (r == null)
        break;
      rs.add(r);
    }
    Assert.assertEquals(2, rs.size());
    Assert.assertNotNull(rs.get(0).get("T"));
  }

  @Test
  public void transformerWithReflection() {
    SqlEntityProcessor sep = new SqlEntityProcessor();
    List<Map<String, Object>> rows = getRows(2);
    VariableResolverImpl vr = new VariableResolverImpl();
    HashMap<String, String> ea = new HashMap<String, String>();
    ea.put("query", "SELECT * FROM A");
    ea.put("transformer", T3.class.getName());

    sep.init(AbstractDataImportHandlerTest.getContext(null, vr, getDs(rows),
        Context.FULL_DUMP, null, ea));
    List<Map<String, Object>> rs = new ArrayList<Map<String, Object>>();
    Map<String, Object> r = null;
    while (true) {
      r = sep.nextRow();
      if (r == null)
        break;
      rs.add(r);
    }
    Assert.assertEquals(2, rs.size());
    Assert.assertNotNull(rs.get(0).get("T3"));
  }

  @Test
  public void transformerList() {
    SqlEntityProcessor sep = new SqlEntityProcessor();
    List<Map<String, Object>> rows = getRows(2);
    VariableResolverImpl vr = new VariableResolverImpl();

    HashMap<String, String> ea = new HashMap<String, String>();
    ea.put("query", "SELECT * FROM A");
    ea.put("transformer", T2.class.getName());
    sep.init(AbstractDataImportHandlerTest.getContext(null, vr, getDs(rows),
        Context.FULL_DUMP, null, ea));

    local.set(0);
    Map<String, Object> r = null;
    int count = 0;
    while (true) {
      r = sep.nextRow();
      if (r == null)
        break;
      count++;
    }
    Assert.assertEquals(2, local.get().intValue());
    Assert.assertEquals(4, count);
  }

  private List<Map<String, Object>> getRows(int count) {
    List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
    for (int i = 0; i < count; i++) {
      Map<String, Object> row = new HashMap<String, Object>();
      row.put("id", i);
      row.put("value", "The value is " + i);
      rows.add(row);
    }
    return rows;
  }

  private static DataSource<Iterator<Map<String, Object>>> getDs(
      final List<Map<String, Object>> rows) {
    return new DataSource<Iterator<Map<String, Object>>>() {
      public Iterator<Map<String, Object>> getData(String query) {
        return rows.iterator();
      }

      public void init(Context context, Properties initProps) {
      }

      public void close() {
      }
    };
  }

  public static class T extends Transformer {
    public Object transformRow(Map<String, Object> aRow, Context context) {
      aRow.put("T", "Class T");
      return aRow;
    }
  }

  public static class T3 {
    public Object transformRow(Map<String, Object> aRow) {
      aRow.put("T3", "T3 class");
      return aRow;
    }
  }

  public static class T2 extends Transformer {
    public Object transformRow(Map<String, Object> aRow, Context context) {
      Integer count = local.get();
      local.set(count + 1);
      List<Map<String, Object>> l = new ArrayList<Map<String, Object>>();
      l.add(aRow);
      l.add(aRow);
      return l;
    }
  }
}
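The tests above exercise three transformer contracts: extending Transformer (T), a reflection-discovered transformRow(Map) with no base class (T3), and row multiplication by returning a List (T2). A minimal sketch of a user-written transformer following the first contract; the class name is a hypothetical example and the snippet assumes the same package and imports as the tests above:

// Sketch only: "TrimTransformer" is an illustrative name, not part of this patch.
public class TrimTransformer extends Transformer {
  public Object transformRow(Map<String, Object> row, Context context) {
    Object value = row.get("value");
    if (value != null)
      row.put("value", value.toString().trim());
    return row; // return a List<Map<String, Object>> instead to emit multiple rows, as T2 does
  }
}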
@@ -0,0 +1,105 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import org.junit.Test;

import java.util.ArrayList;
import java.util.List;

/**
 * <p>
 * Test for SqlEntityProcessor which checks full and delta imports using the
 * test harness
 * </p>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class TestSqlEntityProcessor2 extends AbstractDataImportHandlerTest {
  @Override
  public String getSchemaFile() {
    return "dataimport-schema.xml";
  }

  @Override
  public String getSolrConfigFile() {
    return "dataimport-solrconfig.xml";
  }

  @Override
  public void setUp() throws Exception {
    super.setUp();
  }

  @Override
  public void tearDown() throws Exception {
    super.tearDown();
  }

  @Test
  @SuppressWarnings("unchecked")
  public void testCompositePk_FullImport() throws Exception {
    List parentRow = new ArrayList();
    parentRow.add(createMap("id", "1"));
    MockDataSource.setIterator("select * from x", parentRow.iterator());

    List childRow = new ArrayList();
    childRow.add(createMap("desc", "hello"));

    MockDataSource.setIterator("select * from y where y.A=1", childRow
        .iterator());

    super.runFullImport(dataConfig);

    assertQ(req("id:1"), "//*[@numFound='1']");
    assertQ(req("desc:hello"), "//*[@numFound='1']");
  }

  @Test
  @SuppressWarnings("unchecked")
  public void testCompositePk_DeltaImport() throws Exception {
    List deltaRow = new ArrayList();
    deltaRow.add(createMap("id", "5"));
    MockDataSource.setIterator("select id from x where last_modified > NOW",
        deltaRow.iterator());

    List parentRow = new ArrayList();
    parentRow.add(createMap("id", "5"));
    MockDataSource.setIterator("select * from x where x.id = '5'", parentRow
        .iterator());

    List childRow = new ArrayList();
    childRow.add(createMap("desc", "hello"));
    MockDataSource.setIterator("select * from y where y.A=5", childRow
        .iterator());

    super.runDeltaImport(dataConfig);

    assertQ(req("id:5"), "//*[@numFound='1']");
    assertQ(req("desc:hello"), "//*[@numFound='1']");
  }

  private static String dataConfig = "<dataConfig>\n"
      + "  <document>\n"
      + "    <entity name=\"x\" pk=\"x.id\" query=\"select * from x\" deltaQuery=\"select id from x where last_modified > NOW\">\n"
      + "      <field column=\"id\" />\n"
      + "      <entity name=\"y\" query=\"select * from y where y.A=${x.id}\">\n"
      + "        <field column=\"desc\" />\n"
      + "      </entity>\n" + "    </entity>\n"
      + "  </document>\n" + "</dataConfig>\n";
}
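The mock expectations in testCompositePk_DeltaImport make the delta-import sequence visible: the deltaQuery finds changed keys, then each key drives the parent query and, via ${x.id}, the child query. A schematic trace of the three calls the test satisfies (values copied from the test above; a sketch, not part of the patch):

// 1. deltaQuery : select id from x where last_modified > NOW  -> {id=5}
// 2. parent row : select * from x where x.id = '5'            -> {id=5}
// 3. child row  : select * from y where y.A=5                 -> {desc=hello}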
@@ -0,0 +1,55 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import org.junit.Assert;
import org.junit.Test;

import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import java.util.regex.Pattern;

/**
 * <p>
 * Test for TemplateString
 * </p>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class TestTemplateString {
  @Test
  public void testSimple() {
    VariableResolverImpl vri = new VariableResolverImpl();
    Map<String, Object> ns = new HashMap<String, Object>();
    ns.put("last_index_time", Long.valueOf(1199429363730L));
    vri.addNamespace("indexer", ns);
    Assert.assertEquals(
        "select id from subject where last_modified > 1199429363730",
        new TemplateString().replaceTokens(
            "select id from subject where last_modified > ${indexer.last_index_time}",
            vri));
  }

  private static Properties EMPTY_PROPS = new Properties();

  private static Pattern SELECT_WHERE_PATTERN = Pattern.compile(
      "^\\s*(select\\b.*?\\b)(where).*", Pattern.CASE_INSENSITIVE);
}
@@ -0,0 +1,60 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import org.junit.Assert;
import org.junit.Test;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * <p>
 * Test for TemplateTransformer
 * </p>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class TestTemplateTransformer {

  @Test
  @SuppressWarnings("unchecked")
  public void testTransformRow() {
    List fields = new ArrayList();
    fields.add(AbstractDataImportHandlerTest.createMap("column", "firstName"));
    fields.add(AbstractDataImportHandlerTest.createMap("column", "lastName"));
    fields.add(AbstractDataImportHandlerTest.createMap("column", "middleName"));
    fields.add(AbstractDataImportHandlerTest.createMap("column", "name",
        TemplateTransformer.TEMPLATE,
        "${e.lastName}, ${e.firstName} ${e.middleName}"));

    Map row = AbstractDataImportHandlerTest.createMap("firstName", "Shalin",
        "middleName", "Shekhar", "lastName", "Mangar");

    VariableResolverImpl resolver = new VariableResolverImpl();
    Map<String, String> entityAttrs = AbstractDataImportHandlerTest.createMap(
        "name", "e");

    Context context = AbstractDataImportHandlerTest.getContext(null, resolver,
        null, 0, fields, entityAttrs);
    new TemplateTransformer().transformRow(row, context);
    Assert.assertEquals("Mangar, Shalin Shekhar", row.get("name"));
  }

}
@@ -0,0 +1,139 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import org.junit.Assert;
import org.junit.Test;

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;

/**
 * <p>
 * Test for VariableResolver
 * </p>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class TestVariableResolver {

  @Test
  public void testSimpleNamespace() {
    VariableResolverImpl vri = new VariableResolverImpl();
    Map<String, Object> ns = new HashMap<String, Object>();
    ns.put("world", "WORLD");
    vri.addNamespace("hello", ns);
    Assert.assertEquals("WORLD", vri.resolve("hello.world"));
  }

  @Test
  public void testNestedNamespace() {
    VariableResolverImpl vri = new VariableResolverImpl();
    Map<String, Object> ns = new HashMap<String, Object>();
    ns.put("world", "WORLD");
    vri.addNamespace("hello", ns);
    ns = new HashMap<String, Object>();
    ns.put("world1", "WORLD1");
    vri.addNamespace("hello.my", ns);
    Assert.assertEquals("WORLD1", vri.resolve("hello.my.world1"));
  }

  @Test
  public void test3LevelNestedNamespace() {
    VariableResolverImpl vri = new VariableResolverImpl();
    Map<String, Object> ns = new HashMap<String, Object>();
    ns.put("world", "WORLD");
    vri.addNamespace("hello", ns);
    ns = new HashMap<String, Object>();
    ns.put("world1", "WORLD1");
    vri.addNamespace("hello.my.new", ns);
    Assert.assertEquals("WORLD1", vri.resolve("hello.my.new.world1"));
  }

  @Test
  public void dateNamespaceWithValue() {
    VariableResolverImpl vri = new VariableResolverImpl();
    HashMap<String, Evaluator> evaluators = new HashMap<String, Evaluator>();
    evaluators.put("formatDate", EvaluatorBag.getDateFormatEvaluator());
    vri.addNamespace("dataimporter.functions", EvaluatorBag
        .getFunctionsNamespace(vri, evaluators));
    Map<String, Object> ns = new HashMap<String, Object>();
    Date d = new Date();
    ns.put("dt", d);
    vri.addNamespace("A", ns);
    Assert.assertEquals(
        new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(d),
        vri.replaceTokens("${dataimporter.functions.formatDate(A.dt,yyyy-MM-dd HH:mm:ss)}"));
  }

  @Test
  public void dateNamespaceWithExpr() {
    VariableResolverImpl vri = new VariableResolverImpl();
    HashMap<String, Evaluator> evaluators = new HashMap<String, Evaluator>();
    evaluators.put("formatDate", EvaluatorBag.getDateFormatEvaluator());
    vri.addNamespace("dataimporter.functions", EvaluatorBag
        .getFunctionsNamespace(vri, evaluators));
    String s = vri
        .replaceTokens("${dataimporter.functions.formatDate('NOW',yyyy-MM-dd HH:mm)}");
    Assert.assertEquals(new SimpleDateFormat("yyyy-MM-dd HH:mm")
        .format(new Date()), s);
  }

  @Test
  public void testDefaultNamespace() {
    VariableResolverImpl vri = new VariableResolverImpl();
    Map<String, Object> ns = new HashMap<String, Object>();
    ns.put("world", "WORLD");
    vri.addNamespace(null, ns);
    Assert.assertEquals("WORLD", vri.resolve("world"));
  }

  @Test
  public void testDefaultNamespace1() {
    VariableResolverImpl vri = new VariableResolverImpl();
    Map<String, Object> ns = new HashMap<String, Object>();
    ns.put("world", "WORLD");
    vri.addNamespace(null, ns);
    Assert.assertEquals("WORLD", vri.resolve("world"));
  }

  @Test
  public void testFunctionNamespace1() {
    final VariableResolverImpl resolver = new VariableResolverImpl();
    final Map<String, Evaluator> evaluators = new HashMap<String, Evaluator>();
    evaluators.put("formatDate", EvaluatorBag.getDateFormatEvaluator());
    evaluators.put("test", new Evaluator() {
      public String evaluate(VariableResolver resolver, String expression) {
        return "Hello World";
      }
    });

    resolver.addNamespace("dataimporter.functions", EvaluatorBag
        .getFunctionsNamespace(resolver, evaluators));
    String s = resolver
        .replaceTokens("${dataimporter.functions.formatDate('NOW',yyyy-MM-dd HH:mm)}");
    Assert.assertEquals(new SimpleDateFormat("yyyy-MM-dd HH:mm")
        .format(new Date()), s);
    Assert.assertEquals("Hello World", resolver
        .replaceTokens("${dataimporter.functions.test('TEST')}"));
  }
}
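testFunctionNamespace1 above shows the plug-in point for custom functions; a minimal sketch of registering one and invoking it through token replacement (the "upper" function name is a hypothetical example, and exactly what the expression argument contains is as the resolver passes it, per the test above):

// Sketch: register a custom Evaluator under dataimporter.functions.
VariableResolverImpl resolver = new VariableResolverImpl();
Map<String, Evaluator> evaluators = new HashMap<String, Evaluator>();
evaluators.put("upper", new Evaluator() { // "upper" is an illustrative name
  public String evaluate(VariableResolver r, String expression) {
    return expression.toUpperCase(); // expression arrives as written in the template
  }
});
resolver.addNamespace("dataimporter.functions", EvaluatorBag
    .getFunctionsNamespace(resolver, evaluators));
String s = resolver.replaceTokens("${dataimporter.functions.upper('abc')}");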
@@ -0,0 +1,160 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import static org.apache.solr.handler.dataimport.AbstractDataImportHandlerTest.createMap;
import org.junit.Assert;
import org.junit.Test;

import java.io.File;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Properties;

/**
 * <p>
 * Test for XPathEntityProcessor
 * </p>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class TestXPathEntityProcessor {
  @Test
  public void withFieldsAndXpath() throws Exception {
    long time = System.currentTimeMillis();
    File tmpdir = new File("." + time);
    tmpdir.mkdir();
    tmpdir.deleteOnExit();
    TestFileListEntityProcessor.createFile(tmpdir, "x.xsl", xsl.getBytes(),
        false);
    Map entityAttrs = createMap("name", "e", "url", "cd.xml",
        XPathEntityProcessor.FOR_EACH, "/catalog/cd");
    List fields = new ArrayList();
    fields.add(createMap("column", "title", "xpath", "/catalog/cd/title"));
    fields.add(createMap("column", "artist", "xpath", "/catalog/cd/artist"));
    fields.add(createMap("column", "year", "xpath", "/catalog/cd/year"));
    Context c = AbstractDataImportHandlerTest.getContext(null,
        new VariableResolverImpl(), getds(), 0, fields, entityAttrs);
    XPathEntityProcessor xPathEntityProcessor = new XPathEntityProcessor();
    xPathEntityProcessor.init(c);
    List<Map<String, Object>> result = new ArrayList<Map<String, Object>>();
    while (true) {
      Map<String, Object> row = xPathEntityProcessor.nextRow();
      if (row == null)
        break;
      result.add(row);
    }
    Assert.assertEquals(3, result.size());
    Assert.assertEquals("Empire Burlesque", result.get(0).get("title"));
    Assert.assertEquals("Bonnie Tyler", result.get(1).get("artist"));
    Assert.assertEquals("1982", result.get(2).get("year"));
  }

  @Test
  public void withDefaultSolrAndXsl() throws Exception {
    long time = System.currentTimeMillis();
    File tmpdir = new File("." + time);
    tmpdir.mkdir();
    tmpdir.deleteOnExit();
    TestFileListEntityProcessor.createFile(tmpdir, "x.xsl", xsl.getBytes(),
        false);
    Map entityAttrs = createMap("name", "e",
        XPathEntityProcessor.USE_SOLR_ADD_SCHEMA, "true", "xsl", ""
            + new File(tmpdir, "x.xsl").getAbsolutePath(), "url", "cd.xml");
    Context c = AbstractDataImportHandlerTest.getContext(null,
        new VariableResolverImpl(), getds(), 0, null, entityAttrs);
    XPathEntityProcessor xPathEntityProcessor = new XPathEntityProcessor();
    xPathEntityProcessor.init(c);
    List<Map<String, Object>> result = new ArrayList<Map<String, Object>>();
    while (true) {
      Map<String, Object> row = xPathEntityProcessor.nextRow();
      if (row == null)
        break;
      result.add(row);
    }
    Assert.assertEquals(3, result.size());
    Assert.assertEquals("Empire Burlesque", result.get(0).get("title"));
    Assert.assertEquals("Bonnie Tyler", result.get(1).get("artist"));
    Assert.assertEquals("1982", result.get(2).get("year"));
  }

  private DataSource<Reader> getds() {
    return new DataSource<Reader>() {

      public void init(Context context, Properties initProps) {
      }

      public void close() {
      }

      public Reader getData(String query) {
        return new StringReader(cdData);
      }
    };
  }

  private static final String xsl = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
      + "<xsl:stylesheet version=\"1.0\"\n"
      + "xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\">\n"
      + "<xsl:output version='1.0' method='xml' encoding='UTF-8' indent='yes'/>\n"
      + "\n"
      + "<xsl:template match=\"/\">\n"
      + "  <add> \n"
      + "    <xsl:for-each select=\"catalog/cd\">\n"
      + "      <doc>\n"
      + "        <field name=\"title\"><xsl:value-of select=\"title\"/></field>\n"
      + "        <field name=\"artist\"><xsl:value-of select=\"artist\"/></field>\n"
      + "        <field name=\"country\"><xsl:value-of select=\"country\"/></field>\n"
      + "        <field name=\"company\"><xsl:value-of select=\"company\"/></field> \n"
      + "        <field name=\"price\"><xsl:value-of select=\"price\"/></field>\n"
      + "        <field name=\"year\"><xsl:value-of select=\"year\"/></field> \n"
      + "      </doc>\n"
      + "    </xsl:for-each>\n"
      + "  </add> \n"
      + "</xsl:template>\n" + "</xsl:stylesheet>";

  private static final String cdData = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
      + "<?xml-stylesheet type=\"text/xsl\" href=\"solr.xsl\"?>\n"
      + "<catalog>\n"
      + "\t<cd>\n"
      + "\t\t<title>Empire Burlesque</title>\n"
      + "\t\t<artist>Bob Dylan</artist>\n"
      + "\t\t<country>USA</country>\n"
      + "\t\t<company>Columbia</company>\n"
      + "\t\t<price>10.90</price>\n"
      + "\t\t<year>1985</year>\n"
      + "\t</cd>\n"
      + "\t<cd>\n"
      + "\t\t<title>Hide your heart</title>\n"
      + "\t\t<artist>Bonnie Tyler</artist>\n"
      + "\t\t<country>UK</country>\n"
      + "\t\t<company>CBS Records</company>\n"
      + "\t\t<price>9.90</price>\n"
      + "\t\t<year>1988</year>\n"
      + "\t</cd>\n"
      + "\t<cd>\n"
      + "\t\t<title>Greatest Hits</title>\n"
      + "\t\t<artist>Dolly Parton</artist>\n"
      + "\t\t<country>USA</country>\n"
      + "\t\t<company>RCA</company>\n"
      + "\t\t<price>9.90</price>\n"
      + "\t\t<year>1982</year>\n" + "\t</cd>\n" + "</catalog>\t";
}
@@ -0,0 +1,220 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import org.junit.Assert;
import org.junit.Test;

import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * <p>
 * Test for XPathRecordReader
 * </p>
 *
 * @version $Id$
 * @since solr 1.3
 */
public class TestXPathRecordReader {
  @Test
  public void basic() {
    String xml = "<root>\n" + " <b>\n" + "  <c>Hello C1</c>\n"
        + "  <c>Hello C1</c>\n" + " </b>\n" + " <b>\n"
        + "  <c>Hello C2</c>\n" + " </b>\n" + "</root>";
    XPathRecordReader rr = new XPathRecordReader("/root/b");
    rr.addField("c", "/root/b/c", true);
    List<Map<String, Object>> l = rr.getAllRecords(new StringReader(xml));
    Assert.assertEquals(2, l.size());
    Assert.assertEquals(2, ((List) l.get(0).get("c")).size());
    Assert.assertEquals(1, ((List) l.get(1).get("c")).size());
  }

  @Test
  public void attributes() {
    String xml = "<root>\n" + " <b a=\"x0\" b=\"y0\" />\n"
        + " <b a=\"x1\" b=\"y1\" />\n" + " <b a=\"x2\" b=\"y2\" />\n"
        + "</root>";
    XPathRecordReader rr = new XPathRecordReader("/root/b");
    rr.addField("a", "/root/b/@a", false);
    rr.addField("b", "/root/b/@b", false);
    List<Map<String, Object>> l = rr.getAllRecords(new StringReader(xml));
    Assert.assertEquals(3, l.size());
    Assert.assertEquals("x0", l.get(0).get("a"));
    Assert.assertEquals("y1", l.get(1).get("b"));
  }

  @Test
  public void attributes2Level() {
    String xml = "<root>\n" + "<a>\n" + " <b a=\"x0\" b=\"y0\" />\n"
        + " <b a=\"x1\" b=\"y1\" />\n" + " <b a=\"x2\" b=\"y2\" />\n"
        + "</a>" + "</root>";
    XPathRecordReader rr = new XPathRecordReader("/root/a/b");
    rr.addField("a", "/root/a/b/@a", false);
    rr.addField("b", "/root/a/b/@b", false);
    List<Map<String, Object>> l = rr.getAllRecords(new StringReader(xml));
    Assert.assertEquals(3, l.size());
    Assert.assertEquals("x0", l.get(0).get("a"));
    Assert.assertEquals("y1", l.get(1).get("b"));
  }

  @Test
  public void attributes2LevelHetero() {
    String xml = "<root>\n" + "<a>\n" + " <b a=\"x0\" b=\"y0\" />\n"
        + " <b a=\"x1\" b=\"y1\" />\n" + " <b a=\"x2\" b=\"y2\" />\n"
        + "</a>" + "<x>\n" + " <b a=\"x4\" b=\"y4\" />\n"
        + " <b a=\"x5\" b=\"y5\" />\n" + " <b a=\"x6\" b=\"y6\" />\n"
        + "</x>" + "</root>";
    XPathRecordReader rr = new XPathRecordReader("/root/a | /root/x");
    rr.addField("a", "/root/a/b/@a", false);
    rr.addField("b", "/root/a/b/@b", false);
    rr.addField("a", "/root/x/b/@a", false);
    rr.addField("b", "/root/x/b/@b", false);

    final List<Map<String, Object>> a = new ArrayList<Map<String, Object>>();
    final List<Map<String, Object>> x = new ArrayList<Map<String, Object>>();
    rr.streamRecords(new StringReader(xml), new XPathRecordReader.Handler() {
      public void handle(Map<String, Object> record, String xpath) {
        if (record == null)
          return;
        if (xpath.equals("/root/a"))
          a.add(record);
        if (xpath.equals("/root/x"))
          x.add(record);
      }
    });

    Assert.assertEquals(1, a.size());
    Assert.assertEquals(1, x.size());
  }

  @Test
  public void attributes2LevelMissingAttrVal() {
    String xml = "<root>\n" + "<a>\n" + " <b a=\"x0\" b=\"y0\" />\n"
        + " <b a=\"x1\" b=\"y1\" />\n" + "</a>" + "<a>\n"
        + " <b a=\"x3\" />\n" + " <b b=\"y4\" />\n" + "</a>" + "</root>";
    XPathRecordReader rr = new XPathRecordReader("/root/a");
    rr.addField("a", "/root/a/b/@a", true);
    rr.addField("b", "/root/a/b/@b", true);
    List<Map<String, Object>> l = rr.getAllRecords(new StringReader(xml));
    Assert.assertEquals(2, l.size());
    Assert.assertNull(((List) l.get(1).get("a")).get(1));
    Assert.assertNull(((List) l.get(1).get("b")).get(0));
  }

  @Test
  public void elems2LevelMissing() {
    String xml = "<root>\n" + "\t<a>\n" + "\t <b>\n" + "\t  <x>x0</x>\n"
        + "\t  <y>y0</y>\n" + "\t </b>\n" + "\t <b>\n"
        + "\t \t<x>x1</x>\n" + "\t \t<y>y1</y>\n" + "\t </b>\n"
        + "\t</a>\n" + "\t<a>\n" + "\t <b>\n" + "\t  <x>x3</x>\n"
        + "\t </b>\n" + "\t <b>\n" + "\t \t<y>y4</y>\n" + "\t </b>\n"
        + "\t</a>\n" + "</root>";
    XPathRecordReader rr = new XPathRecordReader("/root/a");
    rr.addField("a", "/root/a/b/x", true);
    rr.addField("b", "/root/a/b/y", true);
    List<Map<String, Object>> l = rr.getAllRecords(new StringReader(xml));
    Assert.assertEquals(2, l.size());
    Assert.assertNull(((List) l.get(1).get("a")).get(1));
    Assert.assertNull(((List) l.get(1).get("b")).get(0));
  }

  @Test
  public void elems2LevelWithAttrib() {
    String xml = "<root>\n" + "\t<a>\n" + "\t <b k=\"x\">\n"
        + "\t  <x>x0</x>\n" + "\t  <y>y0</y>\n" + "\t </b>\n"
        + "\t <b k=\"y\">\n" + "\t \t<x>x1</x>\n" + "\t \t<y>y1</y>\n"
        + "\t </b>\n" + "\t</a>\n" + "\t<a>\n" + "\t <b>\n"
        + "\t  <x>x3</x>\n" + "\t </b>\n" + "\t <b>\n"
        + "\t \t<y>y4</y>\n" + "\t </b>\n" + "\t</a>\n" + "</root>";
    XPathRecordReader rr = new XPathRecordReader("/root/a");
    rr.addField("x", "/root/a/b[@k]/x", true);
    rr.addField("y", "/root/a/b[@k]/y", true);
    List<Map<String, Object>> l = rr.getAllRecords(new StringReader(xml));
    Assert.assertEquals(2, l.size());
    Assert.assertEquals(2, ((List) l.get(0).get("x")).size());
    Assert.assertEquals(2, ((List) l.get(0).get("y")).size());
    Assert.assertEquals(0, l.get(1).size());
  }

  @Test
  public void elems2LevelWithAttribMultiple() {
    String xml = "<root>\n" + "\t<a>\n" + "\t <b k=\"x\" m=\"n\" >\n"
        + "\t  <x>x0</x>\n" + "\t  <y>y0</y>\n" + "\t </b>\n"
        + "\t <b k=\"y\" m=\"p\">\n" + "\t \t<x>x1</x>\n"
        + "\t \t<y>y1</y>\n" + "\t </b>\n" + "\t</a>\n" + "\t<a>\n"
        + "\t <b k=\"x\">\n" + "\t  <x>x3</x>\n" + "\t </b>\n"
        + "\t <b m=\"n\">\n" + "\t \t<y>y4</y>\n" + "\t </b>\n"
        + "\t</a>\n" + "</root>";
    XPathRecordReader rr = new XPathRecordReader("/root/a");
    rr.addField("x", "/root/a/b[@k][@m='n']/x", true);
    rr.addField("y", "/root/a/b[@k][@m='n']/y", true);
    List<Map<String, Object>> l = rr.getAllRecords(new StringReader(xml));
    Assert.assertEquals(2, l.size());
    Assert.assertEquals(1, ((List) l.get(0).get("x")).size());
    Assert.assertEquals(1, ((List) l.get(0).get("y")).size());
    Assert.assertEquals(0, l.get(1).size());
  }

  @Test
  public void elems2LevelWithAttribVal() {
    String xml = "<root>\n" + "\t<a>\n" + "\t <b k=\"x\">\n"
        + "\t  <x>x0</x>\n" + "\t  <y>y0</y>\n" + "\t </b>\n"
        + "\t <b k=\"y\">\n" + "\t \t<x>x1</x>\n" + "\t \t<y>y1</y>\n"
        + "\t </b>\n" + "\t</a>\n" + "\t<a>\n" + "\t <b>\n"
        + "\t  <x>x3</x>\n" + "\t </b>\n" + "\t <b>\n"
        + "\t \t<y>y4</y>\n" + "\t </b>\n" + "\t</a>\n" + "</root>";
    XPathRecordReader rr = new XPathRecordReader("/root/a");
    rr.addField("x", "/root/a/b[@k='x']/x", true);
    rr.addField("y", "/root/a/b[@k='x']/y", true);
    List<Map<String, Object>> l = rr.getAllRecords(new StringReader(xml));
    Assert.assertEquals(2, l.size());
    Assert.assertEquals(1, ((List) l.get(0).get("x")).size());
    Assert.assertEquals(1, ((List) l.get(0).get("y")).size());
    Assert.assertEquals(0, l.get(1).size());
  }

  @Test
  public void another() {
    String xml = "<root>\n"
        + " <contenido id=\"10097\" idioma=\"cat\">\n"
        + "  <antetitulo></antetitulo>\n" + "  <titulo>\n"
        + "   This is my title\n" + "  </titulo>\n"
        + "  <resumen>\n" + "   This is my summary\n"
        + "  </resumen>\n" + "  <texto>\n"
        + "   This is the body of my text\n" + "  </texto>\n"
        + " </contenido>\n" + "</root>";
    XPathRecordReader rr = new XPathRecordReader("/root/contenido");
    rr.addField("id", "/root/contenido/@id", false);
    rr.addField("title", "/root/contenido/titulo", false);
    rr.addField("resume", "/root/contenido/resumen", false);
    rr.addField("text", "/root/contenido/texto", false);

    List<Map<String, Object>> l = rr.getAllRecords(new StringReader(xml));
    Assert.assertEquals(1, l.size());
    Map<String, Object> m = l.get(0);
    Assert.assertEquals("10097", m.get("id").toString().trim());
    Assert.assertEquals("This is my title", m.get("title").toString().trim());
    Assert.assertEquals("This is my summary", m.get("resume").toString().trim());
    Assert.assertEquals("This is the body of my text", m.get("text").toString()
        .trim());
  }

}
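attributes2LevelHetero above also demonstrates the streaming API, which avoids materializing every record the way getAllRecords does. A minimal sketch of the callback form in isolation (it assumes the same imports as the test above and an xml string shaped like the one in basic()):

// Sketch: process records one at a time as they are parsed.
XPathRecordReader rr = new XPathRecordReader("/root/b");
rr.addField("c", "/root/b/c", true);
rr.streamRecords(new StringReader(xml), new XPathRecordReader.Handler() {
  public void handle(Map<String, Object> record, String xpath) {
    if (record != null)
      System.out.println(xpath + " -> " + record); // handle each record here
  }
});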
@@ -0,0 +1,2 @@
#Thu Jul 31 00:57:19 IST 2008
last_index_time=2008-07-31 00\:57\:19
@@ -0,0 +1,9 @@
<dataConfig>
  <dataSource type="MockDataSource" />
  <document>
    <entity name="x" query="select * from x">
      <field column="id" />
      <field column="desc" />
    </entity>
  </document>
</dataConfig>
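The harness tests above feed this config through MockDataSource by keying canned rows to the exact query string; a sketch of the wiring, mirroring the setIterator calls in TestSqlEntityProcessor2 (the row values here are illustrative):

// Sketch: satisfy this config's "select * from x" with one mock row,
// then a full import would index a single document with id=1, desc=one.
List rows = new ArrayList();
rows.add(createMap("id", "1", "desc", "one"));
MockDataSource.setIterator("select * from x", rows.iterator());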
@ -0,0 +1,404 @@
|
|||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<config>
|
||||
<!-- Set this to 'false' if you want solr to continue working after it has
|
||||
encountered an severe configuration error. In a production environment,
|
||||
you may want solr to keep working even if one handler is mis-configured.
|
||||
|
||||
You may also set this to false using by setting the system property:
|
||||
-Dsolr.abortOnConfigurationError=false
|
||||
-->
|
||||
<abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>
|
||||
|
||||
<!-- Used to specify an alternate directory to hold all index data
|
||||
other than the default ./data under the Solr home.
|
||||
If replication is in use, this should match the replication configuration. -->
|
||||
<dataDir>${solr.data.dir:./solr/data}</dataDir>
|
||||
|
||||
|
||||
<indexDefaults>
|
||||
<!-- Values here affect all index writers and act as a default unless overridden. -->
|
||||
<useCompoundFile>false</useCompoundFile>
|
||||
|
||||
<mergeFactor>10</mergeFactor>
|
||||
<!--
|
||||
If both ramBufferSizeMB and maxBufferedDocs is set, then Lucene will flush based on whichever limit is hit first.
|
||||
|
||||
-->
|
||||
<!--<maxBufferedDocs>1000</maxBufferedDocs>-->
|
||||
<!-- Tell Lucene when to flush documents to disk.
|
||||
Giving Lucene more memory for indexing means faster indexing at the cost of more RAM
|
||||
|
||||
If both ramBufferSizeMB and maxBufferedDocs is set, then Lucene will flush based on whichever limit is hit first.
|
||||
|
||||
-->
|
||||
<ramBufferSizeMB>32</ramBufferSizeMB>
|
||||
<maxMergeDocs>2147483647</maxMergeDocs>
|
||||
<maxFieldLength>10000</maxFieldLength>
|
||||
<writeLockTimeout>1000</writeLockTimeout>
|
||||
<commitLockTimeout>10000</commitLockTimeout>
|
||||
|
||||
<!--
|
||||
Expert: Turn on Lucene's auto commit capability.
|
||||
|
||||
TODO: Add recommendations on why you would want to do this.
|
||||
|
||||
NOTE: Despite the name, this value does not have any relation to Solr's autoCommit functionality
|
||||
|
||||
-->
|
||||
<!--<luceneAutoCommit>false</luceneAutoCommit>-->
|
||||
<!--
|
||||
Expert:
|
||||
The Merge Policy in Lucene controls how merging is handled by Lucene. The default in 2.3 is the LogByteSizeMergePolicy, previous
|
||||
versions used LogDocMergePolicy.
|
||||
|
||||
LogByteSizeMergePolicy chooses segments to merge based on their size. The Lucene 2.2 default, LogDocMergePolicy chose when
|
||||
to merge based on number of documents
|
||||
|
||||
Other implementations of MergePolicy must have a no-argument constructor
|
||||
-->
|
||||
<!--<mergePolicy>org.apache.lucene.index.LogByteSizeMergePolicy</mergePolicy>-->
|
||||
|
||||
<!--
|
||||
Expert:
|
||||
The Merge Scheduler in Lucene controls how merges are performed. The ConcurrentMergeScheduler (Lucene 2.3 default)
|
||||
can perform merges in the background using separate threads. The SerialMergeScheduler (Lucene 2.2 default) does not.
|
||||
-->
|
||||
<!--<mergeScheduler>org.apache.lucene.index.ConcurrentMergeScheduler</mergeScheduler>-->
|
||||
|
||||
<!--
|
||||
As long as Solr is the only process modifying your index, it is
|
||||
safe to use Lucene's in process locking mechanism. But you may
|
||||
specify one of the other Lucene LockFactory implementations in
|
||||
the event that you have a custom situation.
|
||||
|
||||
none = NoLockFactory (typically only used with read only indexes)
|
||||
single = SingleInstanceLockFactory (suggested)
|
||||
native = NativeFSLockFactory
|
||||
simple = SimpleFSLockFactory
|
||||
|
||||
('simple' is the default for backwards compatibility with Solr 1.2)
|
||||
-->
|
||||
<lockType>single</lockType>
|
||||
</indexDefaults>
|
||||
|
||||
<mainIndex>
|
||||
<!-- options specific to the main on-disk lucene index -->
|
||||
<useCompoundFile>false</useCompoundFile>
|
||||
<ramBufferSizeMB>32</ramBufferSizeMB>
|
||||
<mergeFactor>10</mergeFactor>
|
||||
<!-- Deprecated -->
|
||||
<!--<maxBufferedDocs>1000</maxBufferedDocs>-->
|
||||
<maxMergeDocs>2147483647</maxMergeDocs>
|
||||
<maxFieldLength>10000</maxFieldLength>
|
||||
|
||||
<!-- If true, unlock any held write or commit locks on startup.
|
||||
This defeats the locking mechanism that allows multiple
|
||||
processes to safely access a lucene index, and should be
|
||||
used with care.
|
||||
This is not needed if lock type is 'none' or 'single'
|
||||
-->
|
||||
<unlockOnStartup>false</unlockOnStartup>
|
||||
</mainIndex>
|
||||
|
||||
<!-- the default high-performance update handler -->
|
||||
<updateHandler class="solr.DirectUpdateHandler2">
|
||||
|
||||
<!-- A prefix of "solr." for class names is an alias that
|
||||
causes solr to search appropriate packages, including
|
||||
org.apache.solr.(search|update|request|core|analysis)
|
||||
-->
|
||||
|
||||
<!-- Limit the number of deletions Solr will buffer during doc updating.
|
||||
|
||||
Setting this lower can help bound memory use during indexing.
|
||||
-->
|
||||
<maxPendingDeletes>100000</maxPendingDeletes>
|
||||
|
||||
</updateHandler>
|
||||
|
||||
|
||||
<query>
|
||||
<!-- Maximum number of clauses in a boolean query... can affect
|
||||
range or prefix queries that expand to big boolean
|
||||
queries. An exception is thrown if exceeded. -->
|
||||
<maxBooleanClauses>1024</maxBooleanClauses>
|
||||
|
||||
|
||||
<!-- Cache used by SolrIndexSearcher for filters (DocSets),
|
||||
unordered sets of *all* documents that match a query.
|
||||
When a new searcher is opened, its caches may be prepopulated
|
||||
or "autowarmed" using data from caches in the old searcher.
|
||||
autowarmCount is the number of items to prepopulate. For LRUCache,
|
||||
the autowarmed items will be the most recently accessed items.
|
||||
Parameters:
|
||||
class - the SolrCache implementation (currently only LRUCache)
|
||||
size - the maximum number of entries in the cache
|
||||
initialSize - the initial capacity (number of entries) of
|
||||
the cache. (seel java.util.HashMap)
|
||||
autowarmCount - the number of entries to prepopulate from
|
||||
and old cache.
|
||||
-->
|
||||
<filterCache
|
||||
class="solr.LRUCache"
|
||||
size="512"
|
||||
initialSize="512"
|
||||
autowarmCount="256"/>
|
||||
|
||||
<!-- queryResultCache caches results of searches - ordered lists of
|
||||
document ids (DocList) based on a query, a sort, and the range
|
||||
of documents requested. -->
|
||||
<queryResultCache
|
||||
class="solr.LRUCache"
|
||||
size="512"
|
||||
initialSize="512"
|
||||
autowarmCount="256"/>
|
||||
|
||||
<!-- documentCache caches Lucene Document objects (the stored fields for each document).
|
||||
Since Lucene internal document ids are transient, this cache will not be autowarmed. -->
|
||||
<documentCache
|
||||
class="solr.LRUCache"
|
||||
size="512"
|
||||
initialSize="512"
|
||||
autowarmCount="0"/>
|
||||
|
||||
<!-- If true, stored fields that are not requested will be loaded lazily.
|
||||
|
||||
This can result in a significant speed improvement if the usual case is to
|
||||
not load all stored fields, especially if the skipped fields are large compressed
|
||||
text fields.
|
||||
-->
|
||||
<enableLazyFieldLoading>true</enableLazyFieldLoading>
|
||||
|
||||
<!-- Example of a generic cache. These caches may be accessed by name
|
||||
through SolrIndexSearcher.getCache(),cacheLookup(), and cacheInsert().
|
||||
The purpose is to enable easy caching of user/application level data.
|
||||
The regenerator argument should be specified as an implementation
|
||||
of solr.search.CacheRegenerator if autowarming is desired. -->
|
||||
<!--
|
||||
<cache name="myUserCache"
|
||||
class="solr.LRUCache"
|
||||
size="4096"
|
||||
initialSize="1024"
|
||||
autowarmCount="1024"
|
||||
regenerator="org.mycompany.mypackage.MyRegenerator"
|
||||
/>
|
||||
-->
|
||||
|
||||
<!-- An optimization that attempts to use a filter to satisfy a search.
|
||||
If the requested sort does not include score, then the filterCache
|
||||
will be checked for a filter matching the query. If found, the filter
|
||||
will be used as the source of document ids, and then the sort will be
|
||||
applied to that.
|
||||
<useFilterForSortedQuery>true</useFilterForSortedQuery>
|
||||
-->
|
||||
|
||||
<!-- An optimization for use with the queryResultCache. When a search
|
||||
is requested, a superset of the requested number of document ids
|
||||
are collected. For example, if a search for a particular query
|
||||
requests matching documents 10 through 19, and queryWindowSize is 50,
|
||||
then documents 0 through 49 will be collected and cached. Any further
|
||||
requests in that range can be satisfied via the cache. -->
|
||||
<queryResultWindowSize>50</queryResultWindowSize>
|
||||
|
||||
<!-- Maximum number of documents to cache for any entry in the
|
||||
queryResultCache. -->
|
||||
<queryResultMaxDocsCached>200</queryResultMaxDocsCached>
|
||||
|
||||
<!-- This entry enables an int hash representation for filters (DocSets)
|
||||
when the number of items in the set is less than maxSize. For smaller
|
||||
sets, this representation is more memory efficient, more efficient to
|
||||
iterate over, and faster to take intersections. -->
|
||||
<HashDocSet maxSize="3000" loadFactor="0.75"/>
|
||||
|
||||
<!-- a newSearcher event is fired whenever a new searcher is being prepared
|
||||
and there is a current searcher handling requests (aka registered). -->
|
||||
<!-- QuerySenderListener takes an array of NamedList and executes a
|
||||
local query request for each NamedList in sequence. -->
|
||||
<listener event="newSearcher" class="solr.QuerySenderListener">
|
||||
<arr name="queries">
|
||||
<lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> </lst>
|
||||
<lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst>
|
||||
<lst><str name="q">static newSearcher warming query from solrconfig.xml</str></lst>
|
||||
</arr>
|
||||
</listener>
|
||||
|
||||
<!-- a firstSearcher event is fired whenever a new searcher is being
|
||||
prepared but there is no current registered searcher to handle
|
||||
requests or to gain autowarming data from. -->
|
||||
<listener event="firstSearcher" class="solr.QuerySenderListener">
|
||||
<arr name="queries">
|
||||
</arr>
|
||||
</listener>
|
||||
|
||||
<!-- If a search request comes in and there is no current registered searcher,
|
||||
then immediately register the still warming searcher and use it. If
|
||||
"false" then all requests will block until the first searcher is done
|
||||
warming. -->
|
||||
<useColdSearcher>false</useColdSearcher>
|
||||
|
||||
<!-- Maximum number of searchers that may be warming in the background
|
||||
concurrently. An error is returned if this limit is exceeded. Recommend
|
||||
1-2 for read-only slaves, higher for masters w/o cache warming. -->
|
||||
<maxWarmingSearchers>4</maxWarmingSearchers>
|
||||
|
||||
</query>
|
||||
|
||||
<!--
|
||||
Let the dispatch filter handler /select?qt=XXX
|
||||
handleSelect=true will use consistent error handling for /select and /update
|
||||
handleSelect=false will use solr1.1 style error formatting
|
||||
-->
|
||||
<requestDispatcher handleSelect="true" >
|
||||
<!--Make sure your system has some authentication before enabling remote streaming! -->
|
||||
<requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" />
|
||||
|
||||
<!-- Set HTTP caching related parameters (for proxy caches and clients).
|
||||
|
||||
To get the behaviour of Solr 1.2 (ie: no caching related headers)
|
||||
use the never304="true" option and do not specify a value for
|
||||
<cacheControl>
|
||||
-->
|
||||
<httpCaching never304="true">
|
||||
<!--httpCaching lastModifiedFrom="openTime"
|
||||
etagSeed="Solr"-->
|
||||
<!-- lastModFrom="openTime" is the default, the Last-Modified value
|
||||
(and validation against If-Modified-Since requests) will all be
|
||||
relative to when the current Searcher was opened.
|
||||
You can change it to lastModFrom="dirLastMod" if you want the
|
||||
value to exactly corrispond to when the physical index was last
|
||||
modified.
|
||||
|
||||
etagSeed="..." is an option you can change to force the ETag
|
||||
header (and validation against If-None-Match requests) to be
|
||||
differnet even if the index has not changed (ie: when making
|
||||
significant changes to your config file)
|
||||
|
||||
lastModifiedFrom and etagSeed are both ignored if you use the
|
||||
never304="true" option.
|
||||
-->
|
||||
<!-- If you include a <cacheControl> directive, it will be used to
|
||||
generate a Cache-Control header, as well as an Expires header
|
||||
if the value contains "max-age="
|
||||
|
||||
By default, no Cache-Control header is generated.
|
||||
|
||||
You can use the <cacheControl> option even if you have set
|
||||
never304="true"
|
||||
-->
|
||||
<!-- <cacheControl>max-age=30, public</cacheControl> -->
|
||||
</httpCaching>
|
||||
</requestDispatcher>
|
||||
|
||||
|
||||
<!-- requestHandler plugins... incoming queries will be dispatched to the
|
||||
correct handler based on the path or the qt (query type) param.
|
||||
Names starting with a '/' are accessed with the a path equal to the
|
||||
registered name. Names without a leading '/' are accessed with:
|
||||
http://host/app/select?qt=name
|
||||
If no qt is defined, the requestHandler that declares default="true"
|
||||
will be used.
|
||||
-->
|
||||
<requestHandler name="standard" class="solr.StandardRequestHandler" default="true">
|
||||
<!-- default values for query parameters -->
|
||||
<lst name="defaults">
|
||||
<str name="echoParams">explicit</str>
|
||||
<!--
|
||||
<int name="rows">10</int>
|
||||
<str name="fl">*</str>
|
||||
<str name="version">2.1</str>
|
||||
-->
|
||||
</lst>
|
||||
</requestHandler>
|
||||
|
||||
<requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
|
||||
</requestHandler>

  <!--

   Search components are registered to SolrCore and used by Search Handlers

   By default, the following components are available:

   <searchComponent name="query" class="org.apache.solr.handler.component.QueryComponent" />
   <searchComponent name="facet" class="org.apache.solr.handler.component.FacetComponent" />
   <searchComponent name="mlt" class="org.apache.solr.handler.component.MoreLikeThisComponent" />
   <searchComponent name="highlight" class="org.apache.solr.handler.component.HighlightComponent" />
   <searchComponent name="debug" class="org.apache.solr.handler.component.DebugComponent" />

   If you register a searchComponent to one of the standard names, that will be used instead.

  -->

  <requestHandler name="/search" class="org.apache.solr.handler.component.SearchHandler">
    <lst name="defaults">
      <str name="echoParams">explicit</str>
    </lst>
    <!--
    By default, this will register the following components:

    <arr name="components">
      <str>query</str>
      <str>facet</str>
      <str>mlt</str>
      <str>highlight</str>
      <str>debug</str>
    </arr>

    To insert handlers before or after the 'standard' components, use:

    <arr name="first-components">
      <str>first</str>
    </arr>

    <arr name="last-components">
      <str>last</str>
    </arr>

    -->
  </requestHandler>

  <!-- Update request handler.

       Note: Since solr1.1, requestHandlers require a valid content type header if posted in
       the body. For example, curl now requires: -H 'Content-type:text/xml; charset=utf-8'
       The response format differs from solr1.1 formatting and returns a standard error code.

       To enable solr1.1 behavior, remove the /update handler or change its path.

       "update.processor.class" is the class name for the UpdateRequestProcessor. It is initialized
       only once and cannot be changed for each request.
  -->
  <requestHandler name="/update" class="solr.XmlUpdateRequestHandler" >
    <!--
    <str name="update.processor.class">org.apache.solr.handler.UpdateRequestProcessor</str>
    -->
  </requestHandler>
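  <!-- An illustrative post under the content-type rule above; host, port and
       the document are assumptions, not part of this config:
       curl 'http://localhost:8983/solr/update' -H 'Content-type:text/xml; charset=utf-8' -d '<add><doc><field name="id">1</field></doc></add>'
  -->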

  <!-- config for the admin interface -->
  <admin>
    <defaultQuery>*:*</defaultQuery>

    <!-- configure a healthcheck file for servers behind a loadbalancer
    <healthcheck type="file">server-enabled</healthcheck>
    -->
  </admin>

</config>

@ -0,0 +1,304 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->

<!--
 This is the Solr schema file. This file should be named "schema.xml" and
 should be in the conf directory under the solr home
 (i.e. ./solr/conf/schema.xml by default)
 or located where the classloader for the Solr webapp can find it.

 This example schema is the recommended starting point for users.
 It should be kept correct and concise, usable out-of-the-box.

 For more information on how to customize this file, please see
 http://wiki.apache.org/solr/SchemaXml
-->

<schema name="test" version="1.1">
  <!-- attribute "name" is the name of this schema and is only used for display purposes.
       Applications should change this to reflect the nature of the search collection.
       version="1.1" is Solr's version number for the schema syntax and semantics.  It should
       not normally be changed by applications.
       1.0: multiValued attribute did not exist, all fields are multiValued by nature
       1.1: multiValued attribute introduced, false by default -->

  <types>
    <!-- field type definitions. The "name" attribute is
         just a label to be used by field definitions.  The "class"
         attribute and any other attributes determine the real
         behavior of the fieldType.
         Class names starting with "solr" refer to java classes in the
         org.apache.solr.analysis package.
    -->

    <!-- The StrField type is not analyzed, but indexed/stored verbatim.
         - StrField and TextField support an optional compressThreshold which
           limits compression (if enabled in the derived fields) to values which
           exceed a certain size (in characters).
    -->
    <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>

    <!-- boolean type: "true" or "false" -->
    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>

    <!-- The optional sortMissingLast and sortMissingFirst attributes are
         currently supported on types that are sorted internally as strings.
         - If sortMissingLast="true", then a sort on this field will cause documents
           without the field to come after documents with the field,
           regardless of the requested sort order (asc or desc).
         - If sortMissingFirst="true", then a sort on this field will cause documents
           without the field to come before documents with the field,
           regardless of the requested sort order.
         - If sortMissingLast="false" and sortMissingFirst="false" (the default),
           then default lucene sorting will be used which places docs without the
           field first in an ascending sort and last in a descending sort.
    -->


    <!-- numeric field types that store and index the text
         value verbatim (and hence don't support range queries, since the
         lexicographic ordering isn't equal to the numeric ordering) -->
    <fieldType name="integer" class="solr.IntField" omitNorms="true"/>
    <fieldType name="long" class="solr.LongField" omitNorms="true"/>
    <fieldType name="float" class="solr.FloatField" omitNorms="true"/>
    <fieldType name="double" class="solr.DoubleField" omitNorms="true"/>


    <!-- Numeric field types that manipulate the value into
         a string value that isn't human-readable in its internal form,
         but with a lexicographic ordering the same as the numeric ordering,
         so that range queries work correctly. -->
    <fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true"/>
    <fieldType name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true"/>
    <fieldType name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true"/>
    <fieldType name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true"/>


    <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
         is a more restricted form of the canonical representation of dateTime
         http://www.w3.org/TR/xmlschema-2/#dateTime
         The trailing "Z" designates UTC time and is mandatory.
         Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
         All other components are mandatory.

         Expressions can also be used to denote calculations that should be
         performed relative to "NOW" to determine the value, ie...

               NOW/HOUR
                  ... Round to the start of the current hour
               NOW-1DAY
                  ... Exactly 1 day prior to now
               NOW/DAY+6MONTHS+3DAYS
                  ... 6 months and 3 days in the future from the start of
                      the current day

         Consult the DateField javadocs for more information.
    -->
    <fieldType name="date" class="solr.DateField" sortMissingLast="true" omitNorms="true"/>


    <!-- The "RandomSortField" is not used to store or search any
         data.  You can declare fields of this type in your schema
         to generate pseudo-random orderings of your docs for sorting
         purposes.  The ordering is generated based on the field name
         and the version of the index.  As long as the index version
         remains unchanged, and the same field name is reused,
         the ordering of the docs will be consistent.
         If you want different pseudo-random orderings of documents,
         for the same version of the index, use a dynamicField and
         change the name.
    -->
    <fieldType name="random" class="solr.RandomSortField" indexed="true" />

    <!-- solr.TextField allows the specification of custom text analyzers
         specified as a tokenizer and a list of token filters. Different
         analyzers may be specified for indexing and querying.

         The optional positionIncrementGap puts space between multiple fields of
         this type on the same document, with the purpose of preventing false phrase
         matching across fields.

         For more info on customizing your analyzer chain, please see
         http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
    -->

    <!-- One can also specify an existing Analyzer class that has a
         default constructor via the class attribute on the analyzer element
    <fieldType name="text_greek" class="solr.TextField">
      <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
    </fieldType>
    -->

    <!-- A text field that only splits on whitespace for exact matching of words -->
    <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      </analyzer>
    </fieldType>

    <!-- A text field that uses WordDelimiterFilter to enable splitting and matching of
         words on case-change, alphanumeric boundaries, and non-alphanumeric chars,
         so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
         Synonyms and stopwords are customized by external files, and stemming is enabled.
         Duplicate tokens at the same position (which may result from stemmed synonyms or
         WordDelim parts) are removed.
    -->
    <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <!-- in this example, we will only use synonyms at query time
        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
        -->
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
    </fieldType>
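    <!-- Worked illustration of the index-time chain above: WordDelimiterFilter
         splits "Wi-Fi" into "Wi" and "Fi" and (via catenateWords) adds "WiFi",
         which lowercase to "wi", "fi" and "wifi"; that is why queries for
         "wifi" or "wi fi" can match the original text. -->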


    <!-- Less flexible matching, but fewer false matches.  Probably not ideal for product names,
         but may be good for SKUs.  Can insert dashes in the wrong place and still match. -->
    <fieldType name="textTight" class="solr.TextField" positionIncrementGap="100" >
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- This is an example of using the KeywordTokenizer along
         with various TokenFilterFactories to produce a sortable field
         that does not include some properties of the source text
    -->
    <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
      <analyzer>
        <!-- KeywordTokenizer does no actual tokenizing, so the entire
             input string is preserved as a single token
        -->
        <tokenizer class="solr.KeywordTokenizerFactory"/>
        <!-- The LowerCase TokenFilter does what you expect, which can be
             useful when you want your sorting to be case insensitive
        -->
        <filter class="solr.LowerCaseFilterFactory" />
        <!-- The TrimFilter removes any leading or trailing whitespace -->
        <filter class="solr.TrimFilterFactory" />
        <!-- The PatternReplaceFilter gives you the flexibility to use
             Java regular expressions to replace any sequence of characters
             matching a pattern with an arbitrary replacement string,
             which may include back references to portions of the original
             string matched by the pattern.

             See the Java Regular Expression documentation for more
             information on pattern and replacement string syntax.

             http://java.sun.com/j2se/1.5.0/docs/api/java/util/regex/package-summary.html
        -->
        <filter class="solr.PatternReplaceFilterFactory"
                pattern="([^a-z])" replacement="" replace="all"
        />
      </analyzer>
    </fieldType>
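    <!-- Worked illustration of the chain above: the input " Hello World! "
         is kept as one token, lowercased, trimmed, and stripped of non a-z
         characters by the pattern, so it sorts as "helloworld". -->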

    <!-- since fields of this type are by default not stored or indexed, any data added to
         them will be ignored outright
    -->
    <fieldtype name="ignored" stored="false" indexed="false" class="solr.StrField" />

  </types>


  <fields>
    <!-- Valid attributes for fields:
      name: mandatory - the name for the field
      type: mandatory - the name of a previously defined type from the <types> section
      indexed: true if this field should be indexed (searchable or sortable)
      stored: true if this field should be retrievable
      compressed: [false] if this field should be stored using gzip compression
        (this will only apply if the field type is compressible; among
        the standard field types, only TextField and StrField are)
      multiValued: true if this field may contain multiple values per document
      omitNorms: (expert) set to true to omit the norms associated with
        this field (this disables length normalization and index-time
        boosting for the field, and saves some memory).  Only full-text
        fields or fields that need an index-time boost need norms.
      termVectors: [false] set to true to store the term vector for a given field.
        When using MoreLikeThis, fields used for similarity should be stored for
        best performance.
    -->

    <field name="id" type="string" indexed="true" stored="true" required="true" />
    <field name="desc" type="string" indexed="true" stored="true" multiValued="true" />

    <field name="date" type="date" indexed="true" stored="true" />

    <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>


    <!-- Dynamic field definitions.  If a field name is not found, dynamicFields
         will be used if the name matches any of the patterns.
         RESTRICTION: the glob-like pattern in the name attribute must have
         a "*" only at the start or the end.
         EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
         Longer patterns will be matched first.  If equal size patterns
         both match, the first appearing in the schema will be used. -->
    <dynamicField name="*_i" type="sint" indexed="true" stored="true"/>
    <dynamicField name="*_s" type="string" indexed="true" stored="true"/>
    <dynamicField name="*_l" type="slong" indexed="true" stored="true"/>
    <dynamicField name="*_t" type="text" indexed="true" stored="true"/>
    <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
    <dynamicField name="*_f" type="sfloat" indexed="true" stored="true"/>
    <dynamicField name="*_d" type="sdouble" indexed="true" stored="true"/>
    <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>

    <dynamicField name="random*" type="random" />

    <!-- uncomment the following to ignore any fields that don't already match an existing
         field name or dynamic field, rather than reporting them as an error.
         alternatively, change the type="ignored" to some other type e.g. "text" if you want
         unknown fields indexed and/or stored by default -->
    <!--dynamicField name="*" type="ignored" /-->

  </fields>

  <!-- Field to use to determine and enforce document uniqueness.
       Unless this field is marked with required="false", it will be a required field
  -->
  <uniqueKey>id</uniqueKey>

  <!-- field for the QueryParser to use when an explicit fieldname is absent -->
  <defaultSearchField>desc</defaultSearchField>

  <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
  <solrQueryParser defaultOperator="OR"/>

</schema>

@ -0,0 +1,409 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->

<config>
  <!-- Set this to 'false' if you want solr to continue working after it has
       encountered a severe configuration error.  In a production environment,
       you may want solr to keep working even if one handler is mis-configured.

       You may also set this to false by setting the system property:
         -Dsolr.abortOnConfigurationError=false
  -->
  <abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>

  <!-- Used to specify an alternate directory to hold all index data
       other than the default ./data under the Solr home.
       If replication is in use, this should match the replication configuration. -->
  <dataDir>${solr.data.dir:./solr/data}</dataDir>


  <indexDefaults>
    <!-- Values here affect all index writers and act as a default unless overridden. -->
    <useCompoundFile>false</useCompoundFile>

    <mergeFactor>10</mergeFactor>
    <!--
     If both ramBufferSizeMB and maxBufferedDocs are set, then Lucene will flush based on whichever limit is hit first.
    -->
    <!--<maxBufferedDocs>1000</maxBufferedDocs>-->
    <!-- Tell Lucene when to flush documents to disk.
         Giving Lucene more memory for indexing means faster indexing at the cost of more RAM.

         If both ramBufferSizeMB and maxBufferedDocs are set, then Lucene will flush based on whichever limit is hit first.
    -->
    <ramBufferSizeMB>32</ramBufferSizeMB>
    <maxMergeDocs>2147483647</maxMergeDocs>
    <maxFieldLength>10000</maxFieldLength>
    <writeLockTimeout>1000</writeLockTimeout>
    <commitLockTimeout>10000</commitLockTimeout>

    <!--
     Expert: Turn on Lucene's auto commit capability.

     TODO: Add recommendations on why you would want to do this.

     NOTE: Despite the name, this value does not have any relation to Solr's autoCommit functionality.
    -->
    <!--<luceneAutoCommit>false</luceneAutoCommit>-->
    <!--
     Expert:
     The Merge Policy in Lucene controls how merging is handled by Lucene.  The default in 2.3 is the LogByteSizeMergePolicy; previous
     versions used LogDocMergePolicy.

     LogByteSizeMergePolicy chooses segments to merge based on their size.  The Lucene 2.2 default, LogDocMergePolicy, chose when
     to merge based on number of documents.

     Other implementations of MergePolicy must have a no-argument constructor.
    -->
    <!--<mergePolicy>org.apache.lucene.index.LogByteSizeMergePolicy</mergePolicy>-->

    <!--
     Expert:
     The Merge Scheduler in Lucene controls how merges are performed.  The ConcurrentMergeScheduler (Lucene 2.3 default)
     can perform merges in the background using separate threads.  The SerialMergeScheduler (Lucene 2.2 default) does not.
    -->
    <!--<mergeScheduler>org.apache.lucene.index.ConcurrentMergeScheduler</mergeScheduler>-->

    <!--
     As long as Solr is the only process modifying your index, it is
     safe to use Lucene's in-process locking mechanism.  But you may
     specify one of the other Lucene LockFactory implementations in
     the event that you have a custom situation.

     none = NoLockFactory (typically only used with read-only indexes)
     single = SingleInstanceLockFactory (suggested)
     native = NativeFSLockFactory
     simple = SimpleFSLockFactory

     ('simple' is the default for backwards compatibility with Solr 1.2)
    -->
    <lockType>single</lockType>
  </indexDefaults>

  <mainIndex>
    <!-- options specific to the main on-disk lucene index -->
    <useCompoundFile>false</useCompoundFile>
    <ramBufferSizeMB>32</ramBufferSizeMB>
    <mergeFactor>10</mergeFactor>
    <!-- Deprecated -->
    <!--<maxBufferedDocs>1000</maxBufferedDocs>-->
    <maxMergeDocs>2147483647</maxMergeDocs>
    <maxFieldLength>10000</maxFieldLength>

    <!-- If true, unlock any held write or commit locks on startup.
         This defeats the locking mechanism that allows multiple
         processes to safely access a lucene index, and should be
         used with care.
         This is not needed if lock type is 'none' or 'single'
    -->
    <unlockOnStartup>false</unlockOnStartup>
  </mainIndex>

  <!-- the default high-performance update handler -->
  <updateHandler class="solr.DirectUpdateHandler2">

    <!-- A prefix of "solr." for class names is an alias that
         causes solr to search appropriate packages, including
         org.apache.solr.(search|update|request|core|analysis)
    -->

    <!-- Limit the number of deletions Solr will buffer during doc updating.

         Setting this lower can help bound memory use during indexing.
    -->
    <maxPendingDeletes>100000</maxPendingDeletes>

  </updateHandler>


  <query>
    <!-- Maximum number of clauses in a boolean query... can affect
         range or prefix queries that expand to big boolean
         queries.  An exception is thrown if exceeded. -->
    <maxBooleanClauses>1024</maxBooleanClauses>


    <!-- Cache used by SolrIndexSearcher for filters (DocSets),
         unordered sets of *all* documents that match a query.
         When a new searcher is opened, its caches may be prepopulated
         or "autowarmed" using data from caches in the old searcher.
         autowarmCount is the number of items to prepopulate.  For LRUCache,
         the autowarmed items will be the most recently accessed items.
       Parameters:
         class - the SolrCache implementation (currently only LRUCache)
         size - the maximum number of entries in the cache
         initialSize - the initial capacity (number of entries) of
           the cache.  (see java.util.HashMap)
         autowarmCount - the number of entries to prepopulate from
           an old cache.
    -->
    <filterCache
      class="solr.LRUCache"
      size="512"
      initialSize="512"
      autowarmCount="256"/>

    <!-- queryResultCache caches results of searches - ordered lists of
         document ids (DocList) based on a query, a sort, and the range
         of documents requested. -->
    <queryResultCache
      class="solr.LRUCache"
      size="512"
      initialSize="512"
      autowarmCount="256"/>

    <!-- documentCache caches Lucene Document objects (the stored fields for each document).
         Since Lucene internal document ids are transient, this cache will not be autowarmed. -->
    <documentCache
      class="solr.LRUCache"
      size="512"
      initialSize="512"
      autowarmCount="0"/>

    <!-- If true, stored fields that are not requested will be loaded lazily.

         This can result in a significant speed improvement if the usual case is to
         not load all stored fields, especially if the skipped fields are large compressed
         text fields.
    -->
    <enableLazyFieldLoading>true</enableLazyFieldLoading>

    <!-- Example of a generic cache.  These caches may be accessed by name
         through SolrIndexSearcher.getCache(), cacheLookup(), and cacheInsert().
         The purpose is to enable easy caching of user/application level data.
         The regenerator argument should be specified as an implementation
         of solr.search.CacheRegenerator if autowarming is desired. -->
    <!--
    <cache name="myUserCache"
      class="solr.LRUCache"
      size="4096"
      initialSize="1024"
      autowarmCount="1024"
      regenerator="org.mycompany.mypackage.MyRegenerator"
      />
    -->

    <!-- An optimization that attempts to use a filter to satisfy a search.
         If the requested sort does not include score, then the filterCache
         will be checked for a filter matching the query.  If found, the filter
         will be used as the source of document ids, and then the sort will be
         applied to that.
    <useFilterForSortedQuery>true</useFilterForSortedQuery>
    -->

    <!-- An optimization for use with the queryResultCache.  When a search
         is requested, a superset of the requested number of document ids
         are collected.  For example, if a search for a particular query
         requests matching documents 10 through 19, and queryWindowSize is 50,
         then documents 0 through 49 will be collected and cached.  Any further
         requests in that range can be satisfied via the cache. -->
    <queryResultWindowSize>50</queryResultWindowSize>

    <!-- Maximum number of documents to cache for any entry in the
         queryResultCache. -->
    <queryResultMaxDocsCached>200</queryResultMaxDocsCached>

    <!-- This entry enables an int hash representation for filters (DocSets)
         when the number of items in the set is less than maxSize.  For smaller
         sets, this representation is more memory efficient, more efficient to
         iterate over, and faster to take intersections. -->
    <HashDocSet maxSize="3000" loadFactor="0.75"/>

    <!-- a newSearcher event is fired whenever a new searcher is being prepared
         and there is a current searcher handling requests (aka registered). -->
    <!-- QuerySenderListener takes an array of NamedList and executes a
         local query request for each NamedList in sequence. -->
    <listener event="newSearcher" class="solr.QuerySenderListener">
      <arr name="queries">
        <lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> </lst>
        <lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst>
        <lst><str name="q">static newSearcher warming query from solrconfig.xml</str></lst>
      </arr>
    </listener>

    <!-- a firstSearcher event is fired whenever a new searcher is being
         prepared but there is no current registered searcher to handle
         requests or to gain autowarming data from. -->
    <listener event="firstSearcher" class="solr.QuerySenderListener">
      <arr name="queries">
      </arr>
    </listener>

    <!-- If a search request comes in and there is no current registered searcher,
         then immediately register the still warming searcher and use it.  If
         "false" then all requests will block until the first searcher is done
         warming. -->
    <useColdSearcher>false</useColdSearcher>

    <!-- Maximum number of searchers that may be warming in the background
         concurrently.  An error is returned if this limit is exceeded.  Recommend
         1-2 for read-only slaves, higher for masters w/o cache warming. -->
    <maxWarmingSearchers>4</maxWarmingSearchers>

  </query>

  <!--
   Let the dispatch filter handle /select?qt=XXX
   handleSelect=true will use consistent error handling for /select and /update
   handleSelect=false will use solr1.1 style error formatting
  -->
  <requestDispatcher handleSelect="true" >
    <!--Make sure your system has some authentication before enabling remote streaming! -->
    <requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" />

    <!-- Set HTTP caching related parameters (for proxy caches and clients).

         To get the behaviour of Solr 1.2 (ie: no caching related headers)
         use the never304="true" option and do not specify a value for
         <cacheControl>
    -->
    <httpCaching never304="true">
      <!--httpCaching lastModFrom="openTime"
                      etagSeed="Solr"-->
      <!-- lastModFrom="openTime" is the default, the Last-Modified value
           (and validation against If-Modified-Since requests) will all be
           relative to when the current Searcher was opened.
           You can change it to lastModFrom="dirLastMod" if you want the
           value to exactly correspond to when the physical index was last
           modified.

           etagSeed="..." is an option you can change to force the ETag
           header (and validation against If-None-Match requests) to be
           different even if the index has not changed (ie: when making
           significant changes to your config file)

           lastModFrom and etagSeed are both ignored if you use the
           never304="true" option.
      -->
      <!-- If you include a <cacheControl> directive, it will be used to
           generate a Cache-Control header, as well as an Expires header
           if the value contains "max-age="

           By default, no Cache-Control header is generated.

           You can use the <cacheControl> option even if you have set
           never304="true"
      -->
      <!-- <cacheControl>max-age=30, public</cacheControl> -->
    </httpCaching>
  </requestDispatcher>


  <!-- requestHandler plugins... incoming queries will be dispatched to the
       correct handler based on the path or the qt (query type) param.
       Names starting with a '/' are accessed with a path equal to the
       registered name. Names without a leading '/' are accessed with:
         http://host/app/select?qt=name
       If no qt is defined, the requestHandler that declares default="true"
       will be used.
  -->
  <requestHandler name="standard" class="solr.StandardRequestHandler" default="true">
    <!-- default values for query parameters -->
    <lst name="defaults">
      <str name="echoParams">explicit</str>
      <!--
      <int name="rows">10</int>
      <str name="fl">*</str>
      <str name="version">2.1</str>
      -->
    </lst>
  </requestHandler>

  <requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
    <lst name="defaults">
      <lst name="datasource">
        <str name="type">MockDataSource</str>
      </lst>
    </lst>
  </requestHandler>
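  <!-- Reading of the block above (an interpretation, not normative): the
       nested "datasource" defaults are handler-level DataImportHandler
       configuration, letting these tests supply MockDataSource as the data
       source type without declaring one in the data-config file. -->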

  <!--

   Search components are registered to SolrCore and used by Search Handlers

   By default, the following components are available:

   <searchComponent name="query" class="org.apache.solr.handler.component.QueryComponent" />
   <searchComponent name="facet" class="org.apache.solr.handler.component.FacetComponent" />
   <searchComponent name="mlt" class="org.apache.solr.handler.component.MoreLikeThisComponent" />
   <searchComponent name="highlight" class="org.apache.solr.handler.component.HighlightComponent" />
   <searchComponent name="debug" class="org.apache.solr.handler.component.DebugComponent" />

   If you register a searchComponent to one of the standard names, that will be used instead.

  -->

  <requestHandler name="/search" class="org.apache.solr.handler.component.SearchHandler">
    <lst name="defaults">
      <str name="echoParams">explicit</str>
    </lst>
    <!--
    By default, this will register the following components:

    <arr name="components">
      <str>query</str>
      <str>facet</str>
      <str>mlt</str>
      <str>highlight</str>
      <str>debug</str>
    </arr>

    To insert handlers before or after the 'standard' components, use:

    <arr name="first-components">
      <str>first</str>
    </arr>

    <arr name="last-components">
      <str>last</str>
    </arr>

    -->
  </requestHandler>

  <!-- Update request handler.

       Note: Since solr1.1, requestHandlers require a valid content type header if posted in
       the body. For example, curl now requires: -H 'Content-type:text/xml; charset=utf-8'
       The response format differs from solr1.1 formatting and returns a standard error code.

       To enable solr1.1 behavior, remove the /update handler or change its path.

       "update.processor.class" is the class name for the UpdateRequestProcessor. It is initialized
       only once and cannot be changed for each request.
  -->
  <requestHandler name="/update" class="solr.XmlUpdateRequestHandler" >
    <!--
    <str name="update.processor.class">org.apache.solr.handler.UpdateRequestProcessor</str>
    -->
  </requestHandler>

  <!-- config for the admin interface -->
  <admin>
    <defaultQuery>*:*</defaultQuery>

    <!-- configure a healthcheck file for servers behind a loadbalancer
    <healthcheck type="file">server-enabled</healthcheck>
    -->
  </admin>

</config>

@ -0,0 +1,2 @@
#Thu Jul 31 00:57:26 IST 2008
last_index_time=2008-07-31 00\:57\:26
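# Illustrative use (an assumed data-config snippet, not part of this file):
# the stored timestamp is exposed to delta queries as
# ${dataimporter.last_index_time}, e.g.
#   deltaQuery="select id from x where last_modified > '${dataimporter.last_index_time}'"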

@ -0,0 +1,20 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#use a protected word file to avoid stemming two
#unrelated words to the same base word.
#to test, we will use words that would normally obviously be stemmed.
cats
ridding

@ -0,0 +1,8 @@
<dataConfig>
  <document>
    <entity name="x" query="select * from x">
      <field column="id" />
      <field column="desc" />
    </entity>
  </document>
</dataConfig>
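<!-- A sketch of how an entity like the one above might be extended for
     incremental (delta) indexing; the pk and column names are assumptions,
     not part of this test fixture:
     <entity name="x" pk="id" query="select * from x"
             deltaQuery="select id from x where last_modified > '${dataimporter.last_index_time}'">
-->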

@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
stopworda
stopwordb

@ -0,0 +1,22 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
a => aa
b => b1 b2
c => c1,c2
a\=>a => b\=>b
a\,a => b\,b
foo,bar,baz

Television,TV,Televisions