mirror of https://github.com/apache/lucene.git
SOLR-833 -- A DataSource to read data from a field as a reader. This can be used, for example, to read XMLs residing as CLOBs or BLOBs in databases.
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@713343 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d501b33bc8
commit
dff17dc8a7
|
@ -28,6 +28,10 @@ New Features
|
|||
3. SOLR-842: Better error handling in DataImportHandler with options to abort, skip and continue imports.
|
||||
(Noble Paul, shalin)
|
||||
|
||||
4. SOLR-833: A DataSource to read data from a field as a reader. This can be used, for example, to read XMLs
|
||||
residing as CLOBs or BLOBs in databases.
|
||||
(Noble Paul via shalin)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -64,7 +64,7 @@ public class DataImporter {
|
|||
|
||||
private Properties store = new Properties();
|
||||
|
||||
private Map<String, Properties> dataSourceProps;
|
||||
private Map<String, Properties> dataSourceProps = new HashMap<String, Properties>();
|
||||
|
||||
private IndexSchema schema;
|
||||
|
||||
|
@ -159,6 +159,15 @@ public class DataImporter {
|
|||
|
||||
}
|
||||
|
||||
/**Used by tests
|
||||
*/
|
||||
void loadAndInit(String configStr){
|
||||
loadDataConfig(configStr);
|
||||
Map<String, DataConfig.Field> fields = new HashMap<String, DataConfig.Field>();
|
||||
DataConfig.Entity e = getConfig().documents.get(0).entities.get(0);
|
||||
initEntity(e, fields, false);
|
||||
}
|
||||
|
||||
void loadDataConfig(String configFile) {
|
||||
|
||||
try {
|
||||
|
@ -193,27 +202,29 @@ public class DataImporter {
|
|||
if (e.fields != null) {
|
||||
for (DataConfig.Field f : e.fields) {
|
||||
f.nameOrColName = f.getName();
|
||||
SchemaField schemaField = schema.getFieldOrNull(f.getName());
|
||||
if (schemaField != null) {
|
||||
f.multiValued = schemaField.multiValued();
|
||||
f.allAttributes.put(MULTI_VALUED, Boolean.toString(schemaField
|
||||
.multiValued()));
|
||||
f.allAttributes.put(TYPE, schemaField.getType().getTypeName());
|
||||
f.allAttributes.put("indexed", Boolean
|
||||
.toString(schemaField.indexed()));
|
||||
f.allAttributes.put("stored", Boolean.toString(schemaField.stored()));
|
||||
f.allAttributes.put("defaultValue", schemaField.getDefaultValue());
|
||||
} else {
|
||||
if (schema != null) {
|
||||
SchemaField schemaField = schema.getFieldOrNull(f.getName());
|
||||
if (schemaField != null) {
|
||||
f.multiValued = schemaField.multiValued();
|
||||
f.allAttributes.put(MULTI_VALUED, Boolean.toString(schemaField
|
||||
.multiValued()));
|
||||
f.allAttributes.put(TYPE, schemaField.getType().getTypeName());
|
||||
f.allAttributes.put("indexed", Boolean
|
||||
.toString(schemaField.indexed()));
|
||||
f.allAttributes.put("stored", Boolean.toString(schemaField.stored()));
|
||||
f.allAttributes.put("defaultValue", schemaField.getDefaultValue());
|
||||
} else {
|
||||
|
||||
try {
|
||||
f.allAttributes.put(TYPE, schema.getDynamicFieldType(f.getName())
|
||||
.getTypeName());
|
||||
f.allAttributes.put(MULTI_VALUED, "true");
|
||||
f.multiValued = true;
|
||||
} catch (RuntimeException e2) {
|
||||
LOG.info("Field in data-config.xml - " + f.getName()
|
||||
+ " not found in schema.xml");
|
||||
f.toWrite = false;
|
||||
try {
|
||||
f.allAttributes.put(TYPE, schema.getDynamicFieldType(f.getName())
|
||||
.getTypeName());
|
||||
f.allAttributes.put(MULTI_VALUED, "true");
|
||||
f.multiValued = true;
|
||||
} catch (RuntimeException e2) {
|
||||
LOG.info("Field in data-config.xml - " + f.getName()
|
||||
+ " not found in schema.xml");
|
||||
f.toWrite = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
fields.put(f.getName(), f);
|
||||
|
|
|
@ -370,7 +370,7 @@ public class DocBuilder {
|
|||
continue;
|
||||
}
|
||||
DataConfig.Field field = entity.colNameVsField.get(key);
|
||||
if (field == null) {
|
||||
if (field == null && dataImporter.getSchema() != null) {
|
||||
// This can be a dynamic field or a field which does not have an entry in data-config ( an implicit field)
|
||||
SchemaField sf = dataImporter.getSchema().getFieldOrNull(key);
|
||||
if (sf == null) {
|
||||
|
@ -381,7 +381,7 @@ public class DocBuilder {
|
|||
}
|
||||
//else do nothing. if we add it it may fail
|
||||
} else {
|
||||
if (field.toWrite) {
|
||||
if (field != null && field.toWrite) {
|
||||
addFieldToDoc(entry.getValue(), key, field.boost, field.multiValued, doc);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,122 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.*;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.lang.reflect.Method;
|
||||
import java.lang.reflect.Modifier;
|
||||
import java.sql.Blob;
|
||||
import java.sql.Clob;
|
||||
import java.util.Properties;
|
||||
|
||||
/**
|
||||
* This can be useful for users who has a DB field containing xml and wish to use a nested XPathEntityProcessor
|
||||
* <p/>
|
||||
* The datasouce may be configured as follows
|
||||
* <p/>
|
||||
* <datasource name="f1" type="FieldReaderDataSource" />
|
||||
* <p/>
|
||||
* The enity which uses this datasource must keep the url value as the varaible name url="field-name"
|
||||
* <p/>
|
||||
* The fieldname must be resolvable from VariableResolver
|
||||
* <p/>
|
||||
* This may be used with any EntityProcessor which uses a DataSource<Reader> eg:XPathEntityProcessor
|
||||
* <p/>
|
||||
* Supports String, BLOB, CLOB data types and there is an extra field (in the entity) 'encoding' for BLOB types
|
||||
*
|
||||
* @version $Id$
|
||||
* @since 1.4
|
||||
*/
|
||||
public class FieldReaderDataSource extends DataSource<Reader> {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(FieldReaderDataSource.class);
|
||||
protected VariableResolver vr;
|
||||
protected String dataField;
|
||||
private String encoding;
|
||||
|
||||
public void init(Context context, Properties initProps) {
|
||||
vr = context.getVariableResolver();
|
||||
dataField = context.getEntityAttribute("dataField");
|
||||
encoding = context.getEntityAttribute("encoding");
|
||||
/*no op*/
|
||||
}
|
||||
|
||||
public Reader getData(String query) {
|
||||
Object o = vr.resolve(dataField);
|
||||
if (o == null) return null;
|
||||
if (o instanceof String) {
|
||||
return new StringReader((String) o);
|
||||
} else if (o instanceof Clob) {
|
||||
Clob clob = (Clob) o;
|
||||
try {
|
||||
//Most of the JDBC drivers have getCharacterStream defined as public
|
||||
// so let us just check it
|
||||
Method m = clob.getClass().getDeclaredMethod("getCharacterStream");
|
||||
if (Modifier.isPublic(m.getModifiers())) {
|
||||
return (Reader) m.invoke(clob);
|
||||
} else {
|
||||
// force invoke
|
||||
m.setAccessible(true);
|
||||
return (Reader) m.invoke(clob);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
LOG.info("Unable to get data from CLOB");
|
||||
return null;
|
||||
|
||||
}
|
||||
|
||||
} else if (o instanceof Blob) {
|
||||
Blob blob = (Blob) o;
|
||||
try {
|
||||
//Most of the JDBC drivers have getBinaryStream defined as public
|
||||
// so let us just check it
|
||||
Method m = blob.getClass().getDeclaredMethod("getBinaryStream");
|
||||
if (Modifier.isPublic(m.getModifiers())) {
|
||||
return getReader(m, blob);
|
||||
} else {
|
||||
// force invoke
|
||||
m.setAccessible(true);
|
||||
return getReader(m, blob);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
LOG.info("Unable to get data from BLOB");
|
||||
return null;
|
||||
|
||||
}
|
||||
} else {
|
||||
return new StringReader(o.toString());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private Reader getReader(Method m, Blob blob)
|
||||
throws IllegalAccessException, InvocationTargetException, UnsupportedEncodingException {
|
||||
InputStream is = (InputStream) m.invoke(blob);
|
||||
if (encoding == null) {
|
||||
return (new InputStreamReader(is));
|
||||
} else {
|
||||
return (new InputStreamReader(is, encoding));
|
||||
}
|
||||
}
|
||||
|
||||
public void close() {
|
||||
|
||||
}
|
||||
}
|
|
@ -67,6 +67,8 @@ public class TemplateString {
|
|||
* @return the string with all variables replaced
|
||||
*/
|
||||
public String replaceTokens(String string, VariableResolver resolver) {
|
||||
if (string == null)
|
||||
return null;
|
||||
TemplateString ts = cache.get(string);
|
||||
if (ts == null) {
|
||||
ts = new TemplateString(string);
|
||||
|
|
|
@ -37,8 +37,8 @@ import java.util.concurrent.atomic.AtomicBoolean;
|
|||
import java.util.concurrent.atomic.AtomicReference;
|
||||
|
||||
/**
|
||||
* <p/> An implementation of EntityProcessor which uses a streaming xpath parser to extract values out of XML documents.
|
||||
* It is typically used in conjunction with HttpDataSource or FileDataSource. </p> <p/> <p/> Refer to <a
|
||||
* <p> An implementation of EntityProcessor which uses a streaming xpath parser to extract values out of XML documents.
|
||||
* It is typically used in conjunction with HttpDataSource or FileDataSource. </p> <p/> <p> Refer to <a
|
||||
* href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a> for more
|
||||
* details. </p>
|
||||
* <p/>
|
||||
|
@ -131,9 +131,8 @@ public class XPathEntityProcessor extends EntityProcessorBase {
|
|||
"Exception while reading xpaths for fields", e);
|
||||
}
|
||||
}
|
||||
|
||||
List<String> l = TemplateString.getVariables(context
|
||||
.getEntityAttribute(URL));
|
||||
String url = context.getEntityAttribute(URL);
|
||||
List<String> l = url == null ? Collections.EMPTY_LIST : TemplateString.getVariables(url);
|
||||
for (String s : l) {
|
||||
if (s.startsWith(entityName + ".")) {
|
||||
if (placeHolderVariables == null)
|
||||
|
@ -166,7 +165,6 @@ public class XPathEntityProcessor extends EntityProcessorBase {
|
|||
if (pk == null || result.get(pk) != null)
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
|
|
|
@ -0,0 +1,66 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import junit.framework.Assert;
|
||||
import static org.apache.solr.handler.dataimport.AbstractDataImportHandlerTest.createMap;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Test for FieldReaderDataSource
|
||||
*
|
||||
* @version $Id$
|
||||
* @see org.apache.solr.handler.dataimport.FieldReaderDataSource
|
||||
* @since 1.4
|
||||
*/
|
||||
public class TestFieldReader {
|
||||
|
||||
@Test
|
||||
public void simple() {
|
||||
DataImporter di = new DataImporter();
|
||||
di.loadAndInit(config);
|
||||
TestDocBuilder.SolrWriterImpl sw = new TestDocBuilder.SolrWriterImpl();
|
||||
DataImporter.RequestParams rp = new DataImporter.RequestParams(createMap("command", "full-import"));
|
||||
List<Map<String, Object>> l = new ArrayList<Map<String, Object>>();
|
||||
l.add(createMap("xml", xml));
|
||||
MockDataSource.setIterator("select * from a", l.iterator());
|
||||
di.runCmd(rp, sw, new HashMap<String, String>());
|
||||
Assert.assertEquals(sw.docs.get(0).getFieldValue("y"), "Hello");
|
||||
MockDataSource.clearCache();
|
||||
}
|
||||
|
||||
String config = "<dataConfig>\n" +
|
||||
" <dataSource type=\"FieldReaderDataSource\" name=\"f\"/>\n" +
|
||||
" <dataSource type=\"MockDataSource\"/>\n" +
|
||||
" <document>\n" +
|
||||
" <entity name=\"a\" query=\"select * from a\" >\n" +
|
||||
" <entity name=\"b\" dataSource=\"f\" processor=\"XPathEntityProcessor\" forEach=\"/x\" dataField=\"a.xml\">\n" +
|
||||
" <field column=\"y\" xpath=\"/x/y\"/>\n" +
|
||||
" </entity>\n" +
|
||||
" </entity>\n" +
|
||||
" </document>\n" +
|
||||
"</dataConfig>";
|
||||
|
||||
String xml = "<x>\n" +
|
||||
" <y>Hello</y>\n" +
|
||||
"</x>";
|
||||
}
|
Loading…
Reference in New Issue