mirror of https://github.com/apache/lucene.git
SOLR-980 -- A PlainTextEntityProcessor which can read from any DataSource<Reader> and output a String
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@738401 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
61f9aab471
commit
8220d4383e
|
@ -56,6 +56,9 @@ New Features
|
|||
12.SOLR-988: Add a new scope for session data stored in Context to store objects across imports.
|
||||
(Noble Paul via shalin)
|
||||
|
||||
13.SOLR-980: A PlainTextEntityProcessor which can read from any DataSource<Reader> and output a String.
|
||||
(Nathan Adams, Noble Paul via shalin)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
1. SOLR-846: Reduce memory consumption during delta import by removing keys when used
|
||||
|
|
|
@ -0,0 +1,84 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
|
||||
import static org.apache.solr.handler.dataimport.DataImportHandlerException.wrapAndThrow;
|
||||
import static org.apache.solr.handler.dataimport.XPathEntityProcessor.URL;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.io.StringWriter;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* <p>An implementation of EntityProcessor which reads data from a url/file and give out a row which contains one String
|
||||
* value. The name of the field is 'plainText'.
|
||||
*
|
||||
* @version $Id$
|
||||
* @since solr 1.4
|
||||
*/
|
||||
public class PlainTextEntityProcessor extends EntityProcessorBase {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(PlainTextEntityProcessor.class);
|
||||
private boolean ended = false;
|
||||
|
||||
public void init(Context context) {
|
||||
super.init(context);
|
||||
ended = false;
|
||||
}
|
||||
|
||||
public Map<String, Object> nextRow() {
|
||||
if (ended) return null;
|
||||
DataSource<Reader> ds = context.getDataSource();
|
||||
String url = context.getVariableResolver().replaceTokens(context.getEntityAttribute(URL));
|
||||
Reader r = null;
|
||||
try {
|
||||
r = ds.getData(url);
|
||||
} catch (Exception e) {
|
||||
if (ABORT.equals(onError)) {
|
||||
wrapAndThrow(SEVERE, e, "Exception reading url : " + url);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
StringWriter sw = new StringWriter();
|
||||
char[] buf = new char[1024];
|
||||
while (true) {
|
||||
int len = 0;
|
||||
try {
|
||||
len = r.read(buf);
|
||||
} catch (IOException e) {
|
||||
if (ABORT.equals(onError)) {
|
||||
wrapAndThrow(SEVERE, e, "Exception reading url : " + url);
|
||||
} else {
|
||||
LOG.warn("IOException while reading from data source", e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
if (len <= 0) break;
|
||||
sw.append(new String(buf, 0, len));
|
||||
}
|
||||
Map<String, Object> row = new HashMap<String, Object>();
|
||||
row.put(PLAIN_TEXT, sw.toString());
|
||||
ended = true;
|
||||
return super.applyTransformer(row);
|
||||
}
|
||||
|
||||
public static final String PLAIN_TEXT = "plainText";
|
||||
}
|
|
@ -0,0 +1,70 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.dataimport;
|
||||
|
||||
import junit.framework.Assert;
|
||||
import static org.apache.solr.handler.dataimport.AbstractDataImportHandlerTest.createMap;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.StringReader;
|
||||
import java.util.Properties;
|
||||
|
||||
/**
|
||||
* Test for PlainTextEntityProcessor
|
||||
*
|
||||
* @version $Id$
|
||||
* @see org.apache.solr.handler.dataimport.PlainTextEntityProcessor
|
||||
* @since solr 1.4
|
||||
*/
|
||||
public class TestPlainTextEntityProcessor {
|
||||
@Test
|
||||
public void simple() {
|
||||
DataImporter di = new DataImporter();
|
||||
di.loadAndInit(DATA_CONFIG);
|
||||
TestDocBuilder.SolrWriterImpl sw = new TestDocBuilder.SolrWriterImpl();
|
||||
DataImporter.RequestParams rp = new DataImporter.RequestParams(createMap("command", "full-import"));
|
||||
di.runCmd(rp, sw);
|
||||
Assert.assertEquals(DS.s, sw.docs.get(0).getFieldValue("x"));
|
||||
|
||||
}
|
||||
|
||||
public static class DS extends DataSource {
|
||||
static String s = "hello world";
|
||||
|
||||
public void init(Context context, Properties initProps) {
|
||||
|
||||
}
|
||||
|
||||
public Object getData(String query) {
|
||||
|
||||
return new StringReader(s);
|
||||
}
|
||||
|
||||
public void close() {
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
static String DATA_CONFIG = "<dataConfig>\n" +
|
||||
"\t<dataSource type=\"TestPlainTextEntityProcessor$DS\" />\n" +
|
||||
"\t<document>\n" +
|
||||
"\t\t<entity processor=\"PlainTextEntityProcessor\" name=\"x\" query=\"x\">\n" +
|
||||
"\t\t\t<field column=\"plainText\" name=\"x\" />\n" +
|
||||
"\t\t</entity>\n" +
|
||||
"\t</document>\n" +
|
||||
"</dataConfig>";
|
||||
}
|
Loading…
Reference in New Issue