mirror of https://github.com/apache/lucene.git
CSV updates: SOLR-66
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@524175 13f79535-47bb-0310-9956-ffa450edef68
parent 18dc9d98e4
commit 731416af36
@@ -135,6 +135,8 @@ New Features

19. SOLR-197: New parameters for input: stream.contentType for specifying
    or overriding the content type of input, and stream.file for reading
    local files. (Ryan McKinley via yonik)

20. SOLR-66: CSV data format for document additions and updates. (yonik)

Changes in runtime behavior

 1. Highlighting using DisMax will only pick up terms from the main
@@ -0,0 +1,11 @@
id,cat,name,price,inStock,author_t,series_t,sequence_i,genre_s
0553573403,book,A Game of Thrones,7.99,true,George R.R. Martin,"A Song of Ice and Fire",1,fantasy
0553579908,book,A Clash of Kings,7.99,true,George R.R. Martin,"A Song of Ice and Fire",2,fantasy
055357342X,book,A Storm of Swords,7.99,true,George R.R. Martin,"A Song of Ice and Fire",3,fantasy
0553293354,book,Foundation,7.99,true,Isaac Asimov,Foundation Novels,1,scifi
0812521390,book,The Black Company,6.99,false,Glen Cook,The Chronicles of The Black Company,1,fantasy
0812550706,book,Ender's Game,6.99,true,Orson Scott Card,Ender,1,scifi
0441385532,book,Jhereg,7.95,false,Steven Brust,Vlad Taltos,1,fantasy
0380014300,book,Nine Princes In Amber,6.99,true,Roger Zelazny,the Chronicles of Amber,1,fantasy
0805080481,book,The Book of Three,5.99,true,Lloyd Alexander,The Chronicles of Prydain,1,fantasy
080508049X,book,The Black Cauldron,5.99,true,Lloyd Alexander,The Chronicles of Prydain,2,fantasy
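Editor's note: a sample file like this can be pulled in server-side through the new stream.file parameter from SOLR-197. The sketch below is an illustration, not part of the commit; the host, port, and file path are assumptions based on the stock example layout, and remote streaming must be enabled in solrconfig.xml for stream.file to be honored.

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;

// Editor's sketch (not part of the commit): have /update/csv read a file
// local to the server via the new stream.file parameter (SOLR-197).
public class StreamFileSketch {
  public static void main(String[] args) throws Exception {
    String path = URLEncoder.encode("exampledocs/books.csv", "UTF-8");
    URL url = new URL("http://localhost:8983/solr/update/csv?commit=true&stream.file=" + path);
    HttpURLConnection con = (HttpURLConnection) url.openConnection();
    BufferedReader in = new BufferedReader(
        new InputStreamReader(con.getInputStream(), "UTF-8"));
    for (String line; (line = in.readLine()) != null; ) {
      System.out.println(line); // XML status response from the handler
    }
    in.close();
  }
}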
@@ -384,6 +384,11 @@
  <!-- NOTE, /update is mapped to a servlet, we can have the filter handle requests off that! -->
  <requestHandler name="/update/commit" class="solr.CommitRequestHandler" />

  <!-- CSV update handler, loaded on demand -->
  <requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy">
  </requestHandler>

  <!-- queryResponseWriter plugins... query responses will be written using the
    writer specified by the 'wt' request parameter matching the name of a registered
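With the handler registered as above, clients can also push CSV directly in the request body. A minimal sketch follows (an editor's illustration, not part of the commit; the URL and the example field names are assumptions based on the stock Solr example):

import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;

// Editor's sketch (not part of the commit): POST CSV rows in the request
// body to the /update/csv handler registered above.
public class CsvPostSketch {
  public static void main(String[] args) throws Exception {
    String csv = "id,name\n100,first doc\n101,second doc\n";
    URL url = new URL("http://localhost:8983/solr/update/csv?commit=true");
    HttpURLConnection con = (HttpURLConnection) url.openConnection();
    con.setDoOutput(true);                 // enables POST with a body
    con.setRequestMethod("POST");
    con.setRequestProperty("Content-Type", "text/plain; charset=UTF-8");
    OutputStream out = con.getOutputStream();
    out.write(csv.getBytes("UTF-8"));
    out.close();
    System.out.println("HTTP " + con.getResponseCode()); // 200 on success
  }
}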
@@ -0,0 +1,2 @@
AnyObjectId[8e096258a36f86e9b956e52f55df0b5afbe8999f] was removed in git history.
Apache SVN contains full history.
@@ -0,0 +1,386 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.handler;

import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrParams;
import org.apache.solr.request.SolrQueryResponse;
import org.apache.solr.util.ContentStream;
import org.apache.solr.core.SolrException;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.util.StrUtils;
import org.apache.solr.update.*;
import org.apache.commons.csv.CSVStrategy;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.io.IOUtils;

import java.util.regex.Pattern;
import java.util.List;
import java.io.*;

/**
 * @author yonik
 * @version $Id$
 */

public class CSVRequestHandler extends RequestHandlerBase {

  public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    CSVLoader loader = new SingleThreadedCSVLoader(req);

    Iterable<ContentStream> streams = req.getContentStreams();
    if (streams == null) {
      throw new SolrException(400, "missing content stream");
    }

    for (ContentStream stream : streams) {
      Reader reader = stream.getReader();
      try {
        loader.errHeader = "CSVLoader: input=" + stream.getSourceInfo();
        loader.load(reader);
      } finally {
        IOUtils.closeQuietly(reader);
      }
    }
  }

  //////////////////////// SolrInfoMBeans methods //////////////////////
  @Override
  public String getDescription() {
    return "Add/Update multiple documents with CSV formatted rows";
  }

  @Override
  public String getVersion() {
    return "$Revision:$";
  }

  @Override
  public String getSourceId() {
    return "$Id:$";
  }

  @Override
  public String getSource() {
    return "$URL:$";
  }
}


abstract class CSVLoader {
  static String SEPARATOR = "separator";
  static String FIELDNAMES = "fieldnames";
  static String HEADER = "header";
  static String SKIP = "skip";
  static String MAP = "map";
  static String TRIM = "trim";
  static String EMPTY = "keepEmpty";
  static String SPLIT = "split";
  static String ENCAPSULATOR = "encapsulator";
  static String COMMIT = "commit";
  static String OVERWRITE = "overwrite";

  private static Pattern colonSplit = Pattern.compile(":");
  private static Pattern commaSplit = Pattern.compile(",");

  final IndexSchema schema;
  final SolrParams params;
  final UpdateHandler handler;
  final CSVStrategy strategy;

  String[] fieldnames;
  SchemaField[] fields;
  CSVLoader.FieldAdder[] adders;

  int skipLines;  // number of lines to skip at start of file

  final AddUpdateCommand templateAdd;


  /** Add a field to a document unless it's zero length.
   * The FieldAdder hierarchy handles all the complexity of
   * further transforming or splitting field values to keep the
   * main logic loop clean.  All implementations of add() must be
   * MT-safe!
   */
  private class FieldAdder {
    void add(DocumentBuilder builder, int line, int column, String val) {
      if (val.length() > 0) {
        builder.addField(fields[column].getName(), val, 1.0f);
      }
    }
  }

  /** add zero length fields */
  private class FieldAdderEmpty extends CSVLoader.FieldAdder {
    void add(DocumentBuilder builder, int line, int column, String val) {
      builder.addField(fields[column].getName(), val, 1.0f);
    }
  }

  /** trim fields */
  private class FieldTrimmer extends CSVLoader.FieldAdder {
    private final CSVLoader.FieldAdder base;
    FieldTrimmer(CSVLoader.FieldAdder base) { this.base = base; }
    void add(DocumentBuilder builder, int line, int column, String val) {
      base.add(builder, line, column, val.trim());
    }
  }

  /** map a single value.
   * for just a couple of mappings, this is probably faster than
   * using a HashMap.
   */
  private class FieldMapperSingle extends CSVLoader.FieldAdder {
    private final String from;
    private final String to;
    private final CSVLoader.FieldAdder base;
    FieldMapperSingle(String from, String to, CSVLoader.FieldAdder base) {
      this.from = from;
      this.to = to;
      this.base = base;
    }
    void add(DocumentBuilder builder, int line, int column, String val) {
      if (from.equals(val)) val = to;
      base.add(builder, line, column, val);
    }
  }

  /** Split a single value into multiple values based on
   * a CSVStrategy.
   */
  private class FieldSplitter extends CSVLoader.FieldAdder {
    private final CSVStrategy strategy;
    private final CSVLoader.FieldAdder base;
    FieldSplitter(CSVStrategy strategy, CSVLoader.FieldAdder base) {
      this.strategy = strategy;
      this.base = base;
    }

    void add(DocumentBuilder builder, int line, int column, String val) {
      CSVParser parser = new CSVParser(new StringReader(val), strategy);
      try {
        String[] vals = parser.getLine();
        if (vals != null) {
          for (String v : vals) base.add(builder, line, column, v);
        } else {
          base.add(builder, line, column, val);
        }
      } catch (IOException e) {
        throw new SolrException(400, "error parsing field value for split: '" + val + "'");
      }
    }
  }


  String errHeader = "CSVLoader:";

  CSVLoader(SolrQueryRequest req) {
    this.params = req.getParams();
    handler = req.getCore().getUpdateHandler();
    schema = req.getSchema();

    templateAdd = new AddUpdateCommand();
    if (params.getBool(OVERWRITE, true)) {
      templateAdd.allowDups = false;
      templateAdd.overwriteCommitted = true;
      templateAdd.overwritePending = true;
    } else {
      templateAdd.allowDups = true;
      templateAdd.overwriteCommitted = false;
      templateAdd.overwritePending = false;
    }

    strategy = new CSVStrategy(',', '"', CSVStrategy.COMMENTS_DISABLED, true, false, true);
    String sep = params.get(SEPARATOR);
    if (sep != null) {
      if (sep.length() != 1) throw new SolrException(400, "Invalid separator:'" + sep + "'");
      strategy.setDelimiter(sep.charAt(0));
    }

    String encapsulator = params.get(ENCAPSULATOR);
    if (encapsulator != null) {
      if (encapsulator.length() != 1) throw new SolrException(400, "Invalid encapsulator:'" + encapsulator + "'");
      strategy.setEncapsulator(encapsulator.charAt(0));
    }

    String fn = params.get(FIELDNAMES);
    fieldnames = fn != null ? commaSplit.split(fn, -1) : null;

    Boolean hasHeader = params.getBool(HEADER);

    if (fieldnames == null) {
      if (null == hasHeader) {
        // assume the file has the headers if they aren't supplied in the args
        hasHeader = true;
      } else if (!hasHeader) {
        throw new SolrException(400, "CSVLoader: must specify fieldnames=<fields>* or header=true");
      }
    } else {
      // if the fieldnames were supplied and the file has a header, we need to
      // skip over that header.
      if (hasHeader != null && hasHeader) skipLines = 1;

      prepareFields();
    }
  }

  /** create the FieldAdders that control how each field is indexed */
  void prepareFields() {
    // Possible future optimization: for really rapid incremental indexing
    // from a POST, one could cache all of this setup info based on the params.
    // The link from FieldAdder to this would need to be severed for that to happen.

    fields = new SchemaField[fieldnames.length];
    adders = new CSVLoader.FieldAdder[fieldnames.length];
    String skipStr = params.get(SKIP);
    List<String> skipFields = skipStr == null ? null : StrUtils.splitSmart(skipStr, ',');

    CSVLoader.FieldAdder adder = new CSVLoader.FieldAdder();
    CSVLoader.FieldAdder adderKeepEmpty = new CSVLoader.FieldAdderEmpty();

    for (int i = 0; i < fields.length; i++) {
      String fname = fieldnames[i];
      // to skip a field, leave the entries in fields and adders null
      if (fname.length() == 0 || (skipFields != null && skipFields.contains(fname))) continue;

      fields[i] = schema.getField(fname);
      boolean keepEmpty = params.getFieldBool(fname, EMPTY, false);
      adders[i] = keepEmpty ? adderKeepEmpty : adder;

      // Order that operations are applied: split -> trim -> map -> add
      // so create in reverse order.
      // Creation of FieldAdders could be optimized and shared among fields
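      // Editor's illustration (not in the original commit): with hypothetical
      // params f.foo.split=true, f.foo.trim=true and map=from:to, the wrapping
      // below produces the decorator chain
      //   FieldSplitter( FieldTrimmer( FieldMapperSingle( base FieldAdder ) ) )
      // so add() applies split first, then trim, then map, then the final add.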

      String[] fmap = params.getFieldParams(fname, MAP);
      if (fmap != null) {
        for (String mapRule : fmap) {
          String[] mapArgs = colonSplit.split(mapRule, -1);
          if (mapArgs.length != 2)
            throw new SolrException(400, "Map rules must be of the form 'from:to', got '" + mapRule + "'");
          adders[i] = new CSVLoader.FieldMapperSingle(mapArgs[0], mapArgs[1], adders[i]);
        }
      }

      if (params.getFieldBool(fname, TRIM, false)) {
        adders[i] = new CSVLoader.FieldTrimmer(adders[i]);
      }

      if (params.getFieldBool(fname, SPLIT, false)) {
        String sepStr = params.getFieldParam(fname, SEPARATOR);
        char fsep = sepStr == null || sepStr.length() == 0 ? ',' : sepStr.charAt(0);
        String encStr = params.getFieldParam(fname, ENCAPSULATOR);
        char fenc = encStr == null || encStr.length() == 0 ? '\'' : encStr.charAt(0);

        CSVStrategy fstrat = new CSVStrategy(fsep, fenc, CSVStrategy.COMMENTS_DISABLED);
        adders[i] = new CSVLoader.FieldSplitter(fstrat, adders[i]);
      }
    }
  }

  private void input_err(String msg, String[] line, int lineno) {
    StringBuilder sb = new StringBuilder();
    sb.append(errHeader + ", line=" + lineno + "," + msg + "\n\tvalues={");
    for (String val : line) { sb.append("'" + val + "',"); }
    sb.append('}');
    throw new SolrException(400, sb.toString());
  }

  /** load the CSV input */
  void load(Reader input) throws IOException {
    Reader reader = input;
    if (skipLines > 0) {
      if (!(reader instanceof BufferedReader)) {
        reader = new BufferedReader(reader);
      }
      BufferedReader r = (BufferedReader) reader;
      for (int i = 0; i < skipLines; i++) {
        r.readLine();
      }
    }

    CSVParser parser = new CSVParser(reader, strategy);

    // parse the fieldnames from the header of the file
    if (fieldnames == null) {
      fieldnames = parser.getLine();
      if (fieldnames == null) {
        throw new SolrException(400, "Expected fieldnames in CSV input");
      }
      prepareFields();
    }

    // read the rest of the CSV file
    for (;;) {
      int line = parser.getLineNumber();  // for error reporting in MT mode
      String[] vals = parser.getLine();
      if (vals == null) break;

      if (vals.length != fields.length) {
        input_err("expected " + fields.length + " values but got " + vals.length, vals, line);
      }

      addDoc(line, vals);
    }

    if (params.getBool(COMMIT, true)) {
      handler.commit(new CommitUpdateCommand(false));
    }
  }

  /** called for each line of values (document) */
  abstract void addDoc(int line, String[] vals) throws IOException;

  /** this must be MT safe... may be called concurrently from multiple threads. */
  void doAdd(int line, String[] vals, DocumentBuilder builder, AddUpdateCommand template) throws IOException {
    // the line number is passed simply for error reporting in MT mode.
    // first, create the lucene document
    builder.startDoc();
    for (int i = 0; i < vals.length; i++) {
      if (fields[i] == null) continue;  // ignore this field
      String val = vals[i];
      adders[i].add(builder, line, i, val);
    }
    builder.endDoc();

    template.doc = builder.getDoc();
    handler.addDoc(template);
  }

}


class SingleThreadedCSVLoader extends CSVLoader {
  protected DocumentBuilder builder;

  SingleThreadedCSVLoader(SolrQueryRequest req) {
    super(req);
    builder = new DocumentBuilder(schema);
  }

  void addDoc(int line, String[] vals) throws IOException {
    templateAdd.indexedId = null;
    doAdd(line, vals, builder, templateAdd);
  }
}
@@ -165,6 +165,14 @@ public abstract class SolrParams {
    return val != null ? val : get(param);
  }

  /** returns the String values of the field parameter, "f.field.param", or
   *  the values for "param" if that is not set.
   */
  public String[] getFieldParams(String field, String param) {
    String[] val = getParams(fpname(field, param));
    return val != null ? val : getParams(param);
  }

  /** Returns the Boolean value of the param, or null if not set */
  public Boolean getBool(String param) {
    String val = get(param);
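The per-field override convention behind getFieldParam/getFieldParams is easy to see in isolation. Below is a self-contained sketch of the lookup rule (an editor's illustration in plain Java with the Solr classes mocked away; the names here are illustrative, not Solr API):

import java.util.HashMap;
import java.util.Map;

// Editor's sketch (not Solr API): the lookup rule implemented by
// SolrParams.getFieldParam/getFieldParams, reduced to plain Java.
// "f.<field>.<param>" takes precedence; otherwise fall back to "<param>".
public class FieldParamFallback {
  static String getFieldParam(Map<String, String> params, String field, String param) {
    String val = params.get("f." + field + "." + param); // field-specific name first
    return val != null ? val : params.get(param);        // then the global default
  }

  public static void main(String[] args) {
    Map<String, String> p = new HashMap<String, String>();
    p.put("separator", "|");          // global separator
    p.put("f.str_s.separator", ";");  // per-field override, as in TestCSVLoader
    System.out.println(getFieldParam(p, "str_s", "separator")); // prints ";"
    System.out.println(getFieldParam(p, "id", "separator"));    // prints "|"
  }
}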
@@ -197,19 +197,32 @@ public class TestHarness {
   * @see LocalSolrQueryRequest
   */
  public String query(SolrQueryRequest req) throws IOException, Exception {
    return query(req.getQueryType(), req);
  }

  /**
   * Processes a "query" using a user constructed SolrQueryRequest
   *
   * @param handler the name of the request handler to process the request
   * @param req the Query to process, will be closed.
   * @return The XML response to the query
   * @exception Exception any exception in the response.
   * @exception IOException if there is a problem writing the XML
   * @see LocalSolrQueryRequest
   */
  public String query(String handler, SolrQueryRequest req) throws IOException, Exception {
    SolrQueryResponse rsp = new SolrQueryResponse();
    core.execute(core.getRequestHandler(handler), req, rsp);
    if (rsp.getException() != null) {
      throw rsp.getException();
    }

    StringWriter sw = new StringWriter(32000);
    QueryResponseWriter responseWriter = core.getQueryResponseWriter(req);
    responseWriter.write(sw, req, rsp);

    req.close();

    return sw.toString();
  }
@@ -0,0 +1,239 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.handler;

import org.apache.solr.util.AbstractSolrTestCase;
import org.apache.solr.util.ContentStream;
import org.apache.solr.util.ContentStreamBase;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.core.SolrException;

import java.io.*;
import java.util.List;
import java.util.ArrayList;

public class TestCSVLoader extends AbstractSolrTestCase {

  public String getSchemaFile() { return "schema.xml"; }
  public String getSolrConfigFile() { return "solrconfig.xml"; }

  String filename = "solr_tmp.csv";
  String def_charset = "UTF-8";
  File file = new File(filename);

  public void setUp() throws Exception {
    // if you override setUp or tearDown, you better call
    // the super class's version
    super.setUp();
  }

  public void tearDown() throws Exception {
    // if you override setUp or tearDown, you better call
    // the super class's version
    super.tearDown();
    deleteFile();
  }

  void makeFile(String contents) {
    makeFile(contents, def_charset);
  }

  void makeFile(String contents, String charset) {
    try {
      Writer out = new OutputStreamWriter(new FileOutputStream(filename), charset);
      out.write(contents);
      out.close();
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }

  void deleteFile() {
    file.delete();
  }

  void cleanup() {
    assertU(delQ("id:[100 TO 110]"));
    assertU(commit());
  }

  void loadLocal(String... args) throws Exception {
    LocalSolrQueryRequest req = (LocalSolrQueryRequest) req(args);

    // TODO: stop using locally defined streams once stream.file and
    // stream.body work everywhere
    List<ContentStream> cs = new ArrayList<ContentStream>();
    cs.add(new ContentStreamBase.FileStream(new File(filename)));
    req.setContentStreams(cs);
    h.query("/update/csv", req);
  }

  public void testCSVLoad() throws Exception {
    makeFile("id\n100\n101\n102");
    loadLocal("stream.file", filename);
    // csv loader currently defaults to committing
    // assertU(commit());
    assertQ(req("id:[100 TO 110]"), "//*[@numFound='3']");
  }

  public void testCommitFalse() throws Exception {
    makeFile("id\n100\n101\n102");
    loadLocal("stream.file", filename, "commit", "false");
    assertQ(req("id:[100 TO 110]"), "//*[@numFound='0']");
    assertU(commit());
    assertQ(req("id:[100 TO 110]"), "//*[@numFound='3']");
  }

  public void testCommitTrue() throws Exception {
    makeFile("id\n100\n101\n102");
    loadLocal("stream.file", filename, "commit", "true");
    assertQ(req("id:[100 TO 110]"), "//*[@numFound='3']");
  }

  public void testCSV() throws Exception {
    lrf.args.put("version", "2.0");

    makeFile("id,str_s\n100,\"quoted\"\n101,\n102,\"\"\n103,");
    loadLocal("stream.file", filename, "commit", "true");
    assertQ(req("id:[100 TO 110]"), "//*[@numFound='4']");
    assertQ(req("id:100"), "//str[@name='str_s'][.='quoted']");
    assertQ(req("id:101"), "count(//str[@name='str_s'])=0");
    // 102 is a quoted zero length field ,"", as opposed to ,,
    // but we can't distinguish this case (and it's debatable
    // if we should).  Does CSV have a way to specify missing
    // vs zero-length?
    assertQ(req("id:102"), "count(//str[@name='str_s'])=0");
    assertQ(req("id:103"), "count(//str[@name='str_s'])=0");

    // test overwrite by default
    loadLocal("stream.file", filename, "commit", "true");
    assertQ(req("id:[100 TO 110]"), "//*[@numFound='4']");

    // test no overwrites
    loadLocal("stream.file", filename, "commit", "true", "overwrite", "false");
    assertQ(req("id:[100 TO 110]"), "//*[@numFound='8']");

    // test overwrite
    loadLocal("stream.file", filename, "commit", "true");
    assertQ(req("id:[100 TO 110]"), "//*[@numFound='4']");

    // test global value mapping
    loadLocal("stream.file", filename, "commit", "true", "map", "quoted:QUOTED");
    assertQ(req("id:[100 TO 110]"), "//*[@numFound='4']");
    assertQ(req("id:100"), "//str[@name='str_s'][.='QUOTED']");
    assertQ(req("id:101"), "count(//str[@name='str_s'])=0");
    assertQ(req("id:102"), "count(//str[@name='str_s'])=0");
    assertQ(req("id:103"), "count(//str[@name='str_s'])=0");

    // test value mapping to empty (remove)
    loadLocal("stream.file", filename, "commit", "true", "map", "quoted:");
    assertQ(req("id:[100 TO 110]"), "//*[@numFound='4']");
    assertQ(req("id:100"), "count(//str[@name='str_s'])=0");

    // test value mapping from empty
    loadLocal("stream.file", filename, "commit", "true", "map", ":EMPTY");
    assertQ(req("id:[100 TO 110]"), "//*[@numFound='4']");
    assertQ(req("id:100"), "//str[@name='str_s'][.='quoted']");
    assertQ(req("id:101"), "//str[@name='str_s'][.='EMPTY']");
    assertQ(req("id:102"), "//str[@name='str_s'][.='EMPTY']");
    assertQ(req("id:103"), "//str[@name='str_s'][.='EMPTY']");

    // test multiple map rules
    loadLocal("stream.file", filename, "commit", "true", "map", ":EMPTY", "map", "quoted:QUOTED");
    assertQ(req("id:[100 TO 110]"), "//*[@numFound='4']");
    assertQ(req("id:100"), "//str[@name='str_s'][.='QUOTED']");
    assertQ(req("id:101"), "//str[@name='str_s'][.='EMPTY']");
    assertQ(req("id:102"), "//str[@name='str_s'][.='EMPTY']");
    assertQ(req("id:103"), "//str[@name='str_s'][.='EMPTY']");

    // test indexing empty fields
    loadLocal("stream.file", filename, "commit", "true", "f.str_s.keepEmpty", "true");
    assertQ(req("id:[100 TO 110]"), "//*[@numFound='4']");
    assertQ(req("id:100"), "//str[@name='str_s'][.='quoted']");
    assertQ(req("id:101"), "//str[@name='str_s'][.='']");
    assertQ(req("id:102"), "//str[@name='str_s'][.='']");
    assertQ(req("id:103"), "//str[@name='str_s'][.='']");

    // test overriding the name of fields
    loadLocal("stream.file", filename, "commit", "true",
              "fieldnames", "id,my_s", "header", "true",
              "f.my_s.map", ":EMPTY");
    assertQ(req("id:[100 TO 110]"), "//*[@numFound='4']");
    assertQ(req("id:100"), "//str[@name='my_s'][.='quoted']");
    assertQ(req("id:101"), "count(//str[@name='str_s'])=0");
    assertQ(req("id:102"), "count(//str[@name='str_s'])=0");
    assertQ(req("id:103"), "count(//str[@name='str_s'])=0");
    assertQ(req("id:101"), "//str[@name='my_s'][.='EMPTY']");
    assertQ(req("id:102"), "//str[@name='my_s'][.='EMPTY']");
    assertQ(req("id:103"), "//str[@name='my_s'][.='EMPTY']");

    // test that header in file was skipped
    assertQ(req("id:id"), "//*[@numFound='0']");

    // test loading file as if it didn't have a header
    loadLocal("stream.file", filename, "commit", "true",
              "fieldnames", "id,my_s", "header", "false");
    assertQ(req("id:id"), "//*[@numFound='1']");
    assertQ(req("id:100"), "//str[@name='my_s'][.='quoted']");

    // test multi-valued fields via field splitting w/ mapping of subvalues
    makeFile("id,str_s\n"
        + "100,\"quoted\"\n"
        + "101,\"a,b,c\"\n"
        + "102,\"a,,b\"\n"
        + "103,\n");
    loadLocal("stream.file", filename, "commit", "true",
              "f.str_s.map", ":EMPTY",
              "f.str_s.split", "true");
    assertQ(req("id:[100 TO 110]"), "//*[@numFound='4']");
    assertQ(req("id:100"), "//str[@name='str_s'][.='quoted']");
    assertQ(req("id:101"), "//arr[@name='str_s']/str[1][.='a']");
    assertQ(req("id:101"), "//arr[@name='str_s']/str[2][.='b']");
    assertQ(req("id:101"), "//arr[@name='str_s']/str[3][.='c']");
    assertQ(req("id:102"), "//arr[@name='str_s']/str[2][.='EMPTY']");
    assertQ(req("id:103"), "//str[@name='str_s'][.='EMPTY']");

    // test alternate values for delimiters
    makeFile("id|str_s\n"
        + "100|^quoted^\n"
        + "101|a;'b';c\n"
        + "102|a;;b\n"
        + "103|\n");

    loadLocal("stream.file", filename, "commit", "true",
              "separator", "|",
              "encapsulator", "^",
              "f.str_s.map", ":EMPTY",
              "f.str_s.split", "true",
              "f.str_s.separator", ";",
              "f.str_s.encapsulator", "'"
    );
    assertQ(req("id:[100 TO 110]"), "//*[@numFound='4']");
    assertQ(req("id:100"), "//str[@name='str_s'][.='quoted']");
    assertQ(req("id:101"), "//arr[@name='str_s']/str[1][.='a']");
    assertQ(req("id:101"), "//arr[@name='str_s']/str[2][.='b']");
    assertQ(req("id:101"), "//arr[@name='str_s']/str[3][.='c']");
    assertQ(req("id:102"), "//arr[@name='str_s']/str[2][.='EMPTY']");
    assertQ(req("id:103"), "//str[@name='str_s'][.='EMPTY']");
  }

}
@@ -263,6 +263,9 @@
    </lst>
  </requestHandler>

  <requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy">
  </requestHandler>

  <!-- enable streaming for testing... -->
  <requestParsers enableRemoteStreaming="true" multipartUploadLimitInKB="2048" />

@@ -275,5 +278,4 @@
  <propTest attr1="${solr.test.sys.prop1}-$${literal}"
            attr2="${non.existent.sys.prop:default-from-config}">prefix-${solr.test.sys.prop2}-suffix</propTest>

</config>