CSV updates: SOLR-66

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@524175 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2007-03-30 16:59:58 +00:00
parent 18dc9d98e4
commit 731416af36
9 changed files with 672 additions and 4 deletions

View File

@ -136,6 +136,8 @@ New Features
or overriding the content type of input, and stream.file for reading or overriding the content type of input, and stream.file for reading
local files. (Ryan McKinley via yonik) local files. (Ryan McKinley via yonik)
20. SOLR-66: CSV data format for document additions and updates. (yonik)
Changes in runtime behavior Changes in runtime behavior
1. Highlighting using DisMax will only pick up terms from the main 1. Highlighting using DisMax will only pick up terms from the main
user query, not boost or filter queries (klaas). user query, not boost or filter queries (klaas).

11
example/exampledocs/books.csv Executable file
View File

@ -0,0 +1,11 @@
id,cat,name,price,inStock,author_t,series_t,sequence_i,genre_s
0553573403,book,A Game of Thrones,7.99,true,George R.R. Martin,"A Song of Ice and Fire",1,fantasy
0553579908,book,A Clash of Kings,7.99,true,George R.R. Martin,"A Song of Ice and Fire",2,fantasy
055357342X,book,A Storm of Swords,7.99,true,George R.R. Martin,"A Song of Ice and Fire",3,fantasy
0553293354,book,Foundation,7.99,true,Isaac Asimov,Foundation Novels,1,scifi
0812521390,book,The Black Company,6.99,false,Glen Cook,The Chronicles of The Black Company,1,fantasy
0812550706,book,Ender's Game,6.99,true,Orson Scott Card,Ender,1,scifi
0441385532,book,Jhereg,7.95,false,Steven Brust,Vlad Taltos,1,fantasy
0380014300,book,Nine Princes In Amber,6.99,true,Roger Zelazny,the Chronicles of Amber,1,fantasy
0805080481,book,The Book of Three,5.99,true,Lloyd Alexander,The Chronicles of Prydain,1,fantasy
080508049X,book,The Black Cauldron,5.99,true,Lloyd Alexander,The Chronicles of Prydain,2,fantasy
1 id cat name price inStock author_t series_t sequence_i genre_s
2 0553573403 book A Game of Thrones 7.99 true George R.R. Martin A Song of Ice and Fire 1 fantasy
3 0553579908 book A Clash of Kings 7.99 true George R.R. Martin A Song of Ice and Fire 2 fantasy
4 055357342X book A Storm of Swords 7.99 true George R.R. Martin A Song of Ice and Fire 3 fantasy
5 0553293354 book Foundation 7.99 true Isaac Asimov Foundation Novels 1 scifi
6 0812521390 book The Black Company 6.99 false Glen Cook The Chronicles of The Black Company 1 fantasy
7 0812550706 book Ender's Game 6.99 true Orson Scott Card Ender 1 scifi
8 0441385532 book Jhereg 7.95 false Steven Brust Vlad Taltos 1 fantasy
9 0380014300 book Nine Princes In Amber 6.99 true Roger Zelazny the Chronicles of Amber 1 fantasy
10 0805080481 book The Book of Three 5.99 true Lloyd Alexander The Chronicles of Prydain 1 fantasy
11 080508049X book The Black Cauldron 5.99 true Lloyd Alexander The Chronicles of Prydain 2 fantasy

View File

@ -385,6 +385,11 @@
<requestHandler name="/update/commit" class="solr.CommitRequestHandler" /> <requestHandler name="/update/commit" class="solr.CommitRequestHandler" />
<!-- CSV update handler, loaded on demand -->
<requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy">
</requestHandler>
<!-- queryResponseWriter plugins... query responses will be written using the <!-- queryResponseWriter plugins... query responses will be written using the
writer specified by the 'wt' request parameter matching the name of a registered writer specified by the 'wt' request parameter matching the name of a registered
writer. writer.

View File

@ -0,0 +1,2 @@
AnyObjectId[8e096258a36f86e9b956e52f55df0b5afbe8999f] was removed in git history.
Apache SVN contains full history.

View File

@ -0,0 +1,386 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrParams;
import org.apache.solr.request.SolrQueryResponse;
import org.apache.solr.util.ContentStream;
import org.apache.solr.core.SolrException;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.util.StrUtils;
import org.apache.solr.update.*;
import org.apache.commons.csv.CSVStrategy;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.io.IOUtils;
import java.util.regex.Pattern;
import java.util.List;
import java.io.*;
/**
 * Request handler that adds or updates documents from CSV formatted
 * content streams. Every content stream attached to the request is read
 * in turn by a single {@link SingleThreadedCSVLoader}.
 *
 * @author yonik
 * @version $Id$
 */
public class CSVRequestHandler extends RequestHandlerBase {

  /**
   * Loads each content stream of the request as CSV.
   *
   * @param req the request carrying the parameters and the content streams
   * @param rsp the response (not written to by this method)
   * @throws SolrException with code 400 if the request has no content streams
   * @throws Exception anything raised while reading or indexing a stream
   */
  public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    // The loader is built first so that parameter errors surface before
    // the streams are examined.
    CSVLoader csvLoader = new SingleThreadedCSVLoader(req);

    Iterable<ContentStream> contentStreams = req.getContentStreams();
    if (null == contentStreams) {
      throw new SolrException(400, "missing content stream");
    }

    for (ContentStream contentStream : contentStreams) {
      Reader in = contentStream.getReader();
      try {
        // identify the current input in any error message the loader builds
        csvLoader.errHeader = "CSVLoader: input=" + contentStream.getSourceInfo();
        csvLoader.load(in);
      } finally {
        // always release the reader, even when loading fails
        IOUtils.closeQuietly(in);
      }
    }
  }

  //////////////////////// SolrInfoMBeans methods //////////////////////

  /** @return a one line, human readable description of this handler */
  @Override
  public String getDescription() {
    return "Add/Update multiple documents with CSV formatted rows";
  }

  /** @return the revision keyword placeholder for this source file */
  @Override
  public String getVersion() {
    return "$Revision:$";
  }

  /** @return the id keyword placeholder for this source file */
  @Override
  public String getSourceId() {
    return "$Id:$";
  }

  /** @return the URL keyword placeholder for this source file */
  @Override
  public String getSource() {
    return "$URL:$";
  }
}
/**
 * Parses CSV input and turns every row into a document that is handed to
 * the core's update handler.
 *
 * <p>The request parameters named by the constants below control the
 * separator and encapsulator characters, where the field names come from
 * (the <code>fieldnames</code> parameter or the first line of the input),
 * and per-field transformations (skip, trim, map, split, keepEmpty).
 * Subclasses decide how a parsed row is dispatched by implementing
 * {@link #addDoc(int, String[])}.
 */
abstract class CSVLoader {
  // names of the supported request parameters
  static final String SEPARATOR="separator";
  static final String FIELDNAMES="fieldnames";
  static final String HEADER="header";
  static final String SKIP="skip";
  static final String MAP="map";
  static final String TRIM="trim";
  static final String EMPTY="keepEmpty";
  static final String SPLIT="split";
  static final String ENCAPSULATOR="encapsulator";
  static final String COMMIT="commit";
  static final String OVERWRITE="overwrite";

  private static final Pattern colonSplit = Pattern.compile(":");
  private static final Pattern commaSplit = Pattern.compile(",");

  final IndexSchema schema;
  final SolrParams params;
  final UpdateHandler handler;
  final CSVStrategy strategy;

  String[] fieldnames;
  SchemaField[] fields;
  CSVLoader.FieldAdder[] adders;

  int skipLines;    // number of lines to skip at start of file

  final AddUpdateCommand templateAdd;

  /** Add a field to a document unless it's zero length.
   * The FieldAdder hierarchy handles all the complexity of
   * further transforming or splitting field values to keep the
   * main logic loop clean.  All implementations of add() must be
   * MT-safe!
   */
  private class FieldAdder {
    void add(DocumentBuilder builder, int line, int column, String val) {
      if (val.length() > 0) {
        builder.addField(fields[column].getName(),val,1.0f);
      }
    }
  }

  /** add zero length fields */
  private class FieldAdderEmpty extends CSVLoader.FieldAdder {
    @Override
    void add(DocumentBuilder builder, int line, int column, String val) {
      builder.addField(fields[column].getName(),val,1.0f);
    }
  }

  /** trim fields */
  private class FieldTrimmer extends CSVLoader.FieldAdder {
    private final CSVLoader.FieldAdder base;
    FieldTrimmer(CSVLoader.FieldAdder base) { this.base=base; }
    @Override
    void add(DocumentBuilder builder, int line, int column, String val) {
      base.add(builder, line, column, val.trim());
    }
  }

  /** map a single value.
   * for just a couple of mappings, this is probably faster than
   * using a HashMap.
   */
  private class FieldMapperSingle extends CSVLoader.FieldAdder {
    private final String from;
    private final String to;
    private final CSVLoader.FieldAdder base;
    FieldMapperSingle(String from, String to, CSVLoader.FieldAdder base) {
      this.from=from;
      this.to=to;
      this.base=base;
    }
    @Override
    void add(DocumentBuilder builder, int line, int column, String val) {
      if (from.equals(val)) val=to;
      base.add(builder,line,column,val);
    }
  }

  /** Split a single value into multiple values based on
   * a CSVStrategy.
   */
  private class FieldSplitter extends CSVLoader.FieldAdder {
    private final CSVStrategy strategy;
    private final CSVLoader.FieldAdder base;
    FieldSplitter(CSVStrategy strategy, CSVLoader.FieldAdder base) {
      this.strategy = strategy;
      this.base = base;
    }
    @Override
    void add(DocumentBuilder builder, int line, int column, String val) {
      CSVParser parser = new CSVParser(new StringReader(val), strategy);
      try {
        String[] vals = parser.getLine();
        if (vals!=null) {
          for (String v: vals) base.add(builder,line,column,v);
        } else {
          // nothing could be parsed: pass the raw value through unchanged
          base.add(builder,line,column,val);
        }
      } catch (IOException e) {
        // report where the failure happened and keep the underlying cause
        SolrException ex = new SolrException(400,
            errHeader+", line="+line+", could not split value of field '"
            +fields[column].getName()+"': "+e.getMessage());
        ex.initCause(e);
        throw ex;
      }
    }
  }

  /** prefix of error messages; callers may replace it to identify the input */
  String errHeader="CSVLoader:";

  /**
   * Reads the loader configuration from the request parameters.
   *
   * @param req the request supplying the parameters, schema and update handler
   * @throws SolrException with code 400 if the separator or encapsulator is
   *         not exactly one character long, or if neither field names nor
   *         a header line are available
   */
  CSVLoader(SolrQueryRequest req) {
    this.params = req.getParams();
    handler = req.getCore().getUpdateHandler();
    schema = req.getSchema();

    // overwrite=true (the default) replaces existing documents,
    // overwrite=false allows duplicates.
    boolean overwrite = params.getBool(OVERWRITE,true);
    templateAdd = new AddUpdateCommand();
    templateAdd.allowDups=!overwrite;
    templateAdd.overwriteCommitted=overwrite;
    templateAdd.overwritePending=overwrite;

    strategy = new CSVStrategy(',', '"', CSVStrategy.COMMENTS_DISABLED, true,  false, true);
    String sep = params.get(SEPARATOR);
    if (sep!=null) {
      if (sep.length()!=1) throw new SolrException(400,"Invalid separator:'"+sep+"'");
      strategy.setDelimiter(sep.charAt(0));
    }

    String encapsulator = params.get(ENCAPSULATOR);
    if (encapsulator!=null) {
      if (encapsulator.length()!=1) throw new SolrException(400,"Invalid encapsulator:'"+encapsulator+"'");
      strategy.setEncapsulator(encapsulator.charAt(0));
    }

    String fn = params.get(FIELDNAMES);
    fieldnames = fn != null ? commaSplit.split(fn,-1) : null;

    Boolean hasHeader = params.getBool(HEADER);

    if (fieldnames==null) {
      if (null == hasHeader) {
        // assume the file has the headers if they aren't supplied in the args
        hasHeader=true;
      } else if (!hasHeader) {
        // no fieldnames parameter and no header line: nothing names the columns
        throw new SolrException(400,"CSVLoader: must specify fieldnames=<fields>* or header=true");
      }
    } else {
      // if the fieldnames were supplied and the file has a header, we need to
      // skip over that header.
      if (hasHeader!=null && hasHeader) skipLines=1;

      prepareFields();
    }
  }

  /** create the FieldAdders that control how each field is indexed */
  void prepareFields() {
    // Possible future optimization: for really rapid incremental indexing
    // from a POST, one could cache all of this setup info based on the params.
    // The link from FieldAdder to this would need to be severed for that to happen.

    fields = new SchemaField[fieldnames.length];
    adders = new CSVLoader.FieldAdder[fieldnames.length];
    String skipStr = params.get(SKIP);
    List<String> skipFields = skipStr==null ? null : StrUtils.splitSmart(skipStr,',');

    CSVLoader.FieldAdder adder = new CSVLoader.FieldAdder();
    CSVLoader.FieldAdder adderKeepEmpty = new CSVLoader.FieldAdderEmpty();

    for (int i=0; i<fields.length; i++) {
      String fname = fieldnames[i];
      // to skip a field, leave the entries in fields and adders null
      if (fname.length()==0 || (skipFields!=null && skipFields.contains(fname))) continue;

      fields[i] = schema.getField(fname);
      boolean keepEmpty = params.getFieldBool(fname,EMPTY,false);
      adders[i] = keepEmpty ? adderKeepEmpty : adder;

      // Order that operations are applied: split -> trim -> map -> add
      // so create in reverse order.
      // Creation of FieldAdders could be optimized and shared among fields

      String[] fmap = params.getFieldParams(fname,MAP);
      if (fmap!=null) {
        for (String mapRule : fmap) {
          String[] mapArgs = colonSplit.split(mapRule,-1);
          if (mapArgs.length!=2)
            throw new SolrException(400, "Map rules must be of the form 'from:to' ,got '"+mapRule+"'");
          adders[i] = new CSVLoader.FieldMapperSingle(mapArgs[0], mapArgs[1], adders[i]);
        }
      }

      if (params.getFieldBool(fname,TRIM,false)) {
        adders[i] = new CSVLoader.FieldTrimmer(adders[i]);
      }

      if (params.getFieldBool(fname,SPLIT,false)) {
        // sub-values default to ',' separated and '\'' encapsulated
        String sepStr = params.getFieldParam(fname,SEPARATOR);
        char fsep = sepStr==null || sepStr.length()==0 ? ',' : sepStr.charAt(0);
        String encStr = params.getFieldParam(fname,ENCAPSULATOR);
        char fenc = encStr==null || encStr.length()==0 ? '\'' : encStr.charAt(0);

        CSVStrategy fstrat = new CSVStrategy(fsep,fenc,CSVStrategy.COMMENTS_DISABLED);
        adders[i] = new CSVLoader.FieldSplitter(fstrat, adders[i]);
      }
    }
  }

  /** Always throws a 400 SolrException describing the offending input line. */
  private void input_err(String msg, String[] line, int lineno) {
    StringBuilder sb = new StringBuilder();
    sb.append(errHeader+", line="+lineno + ","+msg+"\n\tvalues={");
    for (String val: line) { sb.append("'"+val+"',"); }
    sb.append('}');
    throw new SolrException(400,sb.toString());
  }

  /**
   * load the CSV input
   *
   * @param input the CSV text; it is not closed by this method
   * @throws IOException if reading the input fails
   * @throws SolrException with code 400 if the header is missing or a row
   *         has a different number of values than there are field names
   */
  void load(Reader input) throws IOException {
    Reader reader = input;
    if (skipLines>0) {
      // readLine() is only available on a BufferedReader
      if (!(reader instanceof BufferedReader)) {
        reader = new BufferedReader(reader);
      }
      BufferedReader r = (BufferedReader)reader;
      for (int i=0; i<skipLines; i++) {
        r.readLine();
      }
    }

    CSVParser parser = new CSVParser(reader, strategy);

    // parse the fieldnames from the header of the file
    // NOTE(review): once set, fieldnames stay set, so a later call on the
    // same loader does not read a header again - confirm this is intended
    // for requests with several content streams.
    if (fieldnames==null) {
      fieldnames = parser.getLine();
      if (fieldnames==null) {
        throw new SolrException(400,"Expected fieldnames in CSV input");
      }
      prepareFields();
    }

    // read the rest of the CSV file
    for(;;) {
      int line = parser.getLineNumber();  // for error reporting in MT mode
      String[] vals = parser.getLine();
      if (vals==null) break;

      if (vals.length != fields.length) {
        input_err("expected "+fields.length+" values but got "+vals.length, vals, line);
      }

      addDoc(line,vals);
    }

    if (params.getBool(COMMIT,true)) {
      handler.commit(new CommitUpdateCommand(false));
    }
  }

  /** called for each line of values (document) */
  abstract void addDoc(int line, String[] vals) throws IOException;

  /** this must be MT safe... may be called concurrently from multiple threads. */
  void doAdd(int line, String[] vals, DocumentBuilder builder, AddUpdateCommand template) throws IOException {
    // the line number is passed simply for error reporting in MT mode.
    // first, create the lucene document
    builder.startDoc();
    for (int i=0; i<vals.length; i++) {
      if (fields[i]==null) continue;  // ignore this field
      String val = vals[i];
      adders[i].add(builder, line, i, val);
    }
    builder.endDoc();

    template.doc = builder.getDoc();
    handler.addDoc(template);
  }

}
class SingleThreadedCSVLoader extends CSVLoader {
protected DocumentBuilder builder;
SingleThreadedCSVLoader(SolrQueryRequest req) {
super(req);
builder = new DocumentBuilder(schema);
}
void addDoc(int line, String[] vals) throws IOException {
templateAdd.indexedId = null;
doAdd(line, vals, builder, templateAdd);
}
}

View File

@ -165,6 +165,14 @@ public abstract class SolrParams {
return val!=null ? val : get(param); return val!=null ? val : get(param);
} }
/**
 * Looks up a multi-valued parameter, preferring the per-field form.
 *
 * @param field the field name used to build the per-field parameter name
 * @param param the plain parameter name used as the fallback
 * @return the values of the per-field parameter ("f.field.param") when they
 *         are set, otherwise whatever is stored under "param"
 */
public String[] getFieldParams(String field, String param) {
  String[] perField = getParams(fpname(field,param));
  if (perField != null) {
    return perField;
  }
  // no field specific override: fall back to the global parameter
  return getParams(param);
}
/** Returns the Boolean value of the param, or null if not set */ /** Returns the Boolean value of the param, or null if not set */
public Boolean getBool(String param) { public Boolean getBool(String param) {
String val = get(param); String val = get(param);

View File

@ -197,9 +197,22 @@ public class TestHarness {
* @see LocalSolrQueryRequest * @see LocalSolrQueryRequest
*/ */
public String query(SolrQueryRequest req) throws IOException, Exception { public String query(SolrQueryRequest req) throws IOException, Exception {
return query(req.getQueryType(), req);
}
/**
* Processes a "query" using a user constructed SolrQueryRequest
*
* @param handler the name of the request handler to process the request
* @param req the Query to process, will be closed.
* @return The XML response to the query
* @exception Exception any exception in the response.
* @exception IOException if there is a problem writing the XML
* @see LocalSolrQueryRequest
*/
public String query(String handler, SolrQueryRequest req) throws IOException, Exception {
SolrQueryResponse rsp = new SolrQueryResponse(); SolrQueryResponse rsp = new SolrQueryResponse();
core.execute(req,rsp); core.execute(core.getRequestHandler(handler),req,rsp);
if (rsp.getException() != null) { if (rsp.getException() != null) {
throw rsp.getException(); throw rsp.getException();
} }

View File

@ -0,0 +1,239 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler;
import org.apache.solr.util.AbstractSolrTestCase;
import org.apache.solr.util.ContentStream;
import org.apache.solr.util.ContentStreamBase;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.core.SolrException;
import java.io.*;
import java.util.List;
import java.util.ArrayList;
/**
 * Tests for the CSV update handler registered at "/update/csv".
 * Each test writes a small CSV file to a temporary location, loads it
 * through the handler and checks the indexed documents with queries.
 */
public class TestCSVLoader extends AbstractSolrTestCase {

  public String getSchemaFile() { return "schema.xml"; }
  public String getSolrConfigFile() { return "solrconfig.xml"; }

  String filename = "solr_tmp.csv";
  String def_charset = "UTF-8";
  File file = new File(filename);

  public void setUp() throws Exception {
    // if you override setUp or tearDown, you better call
    // the super classes version
    super.setUp();
  }

  public void tearDown() throws Exception {
    // if you override setUp or tearDown, you better call
    // the super classes version
    try {
      super.tearDown();
    } finally {
      // remove the temporary file even if the super class tearDown fails
      deleteFile();
    }
  }

  /** Writes the contents to the temporary CSV file using the default charset. */
  void makeFile(String contents) {
    makeFile(contents,def_charset);
  }

  /**
   * Writes the contents to the temporary CSV file.
   *
   * @param contents the text of the file
   * @param charset  the name of the charset used to encode the text
   * @throws RuntimeException wrapping any failure to create or write the file
   */
  void makeFile(String contents, String charset) {
    try {
      FileOutputStream fos = new FileOutputStream(filename);
      try {
        Writer out = new OutputStreamWriter(fos, charset);
        out.write(contents);
        out.close();
      } finally {
        // release the file handle even if the charset is unknown or the
        // write fails; closing an already closed stream is harmless
        fos.close();
      }
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }

  void deleteFile() {
    file.delete();
  }

  /** Deletes the documents in the id range used by these tests and commits. */
  void cleanup() {
    assertU(delQ("id:[100 TO 110]"));
    assertU(commit());
  }

  /**
   * Sends the temporary CSV file to the "/update/csv" handler.
   *
   * @param args alternating parameter names and values for the request
   */
  void loadLocal(String... args) throws Exception {
    LocalSolrQueryRequest req =  (LocalSolrQueryRequest)req(args);

    // TODO: stop using locally defined streams once stream.file and
    // stream.body work everywhere
    List<ContentStream> cs = new ArrayList<ContentStream>();
    cs.add(new ContentStreamBase.FileStream(file));
    req.setContentStreams(cs);
    h.query("/update/csv",req);
  }

  public void testCSVLoad() throws Exception {
    makeFile("id\n100\n101\n102");
    loadLocal("stream.file",filename);
    // csv loader currently defaults to committing
    // assertU(commit());
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='3']");
  }

  public void testCommitFalse() throws Exception {
    makeFile("id\n100\n101\n102");
    loadLocal("stream.file",filename,"commit","false");
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='0']");
    assertU(commit());
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='3']");
  }

  public void testCommitTrue() throws Exception {
    makeFile("id\n100\n101\n102");
    loadLocal("stream.file",filename,"commit","true");
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='3']");
  }

  public void testCSV() throws Exception {
    lrf.args.put("version","2.0");

    makeFile("id,str_s\n100,\"quoted\"\n101,\n102,\"\"\n103,");
    loadLocal("stream.file",filename,"commit","true");
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
    assertQ(req("id:100"),"//str[@name='str_s'][.='quoted']");
    assertQ(req("id:101"),"count(//str[@name='str_s'])=0");
    // 102 is a quoted zero length field ,"", as opposed to ,,
    // but we can't distinguish this case (and it's debatable
    // if we should).  Does CSV have a way to specify missing
    // from zero-length?
    assertQ(req("id:102"),"count(//str[@name='str_s'])=0");
    assertQ(req("id:103"),"count(//str[@name='str_s'])=0");

    // test overwrite by default
    loadLocal("stream.file",filename, "commit","true");
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");

    // test no overwrites
    loadLocal("stream.file",filename, "commit","true", "overwrite","false");
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='8']");

    // test overwrite
    loadLocal("stream.file",filename, "commit","true");
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");

    // test global value mapping
    loadLocal("stream.file",filename, "commit","true", "map","quoted:QUOTED");
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
    assertQ(req("id:100"),"//str[@name='str_s'][.='QUOTED']");
    assertQ(req("id:101"),"count(//str[@name='str_s'])=0");
    assertQ(req("id:102"),"count(//str[@name='str_s'])=0");
    assertQ(req("id:103"),"count(//str[@name='str_s'])=0");

    // test value mapping to empty (remove)
    loadLocal("stream.file",filename, "commit","true", "map","quoted:");
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
    assertQ(req("id:100"),"count(//str[@name='str_s'])=0");

    // test value mapping from empty
    loadLocal("stream.file",filename, "commit","true", "map",":EMPTY");
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
    assertQ(req("id:100"),"//str[@name='str_s'][.='quoted']");
    assertQ(req("id:101"),"//str[@name='str_s'][.='EMPTY']");
    assertQ(req("id:102"),"//str[@name='str_s'][.='EMPTY']");
    assertQ(req("id:103"),"//str[@name='str_s'][.='EMPTY']");

    // test multiple map rules
    loadLocal("stream.file",filename, "commit","true", "map",":EMPTY", "map","quoted:QUOTED");
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
    assertQ(req("id:100"),"//str[@name='str_s'][.='QUOTED']");
    assertQ(req("id:101"),"//str[@name='str_s'][.='EMPTY']");
    assertQ(req("id:102"),"//str[@name='str_s'][.='EMPTY']");
    assertQ(req("id:103"),"//str[@name='str_s'][.='EMPTY']");

    // test indexing empty fields
    loadLocal("stream.file",filename, "commit","true", "f.str_s.keepEmpty","true");
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
    assertQ(req("id:100"),"//str[@name='str_s'][.='quoted']");
    assertQ(req("id:101"),"//str[@name='str_s'][.='']");
    assertQ(req("id:102"),"//str[@name='str_s'][.='']");
    assertQ(req("id:103"),"//str[@name='str_s'][.='']");

    // test overriding the name of fields
    loadLocal("stream.file",filename, "commit","true",
             "fieldnames","id,my_s", "header","true",
             "f.my_s.map",":EMPTY");
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
    assertQ(req("id:100"),"//str[@name='my_s'][.='quoted']");
    assertQ(req("id:101"),"count(//str[@name='str_s'])=0");
    assertQ(req("id:102"),"count(//str[@name='str_s'])=0");
    assertQ(req("id:103"),"count(//str[@name='str_s'])=0");
    assertQ(req("id:101"),"//str[@name='my_s'][.='EMPTY']");
    assertQ(req("id:102"),"//str[@name='my_s'][.='EMPTY']");
    assertQ(req("id:103"),"//str[@name='my_s'][.='EMPTY']");

    // test that header in file was skipped
    assertQ(req("id:id"),"//*[@numFound='0']");

    // test loading file as if it didn't have a header
    loadLocal("stream.file",filename, "commit","true",
             "fieldnames","id,my_s", "header","false");
    assertQ(req("id:id"),"//*[@numFound='1']");
    assertQ(req("id:100"),"//str[@name='my_s'][.='quoted']");

    // test multi-valued fields via field splitting w/ mapping of subvalues
    makeFile("id,str_s\n"
            +"100,\"quoted\"\n"
            +"101,\"a,b,c\"\n"
            +"102,\"a,,b\"\n"
            +"103,\n");
    loadLocal("stream.file",filename, "commit","true",
              "f.str_s.map",":EMPTY",
              "f.str_s.split","true");
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
    assertQ(req("id:100"),"//str[@name='str_s'][.='quoted']");
    assertQ(req("id:101"),"//arr[@name='str_s']/str[1][.='a']");
    assertQ(req("id:101"),"//arr[@name='str_s']/str[2][.='b']");
    assertQ(req("id:101"),"//arr[@name='str_s']/str[3][.='c']");
    assertQ(req("id:102"),"//arr[@name='str_s']/str[2][.='EMPTY']");
    assertQ(req("id:103"),"//str[@name='str_s'][.='EMPTY']");

    // test alternate values for delimiters
    makeFile("id|str_s\n"
            +"100|^quoted^\n"
            +"101|a;'b';c\n"
            +"102|a;;b\n"
            +"103|\n");

    loadLocal("stream.file",filename, "commit","true",
              "separator","|",
              "encapsulator","^",
              "f.str_s.map",":EMPTY",
              "f.str_s.split","true",
              "f.str_s.separator",";",
              "f.str_s.encapsulator","'"
    );
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
    assertQ(req("id:100"),"//str[@name='str_s'][.='quoted']");
    assertQ(req("id:101"),"//arr[@name='str_s']/str[1][.='a']");
    assertQ(req("id:101"),"//arr[@name='str_s']/str[2][.='b']");
    assertQ(req("id:101"),"//arr[@name='str_s']/str[3][.='c']");
    assertQ(req("id:102"),"//arr[@name='str_s']/str[2][.='EMPTY']");
    assertQ(req("id:103"),"//str[@name='str_s'][.='EMPTY']");
  }

}

View File

@ -263,6 +263,9 @@
</lst> </lst>
</requestHandler> </requestHandler>
<requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy">
</requestHandler>
<!-- enable streaming for testing... --> <!-- enable streaming for testing... -->
<requestParsers enableRemoteStreaming="true" multipartUploadLimitInKB="2048" /> <requestParsers enableRemoteStreaming="true" multipartUploadLimitInKB="2048" />
@ -275,5 +278,4 @@
<propTest attr1="${solr.test.sys.prop1}-$${literal}" <propTest attr1="${solr.test.sys.prop1}-$${literal}"
attr2="${non.existent.sys.prop:default-from-config}">prefix-${solr.test.sys.prop2}-suffix</propTest> attr2="${non.existent.sys.prop:default-from-config}">prefix-${solr.test.sys.prop2}-suffix</propTest>
</config> </config>