CSV updates: SOLR-66

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@524175 13f79535-47bb-0310-9956-ffa450edef68
2007-03-30 16:59:58 +00:00 · 2007-03-30 16:59:58 +00:00 · 731416af36
parent 18dc9d98e4
commit 731416af36
9 changed files with 672 additions and 4 deletions
--- a/CHANGES.txt
+++ b/CHANGES.txt
@ -136,6 +136,8 @@ New Features
    or overriding the content type of input, and stream.file for reading
    local files. (Ryan McKinley via yonik)
 20. SOLR-66: CSV data format for document additions and updates. (yonik)
 Changes in runtime behavior
 1. Highlighting using DisMax will only pick up terms from the main 
    user query, not boost or filter queries (klaas).
--- a/example/exampledocs/books.csv
+++ b/example/exampledocs/books.csv
@ -0,0 +1,11 @@
 id,cat,name,price,inStock,author_t,series_t,sequence_i,genre_s
 0553573403,book,A Game of Thrones,7.99,true,George R.R. Martin,"A Song of Ice and Fire",1,fantasy
 0553579908,book,A Clash of Kings,7.99,true,George R.R. Martin,"A Song of Ice and Fire",2,fantasy
 055357342X,book,A Storm of Swords,7.99,true,George R.R. Martin,"A Song of Ice and Fire",3,fantasy
 0553293354,book,Foundation,7.99,true,Isaac Asimov,Foundation Novels,1,scifi
 0812521390,book,The Black Company,6.99,false,Glen Cook,The Chronicles of The Black Company,1,fantasy
 0812550706,book,Ender's Game,6.99,true,Orson Scott Card,Ender,1,scifi
 0441385532,book,Jhereg,7.95,false,Steven Brust,Vlad Taltos,1,fantasy
 0380014300,book,Nine Princes In Amber,6.99,true,Roger Zelazny,the Chronicles of Amber,1,fantasy
 0805080481,book,The Book of Three,5.99,true,Lloyd Alexander,The Chronicles of Prydain,1,fantasy
 080508049X,book,The Black Cauldron,5.99,true,Lloyd Alexander,The Chronicles of Prydain,2,fantasy
--- a/example/solr/conf/solrconfig.xml
+++ b/example/solr/conf/solrconfig.xml
@ -385,6 +385,11 @@
  <requestHandler name="/update/commit" class="solr.CommitRequestHandler" />
  <!-- CSV update handler, loaded on demand -->
  <requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy">
  </requestHandler>
  <!-- queryResponseWriter plugins... query responses will be written using the
    writer specified by the 'wt' request parameter matching the name of a registered
    writer.
--- a/lib/commons-csv-0.1-SNAPSHOT.jar
+++ b/lib/commons-csv-0.1-SNAPSHOT.jar
@ -0,0 +1,2 @@
 AnyObjectId[8e096258a36f86e9b956e52f55df0b5afbe8999f] was removed in git history.
 Apache SVN contains full history.
--- a/src/java/org/apache/solr/handler/CSVRequestHandler.java
+++ b/src/java/org/apache/solr/handler/CSVRequestHandler.java
@ -0,0 +1,386 @@
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.solr.handler;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.request.SolrParams;
 import org.apache.solr.request.SolrQueryResponse;
 import org.apache.solr.util.ContentStream;
 import org.apache.solr.core.SolrException;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.util.StrUtils;
 import org.apache.solr.update.*;
 import org.apache.commons.csv.CSVStrategy;
 import org.apache.commons.csv.CSVParser;
 import org.apache.commons.io.IOUtils;
 import java.util.regex.Pattern;
 import java.util.List;
 import java.io.*;
 /**
 * @author yonik
 * @version $Id$
 */
 public class CSVRequestHandler extends RequestHandlerBase {
  public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    CSVLoader loader = new SingleThreadedCSVLoader(req);
    Iterable<ContentStream> streams = req.getContentStreams();
    if (streams == null) {
      throw new SolrException(400, "missing content stream");
    }
    for(ContentStream stream : streams) {
      Reader reader = stream.getReader();
      try {
        loader.errHeader = "CSVLoader: input=" + stream.getSourceInfo(); 
        loader.load(reader);
      } finally {
        IOUtils.closeQuietly(reader);
      }
    }
  }
  //////////////////////// SolrInfoMBeans methods //////////////////////
  @Override
  public String getDescription() {
    return "Add/Update multiple documents with CSV formatted rows";
  }
  @Override
  public String getVersion() {
      return "$Revision:$";
 }
 @Override
 public String getSourceId() {
    return "$Id:$";
  }
  @Override
  public String getSource() {
    return "$URL:$";
  }
 }
 abstract class CSVLoader {
  static String SEPARATOR="separator";
  static String FIELDNAMES="fieldnames";
  static String HEADER="header";
  static String SKIP="skip";
  static String MAP="map";
  static String TRIM="trim";
  static String EMPTY="keepEmpty";
  static String SPLIT="split";
  static String ENCAPSULATOR="encapsulator";
  static String COMMIT="commit";
  static String OVERWRITE="overwrite";
  private static Pattern colonSplit = Pattern.compile(":");
  private static Pattern commaSplit = Pattern.compile(",");
  final IndexSchema schema;
  final SolrParams params;
  final UpdateHandler handler;
  final CSVStrategy strategy;
  String[] fieldnames;
  SchemaField[] fields;
  CSVLoader.FieldAdder[] adders;
  int skipLines;    // number of lines to skip at start of file
  final AddUpdateCommand templateAdd;
  /** Add a field to a document unless it's zero length.
   * The FieldAdder hierarchy handles all the complexity of
   * further transforming or splitting field values to keep the
   * main logic loop clean.  All implementations of add() must be
   * MT-safe!
   */
  private class FieldAdder {
    void add(DocumentBuilder builder, int line, int column, String val) {
      if (val.length() > 0) {
        builder.addField(fields[column].getName(),val,1.0f);
      }
    }
  }
  /** add zero length fields */
  private class FieldAdderEmpty extends CSVLoader.FieldAdder {
    void add(DocumentBuilder builder, int line, int column, String val) {
      builder.addField(fields[column].getName(),val,1.0f);
    }
  }
  /** trim fields */
  private class FieldTrimmer extends CSVLoader.FieldAdder {
    private final CSVLoader.FieldAdder base;
    FieldTrimmer(CSVLoader.FieldAdder base) { this.base=base; }
    void add(DocumentBuilder builder, int line, int column, String val) {
      base.add(builder, line, column, val.trim());
    }
  }
  /** map a single value.
   * for just a couple of mappings, this is probably faster than
   * using a HashMap.
   */
 private class FieldMapperSingle extends CSVLoader.FieldAdder {
   private final String from;
   private final String to;
   private final CSVLoader.FieldAdder base;
   FieldMapperSingle(String from, String to, CSVLoader.FieldAdder base) {
     this.from=from;
     this.to=to;
     this.base=base;
   }
    void add(DocumentBuilder builder, int line, int column, String val) {
      if (from.equals(val)) val=to;
      base.add(builder,line,column,val);
    }
 }
  /** Split a single value into multiple values based on
   * a CSVStrategy.
   */
  private class FieldSplitter extends CSVLoader.FieldAdder {
    private final CSVStrategy strategy;
    private final CSVLoader.FieldAdder base;
    FieldSplitter(CSVStrategy strategy, CSVLoader.FieldAdder base) {
      this.strategy = strategy;
      this.base = base;
    }
    void add(DocumentBuilder builder, int line, int column, String val) {
      CSVParser parser = new CSVParser(new StringReader(val), strategy);
      try {
        String[] vals = parser.getLine();
        if (vals!=null) {
          for (String v: vals) base.add(builder,line,column,v);
        } else {
          base.add(builder,line,column,val);
        }
      } catch (IOException e) {
        throw new SolrException(400,"");
      }
    }
  }
  String errHeader="CSVLoader:";
  CSVLoader(SolrQueryRequest req) {
    this.params = req.getParams();
    handler = req.getCore().getUpdateHandler();
    schema = req.getSchema();
    templateAdd = new AddUpdateCommand();
    templateAdd.allowDups=false;
    templateAdd.overwriteCommitted=true;
    templateAdd.overwritePending=true;
    if (params.getBool(OVERWRITE,true)) {
      templateAdd.allowDups=false;
      templateAdd.overwriteCommitted=true;
      templateAdd.overwritePending=true;
    } else {
      templateAdd.allowDups=true;
      templateAdd.overwriteCommitted=false;
      templateAdd.overwritePending=false;
    }
    strategy = new CSVStrategy(',', '"', CSVStrategy.COMMENTS_DISABLED, true,  false, true);
    String sep = params.get(SEPARATOR);
    if (sep!=null) {
      if (sep.length()!=1) throw new SolrException(400,"Invalid separator:'"+sep+"'");
      strategy.setDelimiter(sep.charAt(0));
    }
    String encapsulator = params.get(ENCAPSULATOR);
    if (encapsulator!=null) {
      if (encapsulator.length()!=1) throw new SolrException(400,"Invalid encapsulator:'"+sep+"'");
      strategy.setEncapsulator(encapsulator.charAt(0));
    }
    String fn = params.get(FIELDNAMES);
    fieldnames = fn != null ? commaSplit.split(fn,-1) : null;
    Boolean hasHeader = params.getBool(HEADER);
    if (fieldnames==null) {
      if (null == hasHeader) {
        // assume the file has the headers if they aren't supplied in the args
        hasHeader=true;
      } else if (hasHeader) {
        throw new SolrException(400,"CSVLoader: must specify fieldnames=<fields>* or header=true");
      }
    } else {
      // if the fieldnames were supplied and the file has a header, we need to
      // skip over that header.
      if (hasHeader!=null && hasHeader) skipLines=1;
      prepareFields();
    }
  }
  /** create the FieldAdders that control how each field  is indexed */
  void prepareFields() {
    // Possible future optimization: for really rapid incremental indexing
    // from a POST, one could cache all of this setup info based on the params.
    // The link from FieldAdder to this would need to be severed for that to happen.
    fields = new SchemaField[fieldnames.length];
    adders = new CSVLoader.FieldAdder[fieldnames.length];
    String skipStr = params.get(SKIP);
    List<String> skipFields = skipStr==null ? null : StrUtils.splitSmart(skipStr,',');
    CSVLoader.FieldAdder adder = new CSVLoader.FieldAdder();
    CSVLoader.FieldAdder adderKeepEmpty = new CSVLoader.FieldAdderEmpty();
    for (int i=0; i<fields.length; i++) {
      String fname = fieldnames[i];
      // to skip a field, leave the entries in fields and addrs null
      if (fname.length()==0 || (skipFields!=null && skipFields.contains(fname))) continue;
      fields[i] = schema.getField(fname);
      boolean keepEmpty = params.getFieldBool(fname,EMPTY,false);
      adders[i] = keepEmpty ? adderKeepEmpty : adder;
      // Order that operations are applied: split -> trim -> map -> add
      // so create in reverse order.
      // Creation of FieldAdders could be optimized and shared among fields
      String[] fmap = params.getFieldParams(fname,MAP);
      if (fmap!=null) {
        for (String mapRule : fmap) {
          String[] mapArgs = colonSplit.split(mapRule,-1);
          if (mapArgs.length!=2)
            throw new SolrException(400, "Map rules must be of the form 'from:to' ,got '"+mapRule+"'");
          adders[i] = new CSVLoader.FieldMapperSingle(mapArgs[0], mapArgs[1], adders[i]);
        }
      }
      if (params.getFieldBool(fname,TRIM,false)) {
        adders[i] = new CSVLoader.FieldTrimmer(adders[i]);
      }
      if (params.getFieldBool(fname,SPLIT,false)) {
        String sepStr = params.getFieldParam(fname,SEPARATOR);
        char fsep = sepStr==null || sepStr.length()==0 ? ',' : sepStr.charAt(0);
        String encStr = params.getFieldParam(fname,ENCAPSULATOR);
        char fenc = encStr==null || encStr.length()==0 ? '\'' : encStr.charAt(0);
        CSVStrategy fstrat = new CSVStrategy(fsep,fenc,CSVStrategy.COMMENTS_DISABLED);
        adders[i] = new CSVLoader.FieldSplitter(fstrat, adders[i]);
      }
    }
  }
  private void input_err(String msg, String[] line, int lineno) {
    StringBuilder sb = new StringBuilder();
    sb.append(errHeader+", line="+lineno + ","+msg+"\n\tvalues={");
    for (String val: line) { sb.append("'"+val+"',"); }
    sb.append('}');
    throw new SolrException(400,sb.toString());
  }
  /** load the CSV input */
  void load(Reader input) throws IOException {
    Reader reader = input;
    if (skipLines>0) {
      if (!(reader instanceof BufferedReader)) {
        reader = new BufferedReader(reader);
      }
      BufferedReader r = (BufferedReader)reader;
      for (int i=0; i<skipLines; i++) {
        r.readLine();
      }
    }
    CSVParser parser = new CSVParser(reader, strategy);
    // parse the fieldnames from the header of the file
    if (fieldnames==null) {
      fieldnames = parser.getLine();
      if (fieldnames==null) {
        throw new SolrException(400,"Expected fieldnames in CSV input");
      }
      prepareFields();
    }
    // read the rest of the CSV file
    for(;;) {
      int line = parser.getLineNumber();  // for error reporting in MT mode
      String[] vals = parser.getLine();
      if (vals==null) break;
      if (vals.length != fields.length) {
        input_err("expected "+fields.length+" values but got "+vals.length, vals, line);
      }
      addDoc(line,vals);
    }
    if (params.getBool(COMMIT,true)) {
      handler.commit(new CommitUpdateCommand(false));
    }
  }
  /** called for each line of values (document) */
  abstract void addDoc(int line, String[] vals) throws IOException;
  /** this must be MT safe... may be called concurrently from multiple threads. */
  void doAdd(int line, String[] vals, DocumentBuilder builder, AddUpdateCommand template) throws IOException {
    // the line number is passed simply for error reporting in MT mode.
    // first, create the lucene document
    builder.startDoc();
    for (int i=0; i<vals.length; i++) {
      if (fields[i]==null) continue;  // ignore this field
      String val = vals[i];
      adders[i].add(builder, line, i, val);
    }
    builder.endDoc();
    template.doc = builder.getDoc();
    handler.addDoc(template);
  }
 }
 class SingleThreadedCSVLoader extends CSVLoader {
  protected DocumentBuilder builder;
  SingleThreadedCSVLoader(SolrQueryRequest req) {
    super(req);
    builder = new DocumentBuilder(schema);
  }
  void addDoc(int line, String[] vals) throws IOException {
    templateAdd.indexedId = null;
    doAdd(line, vals, builder, templateAdd);
  }
 }
--- a/src/java/org/apache/solr/request/SolrParams.java
+++ b/src/java/org/apache/solr/request/SolrParams.java
@ -165,6 +165,14 @@ public abstract class SolrParams {
    return val!=null ? val : get(param);
  }
  /** returns the String values of the field parameter, "f.field.param", or
   *  the values for "param" if that is not set.
   */
  public String[] getFieldParams(String field, String param) {
    String[] val = getParams(fpname(field,param));
    return val!=null ? val : getParams(param);
  }
  /** Returns the Boolean value of the param, or null if not set */
  public Boolean getBool(String param) {
    String val = get(param);
--- a/src/java/org/apache/solr/util/TestHarness.java
+++ b/src/java/org/apache/solr/util/TestHarness.java
@ -197,9 +197,22 @@ public class TestHarness {
   * @see LocalSolrQueryRequest
   */
  public String query(SolrQueryRequest req) throws IOException, Exception {
    return query(req.getQueryType(), req);
  }
  /**
   * Processes a "query" using a user constructed SolrQueryRequest
   *
   * @param handler the name of the request handler to process the request
   * @param req the Query to process, will be closed.
   * @return The XML response to the query
   * @exception Exception any exception in the response.
   * @exception IOException if there is a problem writing the XML
   * @see LocalSolrQueryRequest
   */
  public String query(String handler, SolrQueryRequest req) throws IOException, Exception {
    SolrQueryResponse rsp = new SolrQueryResponse();
-    core.execute(req,rsp);
+    core.execute(core.getRequestHandler(handler),req,rsp);
    if (rsp.getException() != null) {
      throw rsp.getException();
    }
--- a/src/test/org/apache/solr/handler/TestCSVLoader.java
+++ b/src/test/org/apache/solr/handler/TestCSVLoader.java
@ -0,0 +1,239 @@
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.solr.handler;
 import org.apache.solr.util.AbstractSolrTestCase;
 import org.apache.solr.util.ContentStream;
 import org.apache.solr.util.ContentStreamBase;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.request.LocalSolrQueryRequest;
 import org.apache.solr.core.SolrException;
 import java.io.*;
 import java.util.List;
 import java.util.ArrayList;
 public class TestCSVLoader extends AbstractSolrTestCase {
  public String getSchemaFile() { return "schema.xml"; }
  public String getSolrConfigFile() { return "solrconfig.xml"; }
  String filename = "solr_tmp.csv";
  String def_charset = "UTF-8";
  File file = new File(filename);
  public void setUp() throws Exception {
    // if you override setUp or tearDown, you better call
    // the super classes version
    super.setUp();
  }
  public void tearDown() throws Exception {
    // if you override setUp or tearDown, you better call
    // the super classes version
    super.tearDown();
    deleteFile();
  }
  void makeFile(String contents) {
    makeFile(contents,def_charset);
  }
  void makeFile(String contents, String charset) {
    try {
      Writer out = new OutputStreamWriter(new FileOutputStream(filename), charset);
      out.write(contents);
      out.close();
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }
  void deleteFile() {
    file.delete();
  }
  void cleanup() {
    assertU(delQ("id:[100 TO 110]"));
    assertU(commit());
  }
  void loadLocal(String... args) throws Exception {
    LocalSolrQueryRequest req =  (LocalSolrQueryRequest)req(args);
    // TODO: stop using locally defined streams once stream.file and
    // stream.body work everywhere
    List<ContentStream> cs = new ArrayList<ContentStream>();
    cs.add(new ContentStreamBase.FileStream(new File(filename)));
    req.setContentStreams(cs);
    h.query("/update/csv",req);
  }
  public void testCSVLoad() throws Exception {
    makeFile("id\n100\n101\n102");
    loadLocal("stream.file",filename);
    // csv loader currently defaults to committing
    // assertU(commit());
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='3']");
  }
  public void testCommitFalse() throws Exception {
    makeFile("id\n100\n101\n102");
    loadLocal("stream.file",filename,"commit","false");
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='0']");
    assertU(commit());
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='3']");
  }
  public void testCommitTrue() throws Exception {
    makeFile("id\n100\n101\n102");
    loadLocal("stream.file",filename,"commit","true");
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='3']");
  }
  public void testCSV() throws Exception {
    lrf.args.put("version","2.0");
    makeFile("id,str_s\n100,\"quoted\"\n101,\n102,\"\"\n103,");
    loadLocal("stream.file",filename,"commit","true");
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
    assertQ(req("id:100"),"//str[@name='str_s'][.='quoted']");
    assertQ(req("id:101"),"count(//str[@name='str_s'])=0");
    // 102 is a quoted zero length field ,"", as opposed to ,,
    // but we can't distinguish this case (and it's debateable
    // if we should).  Does CSV have a way to specify missing
    // from zero-length?
    assertQ(req("id:102"),"count(//str[@name='str_s'])=0");
    assertQ(req("id:103"),"count(//str[@name='str_s'])=0");
    // test overwrite by default
    loadLocal("stream.file",filename, "commit","true");
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
    // test no overwrites
    loadLocal("stream.file",filename, "commit","true", "overwrite","false");
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='8']");
    // test overwrite
    loadLocal("stream.file",filename, "commit","true");
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
    // test global value mapping
    loadLocal("stream.file",filename, "commit","true", "map","quoted:QUOTED");
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
    assertQ(req("id:100"),"//str[@name='str_s'][.='QUOTED']");
    assertQ(req("id:101"),"count(//str[@name='str_s'])=0");
    assertQ(req("id:102"),"count(//str[@name='str_s'])=0");
    assertQ(req("id:103"),"count(//str[@name='str_s'])=0");
    // test value mapping to empty (remove)
    loadLocal("stream.file",filename, "commit","true", "map","quoted:");
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
    assertQ(req("id:100"),"count(//str[@name='str_s'])=0");
    // test value mapping from empty
    loadLocal("stream.file",filename, "commit","true", "map",":EMPTY");
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
    assertQ(req("id:100"),"//str[@name='str_s'][.='quoted']");
    assertQ(req("id:101"),"//str[@name='str_s'][.='EMPTY']");
    assertQ(req("id:102"),"//str[@name='str_s'][.='EMPTY']");
    assertQ(req("id:103"),"//str[@name='str_s'][.='EMPTY']");
    // test multiple map rules
    loadLocal("stream.file",filename, "commit","true", "map",":EMPTY", "map","quoted:QUOTED");
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
    assertQ(req("id:100"),"//str[@name='str_s'][.='QUOTED']");
    assertQ(req("id:101"),"//str[@name='str_s'][.='EMPTY']");
    assertQ(req("id:102"),"//str[@name='str_s'][.='EMPTY']");
    assertQ(req("id:103"),"//str[@name='str_s'][.='EMPTY']");
    // test indexing empty fields
    loadLocal("stream.file",filename, "commit","true", "f.str_s.keepEmpty","true");
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
    assertQ(req("id:100"),"//str[@name='str_s'][.='quoted']");
    assertQ(req("id:101"),"//str[@name='str_s'][.='']");
    assertQ(req("id:102"),"//str[@name='str_s'][.='']");
    assertQ(req("id:103"),"//str[@name='str_s'][.='']");
    // test overriding the name of fields
    loadLocal("stream.file",filename, "commit","true",
             "fieldnames","id,my_s", "header","true",
             "f.my_s.map",":EMPTY");
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
    assertQ(req("id:100"),"//str[@name='my_s'][.='quoted']");
    assertQ(req("id:101"),"count(//str[@name='str_s'])=0");
    assertQ(req("id:102"),"count(//str[@name='str_s'])=0");
    assertQ(req("id:103"),"count(//str[@name='str_s'])=0");
    assertQ(req("id:101"),"//str[@name='my_s'][.='EMPTY']");
    assertQ(req("id:102"),"//str[@name='my_s'][.='EMPTY']");
    assertQ(req("id:103"),"//str[@name='my_s'][.='EMPTY']");
    // test that header in file was skipped
    assertQ(req("id:id"),"//*[@numFound='0']");
    // test loading file as if it didn't have a header
    loadLocal("stream.file",filename, "commit","true",
             "fieldnames","id,my_s", "header","false");
    assertQ(req("id:id"),"//*[@numFound='1']");
    assertQ(req("id:100"),"//str[@name='my_s'][.='quoted']");
    // test multi-valued fields via field splitting w/ mapping of subvalues
    makeFile("id,str_s\n"
            +"100,\"quoted\"\n"
            +"101,\"a,b,c\"\n"
            +"102,\"a,,b\"\n"
            +"103,\n");
    loadLocal("stream.file",filename, "commit","true",
              "f.str_s.map",":EMPTY",
              "f.str_s.split","true");
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
    assertQ(req("id:100"),"//str[@name='str_s'][.='quoted']");
    assertQ(req("id:101"),"//arr[@name='str_s']/str[1][.='a']");
    assertQ(req("id:101"),"//arr[@name='str_s']/str[2][.='b']");
    assertQ(req("id:101"),"//arr[@name='str_s']/str[3][.='c']");
    assertQ(req("id:102"),"//arr[@name='str_s']/str[2][.='EMPTY']");
    assertQ(req("id:103"),"//str[@name='str_s'][.='EMPTY']");
    // test alternate values for delimiters
    makeFile("id|str_s\n"
            +"100|^quoted^\n"
            +"101|a;'b';c\n"
            +"102|a;;b\n"
            +"103|\n");
    loadLocal("stream.file",filename, "commit","true",
              "separator","|",
              "encapsulator","^",
              "f.str_s.map",":EMPTY",
              "f.str_s.split","true",
              "f.str_s.separator",";",
              "f.str_s.encapsulator","'"
    );
    assertQ(req("id:[100 TO 110]"),"//*[@numFound='4']");
    assertQ(req("id:100"),"//str[@name='str_s'][.='quoted']");
    assertQ(req("id:101"),"//arr[@name='str_s']/str[1][.='a']");
    assertQ(req("id:101"),"//arr[@name='str_s']/str[2][.='b']");
    assertQ(req("id:101"),"//arr[@name='str_s']/str[3][.='c']");
    assertQ(req("id:102"),"//arr[@name='str_s']/str[2][.='EMPTY']");
    assertQ(req("id:103"),"//str[@name='str_s'][.='EMPTY']");
  }
 }
--- a/src/test/test-files/solr/conf/solrconfig.xml
+++ b/src/test/test-files/solr/conf/solrconfig.xml
@ -263,6 +263,9 @@
    </lst>
  </requestHandler>
  <requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy">
  </requestHandler>
  <!-- enable streaming for testing... -->
  <requestParsers enableRemoteStreaming="true" multipartUploadLimitInKB="2048" />
@ -275,5 +278,4 @@
  <propTest attr1="${solr.test.sys.prop1}-$${literal}"
            attr2="${non.existent.sys.prop:default-from-config}">prefix-${solr.test.sys.prop2}-suffix</propTest>
 </config>
		`@ -0,0 +1,2 @@`
							`AnyObjectId[8e096258a36f86e9b956e52f55df0b5afbe8999f] was removed in git history.`
							`Apache SVN contains full history.`