SOLR-1065 -- A ContentStreamDataSource which can accept HTTP POST data in a content stream. This can be used to push data to Solr instead of just pulling it from DB/Files/URLs

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@755141 13f79535-47bb-0310-9956-ffa450edef68
2009-03-17 07:50:09 +00:00 · 2009-03-17 07:50:09 +00:00 · 7de4dee17c
parent 2d4d167ced
commit 7de4dee17c
11 changed files with 682 additions and 13 deletions
--- a/contrib/dataimporthandler/CHANGES.txt
+++ b/contrib/dataimporthandler/CHANGES.txt
@ -93,6 +93,10 @@ New Features
 21.SOLR-1062: A LogTransformer which can log data in a given template format.
              (Jon Baer, Noble Paul via shalin)

+22.SOLR-1065: A ContentStreamDataSource which can accept HTTP POST data in a content stream. This can be used to
+              push data to Solr instead of just pulling it from DB/Files/URLs.
+              (Noble Paul via shalin)
+
 Optimizations
 ----------------------
 1. SOLR-846:  Reduce memory consumption during delta import by removing keys when used
--- a/contrib/dataimporthandler/build.xml
+++ b/contrib/dataimporthandler/build.xml
@ -26,7 +26,12 @@
  <description>
    Data Import Handler
  </description>
-
+  <path id="classpath.jetty">
+    <!-- jetty -->
+    <fileset dir="${solr-path}/example/lib">
+      <include name="**/*.jar" />
+    </fileset>
+  </path>
  <path id="common.classpath">
  	<pathelement location="${solr-path}/build/solr" />
  	<pathelement location="${solr-path}/build/solrj" />
@ -35,6 +40,7 @@
 	
  <path id="test.classpath">
  	<path refid="common.classpath" />
+  	<path refid="classpath.jetty" />
 	  <pathelement path="target/classes" />
  	<pathelement path="target/test-classes" />
    <pathelement path="${java.class.path}"/>
--- a/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java
+++ b/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContentStreamDataSource.java
@ -0,0 +1,67 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.handler.dataimport;
+
+import org.apache.solr.common.util.ContentStream;
+import static org.apache.solr.handler.dataimport.DataImportHandlerException.SEVERE;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Properties;
+
+/**
+ * A DataSource implementation which reads from the ContentStream of a POST request
+ * <p/>
+ * Refer to <a href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache.org/solr/DataImportHandler</a>
+ * for more details.
+ * <p/>
+ * <b>This API is experimental and may change in the future.</b>
+ *
+ * @version $Id$
+ * @since solr 1.4
+ */
+public class ContentStreamDataSource extends DataSource<Reader> {
+  private ContextImpl context;
+  private ContentStream contentStream;
+  private Reader reader;
+
+  public void init(Context context, Properties initProps) {
+    this.context = (ContextImpl) context;
+  }
+
+  public Reader getData(String query) {
+    contentStream = context.docBuilder.requestParameters.contentStream;
+    if (contentStream == null)
+      throw new DataImportHandlerException(SEVERE, "No stream available. The request has no body");
+    try {
+      return reader = contentStream.getReader();
+    } catch (IOException e) {
+      DataImportHandlerException.wrapAndThrow(SEVERE, e);
+      return null;
+    }
+  }
+
+  public void close() {
+    if (contentStream != null) {
+      try {
+        if (reader == null) reader = contentStream.getReader();
+        reader.close();
+      } catch (IOException e) {
+      }
+    }
+  }
+}
--- a/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContextImpl.java
+++ b/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/ContextImpl.java
@ -50,7 +50,7 @@ public class ContextImpl extends Context {

  private Map<String, Object> entitySession, globalSession, docSession;

-  private DocBuilder docBuilder;
+  DocBuilder docBuilder;

  public ContextImpl(DataConfig.Entity entity, VariableResolverImpl resolver,
                     DataSource ds, int currProcess,
--- a/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImportHandler.java
+++ b/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImportHandler.java
@ -24,6 +24,7 @@ import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.params.UpdateParams;
 import org.apache.solr.common.util.ContentStreamBase;
 import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.ContentStream;
 import org.apache.solr.core.SolrConfig;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.core.SolrResourceLoader;
@ -113,7 +114,13 @@ public class DataImportHandler extends RequestHandlerBase implements
    SolrParams params = req.getParams();
    DataImporter.RequestParams requestParams = new DataImporter.RequestParams(getParamsMap(params));
    String command = requestParams.command;
-
+    Iterable<ContentStream> streams = req.getContentStreams();
+    if(streams != null){
+      for (ContentStream stream : streams) {
+          requestParams.contentStream = stream;
+          break;
+      }
+    }
    if (DataImporter.SHOW_CONF_CMD.equals(command)) {
      // Modify incoming request params to add wt=raw
      ModifiableSolrParams rawParams = new ModifiableSolrParams(req.getParams());
@ -186,7 +193,11 @@ public class DataImportHandler extends RequestHandlerBase implements
          }
        } else {
          // Asynchronous request for normal mode
-          importer.runAsync(requestParams, sw);
+          if(requestParams.contentStream == null){
+            importer.runAsync(requestParams, sw);
+          } else {
+              importer.runCmd(requestParams, sw);
+          }
        }
      } else if (DataImporter.RELOAD_CONF_CMD.equals(command)) {
        importer = null;
--- a/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImporter.java
+++ b/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/DataImporter.java
@ -21,6 +21,7 @@ import org.apache.solr.core.SolrConfig;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.SchemaField;
+import org.apache.solr.common.util.ContentStream;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.w3c.dom.Document;
@ -469,6 +470,8 @@ public class DataImporter {

    public String dataConfig;

+    public ContentStream contentStream;
+
    public RequestParams() {
    }

--- a/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestContentStreamDataSource.java
+++ b/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestContentStreamDataSource.java
@ -0,0 +1,154 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.handler.dataimport;
+
+import junit.framework.TestCase;
+import org.apache.commons.io.FileUtils;
+import org.apache.solr.client.solrj.embedded.JettySolrRunner;
+import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
+import org.apache.solr.client.solrj.request.DirectXmlRequest;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrDocumentList;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.util.AbstractSolrTestCase;
+
+import java.io.File;
+
+/**
+ * Test for ContentStreamDataSource
+ *
+ * @version $Id$
+ * @since solr 1.4
+ */
+public class TestContentStreamDataSource extends TestCase {
+  private static final String CONF_DIR = "." + File.separator + "solr" + File.separator + "conf" + File.separator;
+  SolrInstance instance = null;
+  JettySolrRunner jetty;
+
+
+  public void setUp() throws Exception {
+    instance = new SolrInstance("inst", null);
+    instance.setUp();
+    jetty = createJetty(instance);
+
+  }
+
+  public void testSimple() throws Exception {
+    DirectXmlRequest req = new DirectXmlRequest("/dataimport", xml);
+    ModifiableSolrParams params = new ModifiableSolrParams();
+    params.set("command", "full-import");
+    params.set("clean", "false");
+    req.setParams(params);
+    String url = "http://localhost:" + jetty.getLocalPort() + "/solr";
+    CommonsHttpSolrServer solrServer = new CommonsHttpSolrServer(url);
+    solrServer.request(req);
+    ModifiableSolrParams qparams = new ModifiableSolrParams();
+    qparams.add("q", "*:*");
+    QueryResponse qres = solrServer.query(qparams);
+    SolrDocumentList results = qres.getResults();
+    assertEquals(2, results.getNumFound());
+    SolrDocument doc = results.get(0);
+    assertEquals("1", doc.getFieldValue("id"));
+    assertEquals("Hello C1", doc.getFieldValue("desc"));
+  }
+
+  private class SolrInstance extends AbstractSolrTestCase {
+    String name;
+    Integer port;
+    File homeDir;
+    File confDir;
+
+    /**
+     * if masterPort is null, this instance is a master -- otherwise this instance is a slave, and assumes the master is
+     * on localhost at the specified port.
+     */
+    public SolrInstance(String name, Integer port) {
+      this.name = name;
+      this.port = port;
+    }
+
+    public String getHomeDir() {
+      return homeDir.toString();
+    }
+
+    @Override
+    public String getSchemaFile() {
+      return CONF_DIR + "dataimport-schema.xml";
+    }
+
+    public String getConfDir() {
+      return confDir.toString();
+    }
+
+    public String getDataDir() {
+      return dataDir.toString();
+    }
+
+    @Override
+    public String getSolrConfigFile() {
+      return CONF_DIR + "contentstream-solrconfig.xml";
+    }
+
+    public void setUp() throws Exception {
+
+      String home = System.getProperty("java.io.tmpdir")
+              + File.separator
+              + getClass().getName() + "-" + System.currentTimeMillis();
+
+
+      homeDir = new File(home + "inst");
+      dataDir = new File(homeDir, "data");
+      confDir = new File(homeDir, "conf");
+
+      homeDir.mkdirs();
+      dataDir.mkdirs();
+      confDir.mkdirs();
+
+      File f = new File(confDir, "solrconfig.xml");
+      FileUtils.copyFile(new File(getSolrConfigFile()), f);
+      f = new File(confDir, "schema.xml");
+
+      FileUtils.copyFile(new File(getSchemaFile()), f);
+      f = new File(confDir, "data-config.xml");
+      FileUtils.copyFile(new File(CONF_DIR + "dataconfig-contentstream.xml"), f);
+    }
+
+    public void tearDown() throws Exception {
+      super.tearDown();
+      AbstractSolrTestCase.recurseDelete(homeDir);
+    }
+  }
+
+  private JettySolrRunner createJetty(SolrInstance instance) throws Exception {
+    System.setProperty("solr.solr.home", instance.getHomeDir());
+    System.setProperty("solr.data.dir", instance.getDataDir());
+    JettySolrRunner jetty = new JettySolrRunner("/solr", 0);
+    jetty.start();
+    return jetty;
+  }
+
+  static String xml = "<root>\n"
+          + "<b>\n"
+          + "  <id>1</id>\n"
+          + "  <c>Hello C1</c>\n"
+          + "</b>\n"
+          + "<b>\n"
+          + "  <id>2</id>\n"
+          + "  <c>Hello C2</c>\n"
+          + "</b>\n" + "</root>";
+}
--- a/contrib/dataimporthandler/src/test/resources/solr/conf/contentstream-solrconfig.xml
+++ b/contrib/dataimporthandler/src/test/resources/solr/conf/contentstream-solrconfig.xml
@ -0,0 +1,408 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<config>
+  <!-- Set this to 'false' if you want solr to continue working after it has 
+       encountered an severe configuration error.  In a production environment, 
+       you may want solr to keep working even if one handler is mis-configured.
+
+       You may also set this to false using by setting the system property:
+         -Dsolr.abortOnConfigurationError=false
+     -->
+  <abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>
+
+  <!-- Used to specify an alternate directory to hold all index data
+       other than the default ./data under the Solr home.
+       If replication is in use, this should match the replication configuration. -->
+       <dataDir>${solr.data.dir:./solr/data}</dataDir>
+
+
+  <indexDefaults>
+   <!-- Values here affect all index writers and act as a default unless overridden. -->
+    <useCompoundFile>false</useCompoundFile>
+
+    <mergeFactor>10</mergeFactor>
+    <!--
+     If both ramBufferSizeMB and maxBufferedDocs is set, then Lucene will flush based on whichever limit is hit first.
+
+     -->
+    <!--<maxBufferedDocs>1000</maxBufferedDocs>-->
+    <!-- Tell Lucene when to flush documents to disk.
+    Giving Lucene more memory for indexing means faster indexing at the cost of more RAM
+
+    If both ramBufferSizeMB and maxBufferedDocs is set, then Lucene will flush based on whichever limit is hit first.
+
+    -->
+    <ramBufferSizeMB>32</ramBufferSizeMB>
+    <maxMergeDocs>2147483647</maxMergeDocs>
+    <maxFieldLength>10000</maxFieldLength>
+    <writeLockTimeout>1000</writeLockTimeout>
+    <commitLockTimeout>10000</commitLockTimeout>
+
+    <!--
+     Expert: Turn on Lucene's auto commit capability.
+
+     TODO: Add recommendations on why you would want to do this.
+
+     NOTE: Despite the name, this value does not have any relation to Solr's autoCommit functionality
+
+     -->
+    <!--<luceneAutoCommit>false</luceneAutoCommit>-->
+    <!--
+     Expert:
+     The Merge Policy in Lucene controls how merging is handled by Lucene.  The default in 2.3 is the LogByteSizeMergePolicy, previous
+     versions used LogDocMergePolicy.
+
+     LogByteSizeMergePolicy chooses segments to merge based on their size.  The Lucene 2.2 default, LogDocMergePolicy chose when
+     to merge based on number of documents
+
+     Other implementations of MergePolicy must have a no-argument constructor
+     -->
+    <!--<mergePolicy>org.apache.lucene.index.LogByteSizeMergePolicy</mergePolicy>-->
+
+    <!--
+     Expert:
+     The Merge Scheduler in Lucene controls how merges are performed.  The ConcurrentMergeScheduler (Lucene 2.3 default)
+      can perform merges in the background using separate threads.  The SerialMergeScheduler (Lucene 2.2 default) does not.
+     -->
+    <!--<mergeScheduler>org.apache.lucene.index.ConcurrentMergeScheduler</mergeScheduler>-->
+
+    <!--
+      As long as Solr is the only process modifying your index, it is
+      safe to use Lucene's in process locking mechanism.  But you may
+      specify one of the other Lucene LockFactory implementations in
+      the event that you have a custom situation.
+      
+      none = NoLockFactory (typically only used with read only indexes)
+      single = SingleInstanceLockFactory (suggested)
+      native = NativeFSLockFactory
+      simple = SimpleFSLockFactory
+
+      ('simple' is the default for backwards compatibility with Solr 1.2)
+    -->
+    <lockType>single</lockType>
+  </indexDefaults>
+
+  <mainIndex>
+    <!-- options specific to the main on-disk lucene index -->
+    <useCompoundFile>false</useCompoundFile>
+    <ramBufferSizeMB>32</ramBufferSizeMB>
+    <mergeFactor>10</mergeFactor>
+    <!-- Deprecated -->
+    <!--<maxBufferedDocs>1000</maxBufferedDocs>-->
+    <maxMergeDocs>2147483647</maxMergeDocs>
+    <maxFieldLength>10000</maxFieldLength>
+
+    <!-- If true, unlock any held write or commit locks on startup. 
+         This defeats the locking mechanism that allows multiple
+         processes to safely access a lucene index, and should be
+         used with care.
+         This is not needed if lock type is 'none' or 'single'
+     -->
+    <unlockOnStartup>false</unlockOnStartup>
+  </mainIndex>
+
+  <!-- the default high-performance update handler -->
+  <updateHandler class="solr.DirectUpdateHandler2">
+
+    <!-- A prefix of "solr." for class names is an alias that
+         causes solr to search appropriate packages, including
+         org.apache.solr.(search|update|request|core|analysis)
+     -->
+
+    <!-- Limit the number of deletions Solr will buffer during doc updating.
+        
+        Setting this lower can help bound memory use during indexing.
+    -->
+    <maxPendingDeletes>100000</maxPendingDeletes>
+
+  </updateHandler>
+
+
+  <query>
+    <!-- Maximum number of clauses in a boolean query... can affect
+        range or prefix queries that expand to big boolean
+        queries.  An exception is thrown if exceeded.  -->
+    <maxBooleanClauses>1024</maxBooleanClauses>
+
+    
+    <!-- Cache used by SolrIndexSearcher for filters (DocSets),
+         unordered sets of *all* documents that match a query.
+         When a new searcher is opened, its caches may be prepopulated
+         or "autowarmed" using data from caches in the old searcher.
+         autowarmCount is the number of items to prepopulate.  For LRUCache,
+         the autowarmed items will be the most recently accessed items.
+       Parameters:
+         class - the SolrCache implementation (currently only LRUCache)
+         size - the maximum number of entries in the cache
+         initialSize - the initial capacity (number of entries) of
+           the cache.  (seel java.util.HashMap)
+         autowarmCount - the number of entries to prepopulate from
+           and old cache.
+         -->
+    <filterCache
+      class="solr.LRUCache"
+      size="512"
+      initialSize="512"
+      autowarmCount="256"/>
+
+   <!-- queryResultCache caches results of searches - ordered lists of
+         document ids (DocList) based on a query, a sort, and the range
+         of documents requested.  -->
+    <queryResultCache
+      class="solr.LRUCache"
+      size="512"
+      initialSize="512"
+      autowarmCount="256"/>
+
+  <!-- documentCache caches Lucene Document objects (the stored fields for each document).
+       Since Lucene internal document ids are transient, this cache will not be autowarmed.  -->
+    <documentCache
+      class="solr.LRUCache"
+      size="512"
+      initialSize="512"
+      autowarmCount="0"/>
+
+    <!-- If true, stored fields that are not requested will be loaded lazily.
+
+    This can result in a significant speed improvement if the usual case is to
+    not load all stored fields, especially if the skipped fields are large compressed
+    text fields.
+    -->
+    <enableLazyFieldLoading>true</enableLazyFieldLoading>
+
+    <!-- Example of a generic cache.  These caches may be accessed by name
+         through SolrIndexSearcher.getCache(),cacheLookup(), and cacheInsert().
+         The purpose is to enable easy caching of user/application level data.
+         The regenerator argument should be specified as an implementation
+         of solr.search.CacheRegenerator if autowarming is desired.  -->
+    <!--
+    <cache name="myUserCache"
+      class="solr.LRUCache"
+      size="4096"
+      initialSize="1024"
+      autowarmCount="1024"
+      regenerator="org.mycompany.mypackage.MyRegenerator"
+      />
+    -->
+
+   <!-- An optimization that attempts to use a filter to satisfy a search.
+         If the requested sort does not include score, then the filterCache
+         will be checked for a filter matching the query. If found, the filter
+         will be used as the source of document ids, and then the sort will be
+         applied to that.
+    <useFilterForSortedQuery>true</useFilterForSortedQuery>
+   -->
+
+   <!-- An optimization for use with the queryResultCache.  When a search
+         is requested, a superset of the requested number of document ids
+         are collected.  For example, if a search for a particular query
+         requests matching documents 10 through 19, and queryWindowSize is 50,
+         then documents 0 through 49 will be collected and cached.  Any further
+         requests in that range can be satisfied via the cache.  -->
+    <queryResultWindowSize>50</queryResultWindowSize>
+    
+    <!-- Maximum number of documents to cache for any entry in the
+         queryResultCache. -->
+    <queryResultMaxDocsCached>200</queryResultMaxDocsCached>
+
+    <!-- This entry enables an int hash representation for filters (DocSets)
+         when the number of items in the set is less than maxSize.  For smaller
+         sets, this representation is more memory efficient, more efficient to
+         iterate over, and faster to take intersections.  -->
+    <HashDocSet maxSize="3000" loadFactor="0.75"/>
+
+    <!-- a newSearcher event is fired whenever a new searcher is being prepared
+         and there is a current searcher handling requests (aka registered). -->
+    <!-- QuerySenderListener takes an array of NamedList and executes a
+         local query request for each NamedList in sequence. -->
+    <listener event="newSearcher" class="solr.QuerySenderListener">
+      <arr name="queries">
+        <lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> </lst>
+        <lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst>
+        <lst><str name="q">static newSearcher warming query from solrconfig.xml</str></lst>
+      </arr>
+    </listener>
+
+    <!-- a firstSearcher event is fired whenever a new searcher is being
+         prepared but there is no current registered searcher to handle
+         requests or to gain autowarming data from. -->
+    <listener event="firstSearcher" class="solr.QuerySenderListener">
+      <arr name="queries">
+      </arr>
+    </listener>
+
+    <!-- If a search request comes in and there is no current registered searcher,
+         then immediately register the still warming searcher and use it.  If
+         "false" then all requests will block until the first searcher is done
+         warming. -->
+    <useColdSearcher>false</useColdSearcher>
+
+    <!-- Maximum number of searchers that may be warming in the background
+      concurrently.  An error is returned if this limit is exceeded. Recommend
+      1-2 for read-only slaves, higher for masters w/o cache warming. -->
+    <maxWarmingSearchers>4</maxWarmingSearchers>
+
+  </query>
+
+  <!-- 
+    Let the dispatch filter handler /select?qt=XXX
+    handleSelect=true will use consistent error handling for /select and /update
+    handleSelect=false will use solr1.1 style error formatting
+    -->
+  <requestDispatcher handleSelect="true" >
+    <!--Make sure your system has some authentication before enabling remote streaming!  -->
+    <requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" />
+        
+    <!-- Set HTTP caching related parameters (for proxy caches and clients).
+          
+         To get the behaviour of Solr 1.2 (ie: no caching related headers)
+         use the never304="true" option and do not specify a value for
+         <cacheControl>
+    -->
+    <httpCaching never304="true">
+    <!--httpCaching lastModifiedFrom="openTime"
+                 etagSeed="Solr"-->
+       <!-- lastModFrom="openTime" is the default, the Last-Modified value
+            (and validation against If-Modified-Since requests) will all be
+            relative to when the current Searcher was opened.
+            You can change it to lastModFrom="dirLastMod" if you want the
+            value to exactly corrispond to when the physical index was last
+            modified.
+               
+            etagSeed="..." is an option you can change to force the ETag
+            header (and validation against If-None-Match requests) to be
+            differnet even if the index has not changed (ie: when making
+            significant changes to your config file)
+
+            lastModifiedFrom and etagSeed are both ignored if you use the
+            never304="true" option.
+       -->
+       <!-- If you include a <cacheControl> directive, it will be used to
+            generate a Cache-Control header, as well as an Expires header
+            if the value contains "max-age="
+               
+            By default, no Cache-Control header is generated.
+
+            You can use the <cacheControl> option even if you have set
+            never304="true"
+       -->
+       <!-- <cacheControl>max-age=30, public</cacheControl> -->
+    </httpCaching>
+  </requestDispatcher>
+  
+      
+  <!-- requestHandler plugins... incoming queries will be dispatched to the
+     correct handler based on the path or the qt (query type) param.
+     Names starting with a '/' are accessed with the a path equal to the 
+     registered name.  Names without a leading '/' are accessed with:
+      http://host/app/select?qt=name
+     If no qt is defined, the requestHandler that declares default="true"
+     will be used.
+  -->
+  <requestHandler name="standard" class="solr.StandardRequestHandler" default="true">
+    <!-- default values for query parameters -->
+     <lst name="defaults">
+       <str name="echoParams">explicit</str>
+       <!-- 
+       <int name="rows">10</int>
+       <str name="fl">*</str>
+       <str name="version">2.1</str>
+        -->
+     </lst>
+  </requestHandler>
+  
+  <requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
+    <lst name="defaults">
+      <str name="config">data-config.xml</str>
+
+    </lst>
+  </requestHandler>
+    
+  <!--
+   
+   Search components are registered to SolrCore and used by Search Handlers
+   
+   By default, the following components are avaliable:
+    
+   <searchComponent name="query"     class="org.apache.solr.handler.component.QueryComponent" />
+   <searchComponent name="facet"     class="org.apache.solr.handler.component.FacetComponent" />
+   <searchComponent name="mlt"       class="org.apache.solr.handler.component.MoreLikeThisComponent" />
+   <searchComponent name="highlight" class="org.apache.solr.handler.component.HighlightComponent" />
+   <searchComponent name="debug"     class="org.apache.solr.handler.component.DebugComponent" />
+  
+   If you register a searchComponent to one of the standard names, that will be used instead.
+  
+   -->
+ 
+  <requestHandler name="/search" class="org.apache.solr.handler.component.SearchHandler">
+    <lst name="defaults">
+      <str name="echoParams">explicit</str>
+    </lst>
+    <!--
+    By default, this will register the following components:
+    
+    <arr name="components">
+      <str>query</str>
+      <str>facet</str>
+      <str>mlt</str>
+      <str>highlight</str>
+      <str>debug</str>
+    </arr>
+    
+    To insert handlers before or after the 'standard' components, use:
+    
+    <arr name="first-components">
+      <str>first</str>
+    </arr>
+    
+    <arr name="last-components">
+      <str>last</str>
+    </arr>
+    
+    -->
+  </requestHandler>
+  
+  <!-- Update request handler.  
+  
+       Note: Since solr1.1 requestHandlers requires a valid content type header if posted in 
+       the body. For example, curl now requires: -H 'Content-type:text/xml; charset=utf-8'
+       The response format differs from solr1.1 formatting and returns a standard error code.
+       
+       To enable solr1.1 behavior, remove the /update handler or change its path
+       
+       "update.processor.class" is the class name for the UpdateRequestProcessor.  It is initalized
+       only once.  This can not be changed for each request.
+    -->
+  <requestHandler name="/update" class="solr.XmlUpdateRequestHandler" >
+    <!--
+    <str name="update.processor.class">org.apache.solr.handler.UpdateRequestProcessor</str>
+    -->
+  </requestHandler>
+  
+  <!-- config for the admin interface --> 
+  <admin>
+    <defaultQuery>*:*</defaultQuery>
+    
+    <!-- configure a healthcheck file for servers behind a loadbalancer
+    <healthcheck type="file">server-enabled</healthcheck>
+    -->
+  </admin>
+
+</config>
+
--- a/contrib/dataimporthandler/src/test/resources/solr/conf/dataconfig-contentstream.xml
+++ b/contrib/dataimporthandler/src/test/resources/solr/conf/dataconfig-contentstream.xml
@ -0,0 +1,10 @@
+<dataConfig>
+  <dataSource type="ContentStreamDataSource" name="c"/>
+  <document>
+    <entity name="b" dataSource="c" processor="XPathEntityProcessor"
+            forEach="/root/b">
+      <field column="desc" xpath="/root/b/c"/>
+      <field column="id" xpath="/root/b/id"/>
+    </entity>
+  </document>
+</dataConfig>
--- a/contrib/dataimporthandler/src/test/resources/solr/conf/dataimport-schema.xml
+++ b/contrib/dataimporthandler/src/test/resources/solr/conf/dataimport-schema.xml
@ -164,19 +164,19 @@
        <!-- in this example, we will only use synonyms at query time
        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
        -->
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
+        <!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
+        <!--<filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>-->
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
+        <!--<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>-->
+        <!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
+        <!--<filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>-->
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
    </fieldType>
@ -187,11 +187,11 @@
    <fieldType name="textTight" class="solr.TextField" positionIncrementGap="100" >
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
+        <!--<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>-->
+        <!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
        <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
+        <!--<filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>-->
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
    </fieldType>
--- a/src/solrj/org/apache/solr/client/solrj/request/DirectXmlRequest.java
+++ b/src/solrj/org/apache/solr/client/solrj/request/DirectXmlRequest.java
@ -37,6 +37,7 @@ import org.apache.solr.common.util.ContentStream;
 public class DirectXmlRequest extends SolrRequest
 {
  final String xml;
+  private SolrParams params;
  
  public DirectXmlRequest( String path, String body )
  {
@ -51,7 +52,12 @@ public class DirectXmlRequest extends SolrRequest

  @Override
  public SolrParams getParams() {
-    return null;
+    return params;
+  }
+
+
+  public void setParams(SolrParams params) {
+    this.params = params;
  }

  @Override