SOLR-3430: DIH expanded test coverage & subsequent bug fixes (cache support).

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1335140 13f79535-47bb-0310-9956-ffa450edef68
James Dyer 2012-05-07 17:24:42 +00:00
parent 74fd952b21
commit d29c5a8ea9
10 changed files with 377 additions and 291 deletions

View File

@@ -64,6 +64,7 @@
<path id="additional.dependencies">
<fileset dir="${common-solr.dir}/lib" excludes="${common.classpath.excludes}"/>
<fileset dir="${common-solr.dir}/example/lib" excludes="${common.classpath.excludes}"/>
<fileset dir="${common-solr.dir}/example/example-DIH/solr/db/lib" excludes="${common.classpath.excludes}"/>
<fileset dir="lib" excludes="${common.classpath.excludes}" erroronmissingdir="false"/>
</path>

View File

@@ -9,6 +9,10 @@ HTTP data sources quick and easy.
$Id$
================== 4.0.0-dev ==============
Bug Fixes
----------------------
* SOLR-3430: Added a new test against a real SQL database, and fixed problems this
test revealed in the expanded cache support added in 3.6 (SOLR-2382) (James Dyer)
Other Changes
----------------------

View File

@@ -57,8 +57,6 @@ public class EntityProcessorBase extends EntityProcessor {
firstInit(context);
}
if(cacheSupport!=null) {
rowIterator = null;
query = null;
cacheSupport.initNewParent(context);
}

View File

@@ -120,6 +120,20 @@ public class SortedMapBackedCache implements DIHCache {
@Override
public Iterator<Map<String,Object>> iterator(Object key) {
checkOpen(true);
if(key instanceof Iterable<?>) {
List<Map<String,Object>> vals = new ArrayList<Map<String,Object>>();
Iterator<?> iter = ((Iterable<?>) key).iterator();
while(iter.hasNext()) {
List<Map<String,Object>> val = theMap.get(iter.next());
if(val!=null) {
vals.addAll(val);
}
}
if(vals.size()==0) {
return null;
}
return vals.iterator();
}
List<Map<String,Object>> val = theMap.get(key);
if (val == null) {
return null;

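For anyone reading the bare diff above: the new branch lets a caller pass a collection of keys in a single lookup; the cached rows for each key are concatenated, and null still signals "no match". A minimal, self-contained sketch of those semantics (hypothetical class name, not part of the commit):

import java.util.*;

public class IterableKeyLookupSketch {
  // Stand-in for the cache's backing map: key -> rows cached under that key.
  static final Map<Object,List<Map<String,Object>>> theMap =
      new HashMap<Object,List<Map<String,Object>>>();

  static Iterator<Map<String,Object>> lookup(Object key) {
    if (key instanceof Iterable<?>) {
      // Concatenate the rows cached under each key in the collection.
      List<Map<String,Object>> vals = new ArrayList<Map<String,Object>>();
      for (Object k : (Iterable<?>) key) {
        List<Map<String,Object>> val = theMap.get(k);
        if (val != null) vals.addAll(val);
      }
      return vals.isEmpty() ? null : vals.iterator();
    }
    // Single-key case: null still means "no match", as in the patch above.
    List<Map<String,Object>> val = theMap.get(key);
    return val == null ? null : val.iterator();
  }

  public static void main(String[] args) {
    Map<String,Object> row = new HashMap<String,Object>();
    row.put("CODE", "NZ");
    theMap.put("NZ", Collections.singletonList(row));
    System.out.println(lookup("NZ").next());                      // single key
    System.out.println(lookup(Arrays.asList("NZ", "NO")).next()); // list of keys
  }
}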
View File

@@ -0,0 +1,41 @@
<dataConfig>
<dataSource name="hsqldb" driver="org.hsqldb.jdbcDriver" url="jdbc:hsqldb:mem:." />
<document name="dih_end_to_end">
<entity
name="People"
processor="SqlEntityProcessor"
dataSource="hsqldb"
query="SELECT ID, NAME, COUNTRY_CODES FROM PEOPLE"
transformer="RegexTransformer"
>
<field column="ID" name="id" />
<field column="COUNTRY_CODE" sourceColName="COUNTRY_CODES" splitBy="," />
<!--
Instead of using 'cachePk'/'cacheLookup' as done below, we could have done:
where="CODE=People.COUNTRY_CODE"
-->
<entity
name="Countries"
processor="SqlEntityProcessor"
dataSource="hsqldb"
cacheImpl="SortedMapBackedCache"
cachePk="CODE"
cacheLookup="People.COUNTRY_CODE"
query="SELECT CODE, COUNTRY_NAME FROM COUNTRIES"
>
<field column="CODE" name="DO_NOT_INDEX" />
</entity>
<entity
name="Sports"
processor="SqlEntityProcessor"
dataSource="hsqldb"
query="SELECT PERSON_ID, SPORT_NAME FROM PEOPLE_SPORTS WHERE PERSON_ID=${People.ID}"
/>
</entity>
</document>
</dataConfig>

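The notable part of this config is the cached child entity: cacheImpl="SortedMapBackedCache" makes DIH run the Countries query once, key the rows by cachePk="CODE", and answer each parent row's lookup (cacheLookup="People.COUNTRY_CODE") from memory instead of issuing one SQL query per person. A rough, self-contained sketch of that join under those assumptions (hypothetical names, not DIH internals):

import java.util.*;

public class CachedJoinSketch {
  public static void main(String[] args) {
    // One-time load of the child entity, keyed on cachePk="CODE".
    Map<String,List<Map<String,Object>>> countriesByCode =
        new HashMap<String,List<Map<String,Object>>>();
    for (String[] c : new String[][] {{"NZ","New Zealand"}, {"NO","Norway"}}) {
      Map<String,Object> row = new LinkedHashMap<String,Object>();
      row.put("CODE", c[0]);
      row.put("COUNTRY_NAME", c[1]);
      countriesByCode.put(c[0], Collections.singletonList(row));
    }
    // One parent's codes, after RegexTransformer split COUNTRY_CODES on ",".
    List<String> personCountryCodes = Arrays.asList("NZ", "NO");
    // Probe the cache with each cacheLookup value; no per-row SQL round trip.
    List<Map<String,Object>> joined = new ArrayList<Map<String,Object>>();
    for (String code : personCountryCodes) {
      List<Map<String,Object>> match = countriesByCode.get(code);
      if (match != null) joined.addAll(match);
    }
    System.out.println(joined); // the two joined country rows
  }
}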
View File

@@ -1,308 +1,58 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
This is the Solr schema file. This file should be named "schema.xml" and
should be in the conf directory under the solr home
(i.e. ./solr/conf/schema.xml by default)
or located where the classloader for the Solr webapp can find it.
This example schema is the recommended starting point for users.
It should be kept correct and concise, usable out-of-the-box.
For more information, on how to customize this file, please see
http://wiki.apache.org/solr/SchemaXml
-->
<schema name="test" version="1.1">
<!-- attribute "name" is the name of this schema and is only used for display purposes.
Applications should change this to reflect the nature of the search collection.
version="1.1" is Solr's version number for the schema syntax and semantics. It should
not normally be changed by applications.
1.0: multiValued attribute did not exist, all fields are multiValued by nature
1.1: multiValued attribute introduced, false by default -->
<types>
<!-- field type definitions. The "name" attribute is
just a label to be used by field definitions. The "class"
attribute and any other attributes determine the real
behavior of the fieldType.
Class names starting with "solr" refer to java classes in the
org.apache.solr.analysis package.
-->
<!-- The StrField type is not analyzed, but indexed/stored verbatim.
- StrField and TextField support an optional compressThreshold which
limits compression (if enabled in the derived fields) to values which
exceed a certain size (in characters).
-->
<schema name="dih_test" version="4.0">
<types>
<fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
<!-- boolean type: "true" or "false" -->
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
<!-- The optional sortMissingLast and sortMissingFirst attributes are
currently supported on types that are sorted internally as strings.
- If sortMissingLast="true", then a sort on this field will cause documents
without the field to come after documents with the field,
regardless of the requested sort order (asc or desc).
- If sortMissingFirst="true", then a sort on this field will cause documents
without the field to come before documents with the field,
regardless of the requested sort order.
- If sortMissingLast="false" and sortMissingFirst="false" (the default),
then default lucene sorting will be used which places docs without the
field first in an ascending sort and last in a descending sort.
-->
<!-- numeric field types that store and index the text
value verbatim (and hence don't support range queries, since the
lexicographic ordering isn't equal to the numeric ordering) -->
<fieldType name="integer" class="solr.IntField" omitNorms="true"/>
<fieldType name="long" class="solr.LongField" omitNorms="true"/>
<fieldType name="float" class="solr.FloatField" omitNorms="true"/>
<fieldType name="double" class="solr.DoubleField" omitNorms="true"/>
<!-- Numeric field types that manipulate the value into
a string value that isn't human-readable in its internal form,
but with a lexicographic ordering the same as the numeric ordering,
so that range queries work correctly. -->
<fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true"/>
<fieldType name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true"/>
<fieldType name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true"/>
<fieldType name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true"/>
<!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
is a more restricted form of the canonical representation of dateTime
http://www.w3.org/TR/xmlschema-2/#dateTime
The trailing "Z" designates UTC time and is mandatory.
Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
All other components are mandatory.
Expressions can also be used to denote calculations that should be
performed relative to "NOW" to determine the value, ie...
NOW/HOUR
... Round to the start of the current hour
NOW-1DAY
... Exactly 1 day prior to now
NOW/DAY+6MONTHS+3DAYS
... 6 months and 3 days in the future from the start of
the current day
Consult the DateField javadocs for more information.
-->
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="date" class="solr.DateField" sortMissingLast="true" omitNorms="true"/>
<!-- The "RandomSortField" is not used to store or search any
data. You can declare fields of this type it in your schema
to generate psuedo-random orderings of your docs for sorting
purposes. The ordering is generated based on the field name
and the version of the index, As long as the index version
remains unchanged, and the same field name is reused,
the ordering of the docs will be consistent.
If you want differend psuedo-random orderings of documents,
for the same version of the index, use a dynamicField and
change the name
-->
<fieldType name="random" class="solr.RandomSortField" indexed="true" />
<!-- solr.TextField allows the specification of custom text analyzers
specified as a tokenizer and a list of token filters. Different
analyzers may be specified for indexing and querying.
The optional positionIncrementGap puts space between multiple fields of
this type on the same document, with the purpose of preventing false phrase
matching across fields.
For more info on customizing your analyzer chain, please see
http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
-->
<!-- One can also specify an existing Analyzer class that has a
default constructor via the class attribute on the analyzer element
<fieldType name="text_greek" class="solr.TextField">
<analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
</fieldType>
-->
<!-- A text field that only splits on whitespace for exact matching of words -->
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
</analyzer>
</fieldType>
<!-- A text field that uses WordDelimiterFilter to enable splitting and matching of
words on case-change, alpha numeric boundaries, and non-alphanumeric chars,
so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
Synonyms and stopwords are customized by external files, and stemming is enabled.
Duplicate tokens at the same position (which may result from Stemmed Synonyms or
WordDelim parts) are removed.
-->
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.MockTokenizerFactory"/>
<!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
-->
<!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<!--<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/>-->
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.MockTokenizerFactory"/>
<!--<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>-->
<!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<!--<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/>-->
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
</fieldType>
<!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
<fieldType name="textTight" class="solr.TextField" positionIncrementGap="100" >
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
<!--<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>-->
<!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<!--<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.EnglishMinimalStemFilterFactory"/>-->
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
</fieldType>
<!-- This is an example of using the KeywordTokenizer along
with various TokenFilterFactories to produce a sortable field
that does not include some properties of the source text
-->
<fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
<analyzer>
<!-- KeywordTokenizer does no actual tokenizing, so the entire
input string is preserved as a single token
-->
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
<!-- The LowerCase TokenFilter does what you expect, which can be
useful when you want your sorting to be case insensitive
-->
<filter class="solr.LowerCaseFilterFactory" />
<!-- The TrimFilter removes any leading or trailing whitespace -->
<filter class="solr.TrimFilterFactory" />
<!-- The PatternReplaceFilter gives you the flexibility to use
Java regular expressions to replace any sequence of characters
matching a pattern with an arbitrary replacement string,
which may include back references to portions of the original
string matched by the pattern.
See the Java Regular Expression documentation for more
information on pattern and replacement string syntax.
http://java.sun.com/j2se/1.6.0/docs/api/java/util/regex/package-summary.html
-->
<filter class="solr.PatternReplaceFilterFactory"
pattern="([^a-z])" replacement="" replace="all"
/>
</analyzer>
</fieldType>
<!-- since fields of this type are by default not stored or indexed, any data added to
them will be ignored outright
-->
<fieldtype name="ignored" stored="false" indexed="false" class="solr.StrField" />
</types>
<fields>
<!-- Valid attributes for fields:
name: mandatory - the name for the field
type: mandatory - the name of a previously defined type from the <types> section
indexed: true if this field should be indexed (searchable or sortable)
stored: true if this field should be retrievable
compressed: [false] if this field should be stored using gzip compression
(this will only apply if the field type is compressible; among
the standard field types, only TextField and StrField are)
multiValued: true if this field may contain multiple values per document
omitNorms: (expert) set to true to omit the norms associated with
this field (this disables length normalization and index-time
boosting for the field, and saves some memory). Only full-text
fields or fields that need an index-time boost need norms.
termVectors: [false] set to true to store the term vector for a given field.
When using MoreLikeThis, fields used for similarity should be stored for
best performance.
-->
<field name="id" type="string" indexed="true" stored="true" required="true" />
<field name="desc" type="string" indexed="true" stored="true" multiValued="true" />
<field name="date" type="date" indexed="true" stored="true" />
<field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
will be used if the name matches any of the patterns.
RESTRICTION: the glob-like pattern in the name attribute must have
a "*" only at the start or the end.
EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
Longer patterns will be matched first. If equal size patterns
both match, the first appearing in the schema will be used. -->
<dynamicField name="*_i" type="sint" indexed="true" stored="true"/>
<dynamicField name="*_s" type="string" indexed="true" stored="true"/>
<dynamicField name="*_mult_s" type="string" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_l" type="slong" indexed="true" stored="true"/>
<dynamicField name="*_t" type="text" indexed="true" stored="true"/>
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
<dynamicField name="*_f" type="sfloat" indexed="true" stored="true"/>
<dynamicField name="*_d" type="sdouble" indexed="true" stored="true"/>
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
<dynamicField name="random*" type="random" />
<!-- uncomment the following to ignore any fields that don't already match an existing
field name or dynamic field, rather than reporting them as an error.
alternately, change the type="ignored" to some other type e.g. "text" if you want
unknown fields indexed and/or stored by default -->
<!--dynamicField name="*" type="ignored" /-->
</fields>
<!-- Field to use to determine and enforce document uniqueness.
Unless this field is marked with required="false", it will be a required field
-->
<uniqueKey>id</uniqueKey>
<!-- field for the QueryParser to use when an explicit fieldname is absent -->
<defaultSearchField>desc</defaultSearchField>
<!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
<solrQueryParser defaultOperator="OR"/>
</schema>
</types>
<fields>
<field name="id" type="string" indexed="true" stored="true" required="true" />
<field name="desc" type="string" indexed="true" stored="true" multiValued="true" />
<field name="date" type="date" indexed="true" stored="true" />
<field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
<field name="NAME" type="text" indexed="true" stored="true" multiValued="false" />
<field name="COUNTRY_NAME" type="text" indexed="true" stored="true" multiValued="true" />
<field name="SPORT_NAME" type="text" indexed="true" stored="true" multiValued="true" />
<field name="DO_NOT_INDEX" type="ignored" />
<dynamicField name="*_i" type="tint" indexed="true" stored="true"/>
<dynamicField name="*_s" type="string" indexed="true" stored="true"/>
<dynamicField name="*_mult_s" type="string" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_l" type="tlong" indexed="true" stored="true"/>
<dynamicField name="*_t" type="text" indexed="true" stored="true"/>
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
<dynamicField name="*_f" type="tfloat" indexed="true" stored="true"/>
<dynamicField name="*_d" type="tdouble" indexed="true" stored="true"/>
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
</fields>
<uniqueKey>id</uniqueKey>
<defaultSearchField>desc</defaultSearchField>
<solrQueryParser defaultOperator="OR"/>
</schema>

View File

@@ -0,0 +1,45 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<config>
<luceneMatchVersion>${tests.luceneMatchVersion:LUCENE_CURRENT}</luceneMatchVersion>
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
<updateHandler class="solr.DirectUpdateHandler2">
<maxPendingDeletes>100000</maxPendingDeletes>
</updateHandler>
<requestDispatcher handleSelect="true" >
<requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" />
<httpCaching never304="true" />
</requestDispatcher>
<requestHandler name="standard" class="solr.StandardRequestHandler" default="true">
<lst name="defaults">
<str name="echoParams">explicit</str>
</lst>
</requestHandler>
<requestHandler name="/dataimport-end-to-end" class="org.apache.solr.handler.dataimport.DataImportHandler">
<lst name="defaults">
<str name="config">data-config-end-to-end.xml</str>
</lst>
</requestHandler>
<requestHandler name="/search" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<str name="echoParams">explicit</str>
</lst>
</requestHandler>
</config>

View File

@@ -0,0 +1,170 @@
package org.apache.solr.handler.dataimport;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Statement;
import junit.framework.Assert;
import org.junit.AfterClass;
import org.junit.BeforeClass;
/**
* This sets up an in-memory HSQLDB database with a little sample data.
* The schema here is deliberately poorly designed, to illustrate DIH's
* ability to overcome such challenges.
*/
public abstract class AbstractDIHJdbcTestCase extends AbstractDataImportHandlerTestCase {
@BeforeClass
public static void beforeClassDihJdbcTest() throws Exception {
Class.forName("org.hsqldb.jdbcDriver").newInstance();
Connection conn = null;
Statement s = null;
PreparedStatement ps = null;
try {
conn = DriverManager.getConnection("jdbc:hsqldb:mem:.");
s = conn.createStatement();
s.executeUpdate("create table countries(code char(2) not null primary key, country_name varchar(50))");
s.executeUpdate("create table people(id int not null primary key, name varchar(50), country_codes varchar(100))");
s.executeUpdate("create table people_sports(id int not null primary key, person_id int, sport_name varchar(50))");
ps = conn.prepareStatement("insert into countries values (?,?)");
for(String[] country : countries) {
ps.setString(1, country[0]);
ps.setString(2, country[1]);
Assert.assertEquals(1, ps.executeUpdate());
}
ps.close();
ps = conn.prepareStatement("insert into people values (?,?,?)");
for(Object[] person : people) {
ps.setInt(1, (Integer) person[0]);
ps.setString(2, (String) person[1]);
ps.setString(3, (String) person[2]);
Assert.assertEquals(1, ps.executeUpdate());
}
ps.close();
ps = conn.prepareStatement("insert into people_sports values (?,?,?)");
for(Object[] sport : people_sports) {
ps.setInt(1, (Integer) sport[0]);
ps.setInt(2, (Integer) sport[1]);
ps.setString(3, (String) sport[2]);
Assert.assertEquals(1, ps.executeUpdate());
}
} finally {
if(s!=null) { s.close(); }
if(ps!=null) { ps.close(); }
if(conn!=null) { conn.close(); }
}
}
@AfterClass
public static void afterClassDihJdbcTest() throws Exception {
Connection conn = null;
Statement s = null;
try {
conn = DriverManager.getConnection("jdbc:hsqldb:mem:.");
s = conn.createStatement();
s.executeUpdate("shutdown");
} finally {
if(s!=null) { s.close(); }
if(conn!=null) { conn.close(); }
}
}
public static final String[][] countries = {
{"NA", "Namibia"},
{"NC", "New Caledonia"},
{"NE", "Niger"},
{"NF", "Norfolk Island"},
{"NG", "Nigeria"},
{"NI", "Nicaragua"},
{"NL", "Netherlands"},
{"NO", "Norway"},
{"NP", "Nepal"},
{"NR", "Nauru"},
{"NU", "Niue"},
{"NZ", "New Zealand"}
};
public static final Object[][] people = {
{1,"Jacob","NZ"},
{2,"Ethan","NU,NA,NE"},
{3,"Michael","NR"},
{4,"Jayden","NP"},
{5,"William","NO"},
{6,"Alexander","NL"},
{7,"Noah","NI"},
{8,"Daniel","NG"},
{9,"Aiden","NF"},
{10,"Anthony","NE"},
{11,"Emma","NL"},
{12,"Grace","NI"},
{13,"Hailey","NG"},
{14,"Isabella","NF"},
{15,"Lily","NE"},
{16,"Madison","NC"},
{17,"Mia","NA"},
{18,"Natalie","NP,NR,NU,NZ"},
{19,"Olivia","NU"},
{20,"Samantha","NR"}
};
public static final Object[][] people_sports = {
{100, 1, "Swimming"},
{200, 2, "Triathlon"},
{300, 3, "Water polo"},
{310, 3, "Underwater rugby"},
{320, 3, "Kayaking"},
{400, 4, "Snorkeling"},
{500, 5, "Synchronized diving"},
{600, 6, "Underwater rugby"},
{700, 7, "Boating"},
{800, 8, "Bodyboarding"},
{900, 9, "Canoeing"},
{1000, 10, "Fishing"},
{1100, 11, "Jet Ski"},
{1110, 11, "Rowing"},
{1120, 11, "Sailing"},
{1200, 12, "Kayaking"},
{1210, 12, "Canoeing"},
{1300, 13, "Kite surfing"},
{1400, 14, "Parasailing"},
{1500, 15, "Rafting"},
{1600, 16, "Rowing"},
{1700, 17, "Sailing"},
{1800, 18, "White Water Rafting"},
{1900, 19, "Water skiing"},
{2000, 20, "Windsurfing"}
};
}

View File

@@ -0,0 +1,58 @@
package org.apache.solr.handler.dataimport;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
public class TestDIHEndToEnd extends AbstractDIHJdbcTestCase {
@BeforeClass
public static void beforeClass() throws Exception {
initCore("dataimport-solrconfig-end-to-end.xml", "dataimport-schema.xml");
}
@Test
public void testEndToEnd() throws Exception {
LocalSolrQueryRequest request = lrf.makeRequest("command", "full-import",
"clean", "true", "commit", "true", "synchronous", "true", "indent", "true");
h.query("/dataimport-end-to-end", request);
assertQ(req("*:*"), "//*[@numFound='20']");
assertQ(req("COUNTRY_NAME:zealand"), "//*[@numFound='2']");
assertQ(req("COUNTRY_NAME:niue"), "//*[@numFound='3']");
//It would be nice if there were a way to run transformers before putting
//data in the cache; then id=2 (person=Ethan, country=NU,NA,NE) could join.
//assertQ(req("COUNTRY_NAME:Netherlands"), "//*[@numFound='3']");
assertQ(req("NAME:michael"), "//*[@numFound='1']");
assertQ(req("SPORT_NAME:kayaking"), "//*[@numFound='2']");
assertQ(req("SPORT_NAME:fishing"), "//*[@numFound='1']");
request = lrf.makeRequest("indent", "true");
String response = h.query("/dataimport-end-to-end", request);
Matcher m = Pattern.compile(".str name..Total Requests made to DataSource..(\\d+)..str.").matcher(response);
Assert.assertTrue(m.find() && m.groupCount()==1);
int numRequests = Integer.parseInt(m.group(1));
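// Rough expectation: 1 query for People + 1 for Countries (served from the
// cache thereafter) + one Sports query per person (20) is ~22 requests,
// hence the < 30 bound below.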
Assert.assertTrue(
"The database should have been hit once each " +
"for 'Person' & 'Country' and ~20 times for 'Sport'", numRequests<30);
}
}

View File

@@ -43,11 +43,11 @@ import org.junit.Test;
* @since solr 1.3
*/
public class TestJdbcDataSource extends AbstractDataImportHandlerTestCase {
Driver driver;
DataSource dataSource;
Connection connection;
IMocksControl mockControl;
JdbcDataSource jdbcDataSource = new JdbcDataSource();
private Driver driver;
private DataSource dataSource;
private Connection connection;
private IMocksControl mockControl;
private JdbcDataSource jdbcDataSource = new JdbcDataSource();
List<Map<String, String>> fields = new ArrayList<Map<String, String>>();
Context context = AbstractDataImportHandlerTestCase.getContext(null, null,
@@ -129,7 +129,7 @@ public class TestJdbcDataSource extends AbstractDataImportHandlerTestCase {
@Test
public void testRetrieveFromDriverManager() throws Exception {
DriverManager.registerDriver(driver);
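// Deregistering in a finally block (below) keeps a failed expectation from
// leaving the mock driver registered, which could break unrelated tests.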
try {
EasyMock.expect(
driver.connect((String) EasyMock.notNull(), (Properties) EasyMock
.notNull())).andReturn(connection);
@@ -147,6 +147,11 @@ public class TestJdbcDataSource extends AbstractDataImportHandlerTestCase {
mockControl.verify();
assertSame("connection", conn, connection);
} finally {
DriverManager.deregisterDriver(driver);
}
}
@Test