From 37bb87e70db556218aa90ec852e07a918ca52d3c Mon Sep 17 00:00:00 2001
From: James Dyer <jdyer@apache.org>
Date: Thu, 16 Feb 2012 15:09:13 +0000
Subject: [PATCH] SOLR-2947: fix multi-threaded DIH bug (introduced
 w/SOLR-2382)

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1245014 13f79535-47bb-0310-9956-ffa450edef68
---
 .../solr/handler/dataimport/DocBuilder.java   |  25 +++-
 .../solr/conf/dataimport-cache-ephemeral.xml  |  32 ----
 .../handler/dataimport/DestroyCountCache.java |  21 +++
 .../dataimport/TestEphemeralCache.java        | 140 +++++++++++++-----
 4 files changed, 145 insertions(+), 73 deletions(-)
 delete mode 100644 solr/contrib/dataimporthandler/src/test-files/dih/solr/conf/dataimport-cache-ephemeral.xml
 create mode 100644 solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/DestroyCountCache.java

diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DocBuilder.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DocBuilder.java
index 1badb6ba4c4..e9e544d11da 100644
--- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DocBuilder.java
+++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DocBuilder.java
@@ -297,20 +297,32 @@ public class DocBuilder {
     addStatusMessage("Rolledback");
   }
 
-  @SuppressWarnings("unchecked")
   private void doFullDump() {
     addStatusMessage("Full Dump Started");
-    if(dataImporter.getConfig().isMultiThreaded && !verboseDebug){
+    if (dataImporter.getConfig().isMultiThreaded && !verboseDebug) {
+      EntityRunner entityRunner = null;
       try {
         LOG.info("running multithreaded full-import");
-        new EntityRunner(root,null).run(null,Context.FULL_DUMP,null);
+        entityRunner =  new EntityRunner(root, null);
+        entityRunner.run(null, Context.FULL_DUMP, null);
       } catch (Exception e) {
         throw new RuntimeException("Error in multi-threaded import", e);
+      } finally {
+        if (entityRunner != null) {
+          List<EntityRunner> closure = new ArrayList<EntityRunner>();
+          closure.add(entityRunner);
+          for (int i = 0; i < closure.size(); i++) {
+            assert(!closure.get(i).entityProcessorWrapper.isEmpty());
+            closure.addAll(closure.get(i).entityProcessorWrapper.iterator().next().children.values());
+          }
+          for (EntityRunner er : closure) {
+            er.entityProcessor.destroy();
+          }
+        }
       }
     } else {
       buildDocument(getVariableResolver(), null, null, root, true, null);
-    }
-
+    }    
   }
 
   @SuppressWarnings("unchecked")
@@ -470,7 +482,6 @@ public class DocBuilder {
           }
         }
       } finally {
-        entityProcessor.destroy();
       }
 
 
@@ -572,7 +583,7 @@ public class DocBuilder {
           }
         }
       }
-    }
+    }    
   }
 
   /**A reverse linked list .
diff --git a/solr/contrib/dataimporthandler/src/test-files/dih/solr/conf/dataimport-cache-ephemeral.xml b/solr/contrib/dataimporthandler/src/test-files/dih/solr/conf/dataimport-cache-ephemeral.xml
deleted file mode 100644
index a8ef928959f..00000000000
--- a/solr/contrib/dataimporthandler/src/test-files/dih/solr/conf/dataimport-cache-ephemeral.xml
+++ /dev/null
@@ -1,32 +0,0 @@
-<dataConfig>
-	<dataSource type="MockDataSource" />
-	<document>
-		<entity 
-			name="PARENT"
-			processor="SqlEntityProcessor"
-			cacheName="PARENT"
-			cachePk="id"			
-			query="SELECT * FROM PARENT"				
-		>
-			<entity
-				name="CHILD_1"
-				processor="SqlEntityProcessor"
-				cacheImpl="SortedMapBackedCache"
-				cacheName="CHILD"
-				cachePk="id"
-				cacheLookup="PARENT.id"
-				fieldNames="id,         child1a_mult_s, child1b_s"
-				fieldTypes="BIGDECIMAL, STRING,         STRING"
-				query="SELECT * FROM CHILD_1"				
-			/>
-			<entity
-				name="CHILD_2"
-				processor="SqlEntityProcessor"
-				cacheImpl="SortedMapBackedCache"
-				cachePk="id"
-				cacheLookup="PARENT.id"
-				query="SELECT * FROM CHILD_2"				
-			/>
-		</entity>
-	</document>
-</dataConfig>
diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/DestroyCountCache.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/DestroyCountCache.java
new file mode 100644
index 00000000000..c9a8ca6ed43
--- /dev/null
+++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/DestroyCountCache.java
@@ -0,0 +1,21 @@
+package org.apache.solr.handler.dataimport;
+
+import static org.hamcrest.CoreMatchers.nullValue;
+
+import java.util.IdentityHashMap;
+import java.util.Map;
+
+import org.junit.Assert;
+
+public class DestroyCountCache extends SortedMapBackedCache {
+  static Map<DIHCache,DIHCache> destroyed = new IdentityHashMap<DIHCache,DIHCache>();
+  
+  @Override
+  public void destroy() {
+    super.destroy();
+    Assert.assertThat(destroyed.put(this, this), nullValue());
+  }
+  
+  public DestroyCountCache() {}
+  
+}
\ No newline at end of file
diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestEphemeralCache.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestEphemeralCache.java
index 9645236e338..383c6f7a793 100644
--- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestEphemeralCache.java
+++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestEphemeralCache.java
@@ -4,24 +4,55 @@ import java.math.BigDecimal;
 import java.util.ArrayList;
 import java.util.List;
 
+import static org.hamcrest.CoreMatchers.*;
+import org.junit.Before;
 import org.junit.BeforeClass;
+import org.junit.Ignore;
+import org.junit.Test;
 
 public class TestEphemeralCache extends AbstractDataImportHandlerTestCase {
-
-	@BeforeClass
-	public static void beforeClass() throws Exception {
-		initCore("dataimport-solrconfig.xml", "dataimport-schema.xml");
-	}
-
-	public void testEphemeralCache() throws Exception {
-		List parentRows = new ArrayList();
-		parentRows.add(createMap("id", new BigDecimal("1"), "parent_s", "one"));
-		parentRows.add(createMap("id", new BigDecimal("2"), "parent_s", "two"));
-		parentRows.add(createMap("id", new BigDecimal("3"), "parent_s", "three"));
-		parentRows.add(createMap("id", new BigDecimal("4"), "parent_s", "four"));
-		parentRows.add(createMap("id", new BigDecimal("5"), "parent_s", "five"));
-
-		List child1Rows = new ArrayList();
+  
+  
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    initCore("dataimport-solrconfig.xml", "dataimport-schema.xml");
+  }
+  
+  @Before
+  public void reset() {
+    DestroyCountCache.destroyed.clear();
+    setupMockData();
+  }
+  
+  @Test
+  public void testSingleThreaded() throws Exception {
+    assertFullImport(getDataConfigDotXml(0));
+  }
+  
+  @Test
+  public void testWithThreadedParamEqualOne() throws Exception {
+    assertFullImport(getDataConfigDotXml(1));
+  }
+  
+  @Ignore("TODO: fix included in SOLR-3011")
+  @Test
+  public void testMultiThreaded() throws Exception {
+    // Try between 2 and 6 threads
+    int numThreads = random.nextInt(4) + 2;
+    System.out.println("TRYING " + numThreads);
+    assertFullImport(getDataConfigDotXml(numThreads));
+  }
+  
+  @SuppressWarnings("unchecked")
+  private void setupMockData() {
+    List parentRows = new ArrayList();
+    parentRows.add(createMap("id", new BigDecimal("1"), "parent_s", "one"));
+    parentRows.add(createMap("id", new BigDecimal("2"), "parent_s", "two"));
+    parentRows.add(createMap("id", new BigDecimal("3"), "parent_s", "three"));
+    parentRows.add(createMap("id", new BigDecimal("4"), "parent_s", "four"));
+    parentRows.add(createMap("id", new BigDecimal("5"), "parent_s", "five"));
+    
+    List child1Rows = new ArrayList();
     child1Rows.add(createMap("id", new BigDecimal("6"), "child1a_mult_s", "this is the number six."));
     child1Rows.add(createMap("id", new BigDecimal("5"), "child1a_mult_s", "this is the number five."));
     child1Rows.add(createMap("id", new BigDecimal("6"), "child1a_mult_s", "let's sing a song of six."));
@@ -32,7 +63,7 @@ public class TestEphemeralCache extends AbstractDataImportHandlerTestCase {
     child1Rows.add(createMap("id", new BigDecimal("1"), "child1a_mult_s", "one"));
     child1Rows.add(createMap("id", new BigDecimal("1"), "child1a_mult_s", "uno"));
     child1Rows.add(createMap("id", new BigDecimal("2"), "child1b_s", "CHILD1B", "child1a_mult_s", "this is the number two."));
-
+    
     List child2Rows = new ArrayList();
     child2Rows.add(createMap("id", new BigDecimal("6"), "child2a_mult_s", "Child 2 says, 'this is the number six.'"));
     child2Rows.add(createMap("id", new BigDecimal("5"), "child2a_mult_s", "Child 2 says, 'this is the number five.'"));
@@ -44,25 +75,66 @@ public class TestEphemeralCache extends AbstractDataImportHandlerTestCase {
     child2Rows.add(createMap("id", new BigDecimal("1"), "child2a_mult_s", "Child 2 says, 'one'"));
     child2Rows.add(createMap("id", new BigDecimal("1"), "child2a_mult_s", "Child 2 says, 'uno'"));
     child2Rows.add(createMap("id", new BigDecimal("2"), "child2a_mult_s", "Child 2 says, 'this is the number two.'"));
-
+    
     MockDataSource.setIterator("SELECT * FROM PARENT", parentRows.iterator());
     MockDataSource.setIterator("SELECT * FROM CHILD_1", child1Rows.iterator());
     MockDataSource.setIterator("SELECT * FROM CHILD_2", child2Rows.iterator());
-
-    runFullImport(loadDataConfig("dataimport-cache-ephemeral.xml"));
-
-    assertQ(req("*:*"),                                       "//*[@numFound='5']");
-    assertQ(req("id:1"),                                      "//*[@numFound='1']");
-    assertQ(req("id:6"),                                      "//*[@numFound='0']");
-    assertQ(req("parent_s:four"),                             "//*[@numFound='1']");
-    assertQ(req("child1a_mult_s:this\\ is\\ the\\ numbe*"),   "//*[@numFound='2']");
-    assertQ(req("child2a_mult_s:Child\\ 2\\ say*"),           "//*[@numFound='4']");
-    assertQ(req("child1b_s:CHILD1B"),                         "//*[@numFound='1']");
-    assertQ(req("child2b_s:CHILD2B"),                         "//*[@numFound='1']");
-    assertQ(req("child1a_mult_s:one"),                        "//*[@numFound='1']");
-    assertQ(req("child1a_mult_s:uno"),                        "//*[@numFound='1']");
-    assertQ(req("child1a_mult_s:(uno OR one)"),               "//*[@numFound='1']");
-
-	}
-
+    
+  }
+  private String getDataConfigDotXml(int numThreads) {
+    return
+      "<dataConfig>" +
+      " <dataSource type=\"MockDataSource\" />" +
+      " <document>" +
+      "   <entity " +
+      "     name=\"PARENT\"" +
+      "     processor=\"SqlEntityProcessor\"" +
+      "     cacheImpl=\"org.apache.solr.handler.dataimport.DestroyCountCache\"" +
+      "     cacheName=\"PARENT\"" +
+      "     query=\"SELECT * FROM PARENT\"  " +
+      (numThreads==0 ? "" : "threads=\"" + numThreads + "\" ") +
+      "   >" +
+      "     <entity" +
+      "       name=\"CHILD_1\"" +
+      "       processor=\"SqlEntityProcessor\"" +
+      "       cacheImpl=\"org.apache.solr.handler.dataimport.DestroyCountCache\"" +
+      "       cacheName=\"CHILD\"" +
+      "       cachePk=\"id\"" +
+      "       cacheLookup=\"PARENT.id\"" +
+      "       fieldNames=\"id,         child1a_mult_s, child1b_s\"" +
+      "       fieldTypes=\"BIGDECIMAL, STRING,         STRING\"" +
+      "       query=\"SELECT * FROM CHILD_1\"       " +
+      "     />" +
+      "     <entity" +
+      "       name=\"CHILD_2\"" +
+      "       processor=\"SqlEntityProcessor\"" +
+      "       cacheImpl=\"org.apache.solr.handler.dataimport.DestroyCountCache\"" +
+      "       cachePk=\"id\"" +
+      "       cacheLookup=\"PARENT.id\"" +
+      "       query=\"SELECT * FROM CHILD_2\"       " +
+      "     />" +
+      "   </entity>" +
+      " </document>" +
+      "</dataConfig>"
+    ;
+  }
+  
+  private void assertFullImport(String dataConfig) throws Exception {
+    runFullImport(dataConfig);
+    
+    assertQ(req("*:*"), "//*[@numFound='5']");
+    assertQ(req("id:1"), "//*[@numFound='1']");
+    assertQ(req("id:6"), "//*[@numFound='0']");
+    assertQ(req("parent_s:four"), "//*[@numFound='1']");
+    assertQ(req("child1a_mult_s:this\\ is\\ the\\ numbe*"), "//*[@numFound='2']");
+    assertQ(req("child2a_mult_s:Child\\ 2\\ say*"), "//*[@numFound='4']");
+    assertQ(req("child1b_s:CHILD1B"), "//*[@numFound='1']");
+    assertQ(req("child2b_s:CHILD2B"), "//*[@numFound='1']");
+    assertQ(req("child1a_mult_s:one"), "//*[@numFound='1']");
+    assertQ(req("child1a_mult_s:uno"), "//*[@numFound='1']");
+    assertQ(req("child1a_mult_s:(uno OR one)"), "//*[@numFound='1']");
+    
+    assertThat(DestroyCountCache.destroyed.size(), is(3));
+  }
+  
 }