SOLR-3122: allow interrupting zkcmdoperations and some test work

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1244231 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Mark Robert Miller 2012-02-14 21:58:16 +00:00
parent 3ba23367ea
commit 56f12201b7
6 changed files with 198 additions and 52 deletions

View File

@ -28,6 +28,7 @@ import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.client.solrj.request.CoreAdminRequest.PrepRecovery; import org.apache.solr.client.solrj.request.CoreAdminRequest.PrepRecovery;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.cloud.SafeStopThread;
import org.apache.solr.common.cloud.ZkCoreNodeProps; import org.apache.solr.common.cloud.ZkCoreNodeProps;
import org.apache.solr.common.cloud.ZkNodeProps; import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.cloud.ZkStateReader;
@ -46,7 +47,7 @@ import org.apache.solr.update.UpdateLog.RecoveryInfo;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
public class RecoveryStrategy extends Thread { public class RecoveryStrategy extends Thread implements SafeStopThread {
private static final int MAX_RETRIES = 500; private static final int MAX_RETRIES = 500;
private static final int INTERRUPTED = MAX_RETRIES + 1; private static final int INTERRUPTED = MAX_RETRIES + 1;
private static final int START_TIMEOUT = 100; private static final int START_TIMEOUT = 100;
@ -191,8 +192,14 @@ public class RecoveryStrategy extends Thread {
} }
log.info("Sync Recovery was not successful - trying replication"); log.info("Sync Recovery was not successful - trying replication");
UpdateLog ulog = core.getUpdateHandler().getUpdateLog(); UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
if (ulog == null) return; if (ulog == null) {
SolrException.log(log, "No UpdateLog found - cannot recover");
recoveryFailed(core, zkController, baseUrl, coreZkNodeName,
core.getCoreDescriptor());
return;
}
log.info("Begin buffering updates");
ulog.bufferUpdates(); ulog.bufferUpdates();
replayed = false; replayed = false;
@ -296,4 +303,8 @@ public class RecoveryStrategy extends Thread {
return future; return future;
} }
public boolean isClosed() {
return close;
}
} }

View File

@ -18,15 +18,23 @@ package org.apache.solr.cloud;
*/ */
import java.io.File; import java.io.File;
import java.util.Map;
import org.apache.solr.BaseDistributedSearchTestCase; import org.apache.solr.BaseDistributedSearchTestCase;
import org.apache.solr.client.solrj.embedded.JettySolrRunner; import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.common.cloud.CloudState;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.SolrZkClient; import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.core.SolrConfig; import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.zookeeper.KeeperException;
import org.junit.After;
import org.junit.AfterClass; import org.junit.AfterClass;
import org.junit.Before; import org.junit.Before;
public abstract class AbstractDistributedZkTestCase extends BaseDistributedSearchTestCase { public abstract class AbstractDistributedZkTestCase extends BaseDistributedSearchTestCase {
protected static final String DEFAULT_COLLECTION = "collection1";
private static final boolean DEBUG = false; private static final boolean DEBUG = false;
protected ZkTestServer zkServer; protected ZkTestServer zkServer;
@ -44,6 +52,10 @@ public abstract class AbstractDistributedZkTestCase extends BaseDistributedSearc
zkServer.run(); zkServer.run();
System.setProperty("zkHost", zkServer.getZkAddress()); System.setProperty("zkHost", zkServer.getZkAddress());
System.setProperty("enable.update.log", "true");
System.setProperty("remove.version.field", "true");
System
.setProperty("solr.directoryFactory", "solr.StandardDirectoryFactory");
AbstractZkTestCase.buildZooKeeper(zkServer.getZkHost(), zkServer.getZkAddress(), "solrconfig.xml", "schema.xml"); AbstractZkTestCase.buildZooKeeper(zkServer.getZkHost(), zkServer.getZkAddress(), "solrconfig.xml", "schema.xml");
@ -70,8 +82,83 @@ public abstract class AbstractDistributedZkTestCase extends BaseDistributedSearc
shards = sb.toString(); shards = sb.toString();
} }
protected void waitForRecoveriesToFinish(String collection, ZkStateReader zkStateReader, boolean verbose)
throws KeeperException, InterruptedException {
waitForRecoveriesToFinish(collection, zkStateReader, verbose, false);
}
protected void waitForRecoveriesToFinish(String collection,
ZkStateReader zkStateReader, boolean verbose, boolean failOnTimeout)
throws KeeperException, InterruptedException {
boolean cont = true;
int cnt = 0;
while (cont) {
if (verbose) System.out.println("-");
boolean sawLiveRecovering = false;
zkStateReader.updateCloudState(true);
CloudState cloudState = zkStateReader.getCloudState();
Map<String,Slice> slices = cloudState.getSlices(collection);
for (Map.Entry<String,Slice> entry : slices.entrySet()) {
Map<String,ZkNodeProps> shards = entry.getValue().getShards();
for (Map.Entry<String,ZkNodeProps> shard : shards.entrySet()) {
if (verbose) System.out.println("rstate:"
+ shard.getValue().get(ZkStateReader.STATE_PROP)
+ " live:"
+ cloudState.liveNodesContain(shard.getValue().get(
ZkStateReader.NODE_NAME_PROP)));
String state = shard.getValue().get(ZkStateReader.STATE_PROP);
if ((state.equals(ZkStateReader.RECOVERING) || state
.equals(ZkStateReader.SYNC))
&& cloudState.liveNodesContain(shard.getValue().get(
ZkStateReader.NODE_NAME_PROP))) {
sawLiveRecovering = true;
}
}
}
if (!sawLiveRecovering || cnt == 15) {
if (!sawLiveRecovering) {
if (verbose) System.out.println("no one is recoverying");
} else {
if (failOnTimeout) {
fail("There are still nodes recoverying");
return;
}
if (verbose) System.out
.println("gave up waiting for recovery to finish..");
}
cont = false;
} else {
Thread.sleep(2000);
}
cnt++;
}
}
protected void assertAllActive(String collection,ZkStateReader zkStateReader)
throws KeeperException, InterruptedException {
zkStateReader.updateCloudState(true);
CloudState cloudState = zkStateReader.getCloudState();
Map<String,Slice> slices = cloudState.getSlices(collection);
if (slices == null) {
throw new IllegalArgumentException("Cannot find collection:" + collection);
}
for (Map.Entry<String,Slice> entry : slices.entrySet()) {
Map<String,ZkNodeProps> shards = entry.getValue().getShards();
for (Map.Entry<String,ZkNodeProps> shard : shards.entrySet()) {
String state = shard.getValue().get(ZkStateReader.STATE_PROP);
if (!state.equals(ZkStateReader.ACTIVE)) {
fail("Not all shards are ACTIVE");
}
}
}
}
@Override @Override
@After
public void tearDown() throws Exception { public void tearDown() throws Exception {
if (DEBUG) { if (DEBUG) {
printLayout(); printLayout();
@ -79,6 +166,9 @@ public abstract class AbstractDistributedZkTestCase extends BaseDistributedSearc
zkServer.shutdown(); zkServer.shutdown();
System.clearProperty("zkHost"); System.clearProperty("zkHost");
System.clearProperty("collection"); System.clearProperty("collection");
System.clearProperty("enable.update.log");
System.clearProperty("remove.version.field");
System.clearProperty("solr.directoryFactory");
System.clearProperty("solr.test.sys.prop1"); System.clearProperty("solr.test.sys.prop1");
System.clearProperty("solr.test.sys.prop2"); System.clearProperty("solr.test.sys.prop2");
resetExceptionIgnores(); resetExceptionIgnores();
@ -93,7 +183,5 @@ public abstract class AbstractDistributedZkTestCase extends BaseDistributedSearc
@AfterClass @AfterClass
public static void afterClass() throws InterruptedException { public static void afterClass() throws InterruptedException {
// wait just a bit for any zk client threads to outlast timeout
Thread.sleep(2000);
} }
} }

View File

@ -62,7 +62,8 @@ public class BasicDistributedZkTest extends AbstractDistributedZkTestCase {
private Map<String,List<SolrServer>> otherCollectionClients = new HashMap<String,List<SolrServer>>(); private Map<String,List<SolrServer>> otherCollectionClients = new HashMap<String,List<SolrServer>>();
private Map<String,List<SolrServer>> oneInstanceCollectionClients = new HashMap<String,List<SolrServer>>(); private Map<String,List<SolrServer>> oneInstanceCollectionClients = new HashMap<String,List<SolrServer>>();
private String oneInstanceCollection = "oneInstanceCollection";; private String oneInstanceCollection = "oneInstanceCollection";
private String oneInstanceCollection2 = "oneInstanceCollection2";
public BasicDistributedZkTest() { public BasicDistributedZkTest() {
fixShardCount = true; fixShardCount = true;
@ -247,12 +248,63 @@ public class BasicDistributedZkTest extends AbstractDistributedZkTestCase {
testMultipleCollections(); testMultipleCollections();
testANewCollectionInOneInstance(); testANewCollectionInOneInstance();
testSearchByCollectionName(); testSearchByCollectionName();
testANewCollectionInOneInstanceWithManualShardAssignement();
// Thread.sleep(10000000000L); // Thread.sleep(10000000000L);
if (DEBUG) { if (DEBUG) {
super.printLayout(); super.printLayout();
} }
} }
private void testANewCollectionInOneInstanceWithManualShardAssignement() throws Exception {
List<SolrServer> collectionClients = new ArrayList<SolrServer>();
SolrServer client = clients.get(0);
oneInstanceCollectionClients.put(oneInstanceCollection , collectionClients);
String baseUrl = ((CommonsHttpSolrServer) client).getBaseURL();
createCollection(oneInstanceCollection2, collectionClients, baseUrl, 1, "slice1");
createCollection(oneInstanceCollection2, collectionClients, baseUrl, 2, "slice2");
createCollection(oneInstanceCollection2, collectionClients, baseUrl, 3, "slice2");
createCollection(oneInstanceCollection2, collectionClients, baseUrl, 4, "slice1");
SolrServer client1 = createNewSolrServer(oneInstanceCollection2 + "1", baseUrl);
SolrServer client2 = createNewSolrServer(oneInstanceCollection2 + "2", baseUrl);
SolrServer client3 = createNewSolrServer(oneInstanceCollection2 + "3", baseUrl);
SolrServer client4 = createNewSolrServer(oneInstanceCollection2 + "4", baseUrl);
client2.add(getDoc(id, "1"));
client3.add(getDoc(id, "2"));
client4.add(getDoc(id, "3"));
// no one should be recovering
waitForRecoveriesToFinish(oneInstanceCollection2, solrj.getZkStateReader(), false, true);
assertAllActive(oneInstanceCollection2, solrj.getZkStateReader());
client1.commit();
SolrQuery query = new SolrQuery("*:*");
query.set("distrib", false);
long oneDocs = client1.query(query).getResults().getNumFound();
long twoDocs = client2.query(query).getResults().getNumFound();
long threeDocs = client3.query(query).getResults().getNumFound();
long fourDocs = client4.query(query).getResults().getNumFound();
query.set("collection", oneInstanceCollection2);
query.set("distrib", true);
long allDocs = solrj.query(query).getResults().getNumFound();
// System.out.println("1:" + oneDocs);
// System.out.println("2:" + twoDocs);
// System.out.println("3:" + threeDocs);
// System.out.println("4:" + fourDocs);
// System.out.println("All Docs:" + allDocs);
assertEquals(oneDocs, threeDocs);
assertEquals(twoDocs, fourDocs);
assertNotSame(oneDocs, twoDocs);
assertEquals(3, allDocs);
}
private void testSearchByCollectionName() throws SolrServerException { private void testSearchByCollectionName() throws SolrServerException {
SolrServer client = clients.get(0); SolrServer client = clients.get(0);
String baseUrl = ((CommonsHttpSolrServer) client).getBaseURL(); String baseUrl = ((CommonsHttpSolrServer) client).getBaseURL();
@ -280,6 +332,9 @@ public class BasicDistributedZkTest extends AbstractDistributedZkTestCase {
SolrServer client3 = createNewSolrServer(oneInstanceCollection + "3", baseUrl); SolrServer client3 = createNewSolrServer(oneInstanceCollection + "3", baseUrl);
SolrServer client4 = createNewSolrServer(oneInstanceCollection + "4", baseUrl); SolrServer client4 = createNewSolrServer(oneInstanceCollection + "4", baseUrl);
waitForRecoveriesToFinish(oneInstanceCollection, solrj.getZkStateReader(), false);
assertAllActive(oneInstanceCollection, solrj.getZkStateReader());
client2.add(getDoc(id, "1")); client2.add(getDoc(id, "1"));
client3.add(getDoc(id, "2")); client3.add(getDoc(id, "2"));
client4.add(getDoc(id, "3")); client4.add(getDoc(id, "3"));
@ -311,6 +366,12 @@ public class BasicDistributedZkTest extends AbstractDistributedZkTestCase {
private void createCollection(String collection, private void createCollection(String collection,
List<SolrServer> collectionClients, String baseUrl, int num) List<SolrServer> collectionClients, String baseUrl, int num)
throws MalformedURLException, SolrServerException, IOException { throws MalformedURLException, SolrServerException, IOException {
createCollection(collection, collectionClients, baseUrl, num, null);
}
private void createCollection(String collection,
List<SolrServer> collectionClients, String baseUrl, int num, String shardId)
throws MalformedURLException, SolrServerException, IOException {
CommonsHttpSolrServer server = new CommonsHttpSolrServer( CommonsHttpSolrServer server = new CommonsHttpSolrServer(
baseUrl); baseUrl);
Create createCmd = new Create(); Create createCmd = new Create();
@ -319,6 +380,7 @@ public class BasicDistributedZkTest extends AbstractDistributedZkTestCase {
createCmd.setNumShards(2); createCmd.setNumShards(2);
createCmd.setDataDir(dataDir.getAbsolutePath() + File.separator createCmd.setDataDir(dataDir.getAbsolutePath() + File.separator
+ collection + num); + collection + num);
createCmd.setShardId(shardId);
server.request(createCmd); server.request(createCmd);
collectionClients.add(createNewSolrServer(collection, baseUrl)); collectionClients.add(createNewSolrServer(collection, baseUrl));
} }
@ -389,12 +451,13 @@ public class BasicDistributedZkTest extends AbstractDistributedZkTestCase {
throws MalformedURLException, SolrServerException, IOException { throws MalformedURLException, SolrServerException, IOException {
List<SolrServer> collectionClients = new ArrayList<SolrServer>(); List<SolrServer> collectionClients = new ArrayList<SolrServer>();
otherCollectionClients.put(collection, collectionClients); otherCollectionClients.put(collection, collectionClients);
int unique = 0;
for (SolrServer client : clients) { for (SolrServer client : clients) {
CommonsHttpSolrServer server = new CommonsHttpSolrServer( CommonsHttpSolrServer server = new CommonsHttpSolrServer(
((CommonsHttpSolrServer) client).getBaseURL()); ((CommonsHttpSolrServer) client).getBaseURL());
Create createCmd = new Create(); Create createCmd = new Create();
createCmd.setCoreName(collection); createCmd.setCoreName(collection);
createCmd.setDataDir(dataDir.getAbsolutePath() + File.separator + collection); createCmd.setDataDir(dataDir.getAbsolutePath() + File.separator + collection + unique++);
server.request(createCmd); server.request(createCmd);
collectionClients.add(createNewSolrServer(collection, collectionClients.add(createNewSolrServer(collection,
((CommonsHttpSolrServer) client).getBaseURL())); ((CommonsHttpSolrServer) client).getBaseURL()));

View File

@ -65,8 +65,6 @@ public class FullSolrCloudTest extends AbstractDistributedZkTestCase {
private static final String SHARD2 = "shard2"; private static final String SHARD2 = "shard2";
protected static final String DEFAULT_COLLECTION = "collection1";
private boolean printLayoutOnTearDown = false; private boolean printLayoutOnTearDown = false;
String t1 = "a_t"; String t1 = "a_t";
@ -151,16 +149,12 @@ public class FullSolrCloudTest extends AbstractDistributedZkTestCase {
System System
.setProperty("solr.directoryFactory", "solr.StandardDirectoryFactory"); .setProperty("solr.directoryFactory", "solr.StandardDirectoryFactory");
System.setProperty("solrcloud.update.delay", "0"); System.setProperty("solrcloud.update.delay", "0");
System.setProperty("enable.update.log", "true");
System.setProperty("remove.version.field", "true");
} }
@AfterClass @AfterClass
public static void afterClass() { public static void afterClass() {
System.clearProperty("solr.directoryFactory"); System.clearProperty("solr.directoryFactory");
System.clearProperty("solrcloud.update.delay"); System.clearProperty("solrcloud.update.delay");
System.clearProperty("enable.update.log");
System.clearProperty("remove.version.field");
} }
public FullSolrCloudTest() { public FullSolrCloudTest() {
@ -655,45 +649,7 @@ public class FullSolrCloudTest extends AbstractDistributedZkTestCase {
protected void waitForRecoveriesToFinish(boolean verbose) protected void waitForRecoveriesToFinish(boolean verbose)
throws KeeperException, InterruptedException { throws KeeperException, InterruptedException {
boolean cont = true; super.waitForRecoveriesToFinish(DEFAULT_COLLECTION, zkStateReader, verbose);
int cnt = 0;
while (cont) {
if (verbose) System.out.println("-");
boolean sawLiveRecovering = false;
zkStateReader.updateCloudState(true);
CloudState cloudState = zkStateReader.getCloudState();
Map<String,Slice> slices = cloudState.getSlices(DEFAULT_COLLECTION);
for (Map.Entry<String,Slice> entry : slices.entrySet()) {
Map<String,ZkNodeProps> shards = entry.getValue().getShards();
for (Map.Entry<String,ZkNodeProps> shard : shards.entrySet()) {
if (verbose) System.out.println("rstate:"
+ shard.getValue().get(ZkStateReader.STATE_PROP)
+ " live:"
+ cloudState.liveNodesContain(shard.getValue().get(
ZkStateReader.NODE_NAME_PROP)));
String state = shard.getValue().get(ZkStateReader.STATE_PROP);
if ((state.equals(ZkStateReader.RECOVERING)
|| state.equals(ZkStateReader.SYNC))
&& cloudState.liveNodesContain(shard.getValue().get(
ZkStateReader.NODE_NAME_PROP))) {
sawLiveRecovering = true;
}
}
}
if (!sawLiveRecovering || cnt == 10) {
if (!sawLiveRecovering) {
if (verbose) System.out.println("no one is recoverying");
} else {
if (verbose) System.out
.println("gave up waiting for recovery to finish..");
}
cont = false;
} else {
Thread.sleep(2000);
}
cnt++;
}
} }
private void brindDownShardIndexSomeDocsAndRecover() throws Exception, private void brindDownShardIndexSomeDocsAndRecover() throws Exception,

View File

@ -0,0 +1,23 @@
package org.apache.solr.common.cloud;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public interface SafeStopThread {
public void stop();
public boolean isClosed();
}

View File

@ -71,6 +71,11 @@ public class ZkCmdExecutor {
Thread.currentThread().interrupt(); Thread.currentThread().interrupt();
throw new InterruptedException(); throw new InterruptedException();
} }
if (Thread.currentThread() instanceof SafeStopThread) {
if (((SafeStopThread) Thread.currentThread()).isClosed()) {
throw new RuntimeException("Interrupted");
}
}
retryDelay(i); retryDelay(i);
} }
} }