SOLR-12801: Make massive improvements to the tests.

SOLR-12804: Remove static modifier from Overseer queue access.

SOLR-12896: Introduce more checks for shutdown and closed to improve clean close and shutdown. (Partial)

SOLR-12897: Introduce AlreadyClosedException to clean up silly close / shutdown logging. (Partial)

SOLR-12898: Replace cluster state polling with ZkStateReader#waitFor. (Partial)

SOLR-12923: The new AutoScaling tests are way too flaky and need special attention. (Partial)

SOLR-12932: ant test (without badapples=false) should pass easily for developers. (Partial)

SOLR-12933: Fix SolrCloud distributed commit.
This commit is contained in:
markrmiller 2018-11-29 11:58:18 -06:00
parent 81c092d826
commit 75b1831967
349 changed files with 6840 additions and 4129 deletions

View File

@ -91,4 +91,8 @@ grant {
permission javax.security.auth.kerberos.ServicePermission "HTTP/127.0.0.1@EXAMPLE.COM", "accept"; permission javax.security.auth.kerberos.ServicePermission "HTTP/127.0.0.1@EXAMPLE.COM", "accept";
permission javax.security.auth.kerberos.DelegationPermission "\"HTTP/127.0.0.1@EXAMPLE.COM\" \"krbtgt/EXAMPLE.COM@EXAMPLE.COM\""; permission javax.security.auth.kerberos.DelegationPermission "\"HTTP/127.0.0.1@EXAMPLE.COM\" \"krbtgt/EXAMPLE.COM@EXAMPLE.COM\"";
// java 8 accessibility requires this perm - should not after 8 I believe (rrd4j is the root reason we hit an accessibility code path)
permission java.awt.AWTPermission "listenToAllAWTEvents";
permission java.awt.AWTPermission "accessEventQueue";
}; };

View File

@ -131,16 +131,15 @@ New Features
---------------------- ----------------------
(No Changes) (No Changes)
Other Changes
----------------------
* SOLR-12972: deprecate unused SolrIndexConfig.luceneVersion (Christine Poerschke)
Bug Fixes Bug Fixes
---------------------- ----------------------
* SOLR-12546: CVSResponseWriter omits useDocValuesAsStored=true field when fl=* * SOLR-12546: CVSResponseWriter omits useDocValuesAsStored=true field when fl=*
(Munendra S N via Mikhail Khludnev) (Munendra S N via Mikhail Khludnev)
* SOLR-12933: Fix SolrCloud distributed commit. (Mark Miller)
Improvements Improvements
---------------------- ----------------------
@ -149,6 +148,25 @@ Improvements
* SOLR-12992: When using binary format, ExportWriter to directly copy BytesRef instead of * SOLR-12992: When using binary format, ExportWriter to directly copy BytesRef instead of
creating new String (noble) creating new String (noble)
* SOLR-12898: Replace cluster state polling with ZkStateReader#waitFor. (Mark Miller)
* SOLR-12897: Introduce AlreadyClosedException to clean up silly close / shutdown logging. (Mark Miller)
* SOLR-12896: Introduce more checks for shutdown and closed to improve clean close and shutdown. (Mark Miller)
* SOLR-12804: Remove static modifier from Overseer queue access. (Mark Miller)
Other Changes
----------------------
* SOLR-12972: deprecate unused SolrIndexConfig.luceneVersion (Christine Poerschke)
* SOLR-12801: Make massive improvements to the tests. (Mark Miller)
* SOLR-12923: The new AutoScaling tests are way too flaky and need special attention. (Mark Miller)
* SOLR-12932: ant test (without badapples=false) should pass easily for developers. (Mark Miller)
================== 7.6.0 ================== ================== 7.6.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release. Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

View File

@ -20,6 +20,7 @@ import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.HashSet; import java.util.HashSet;
import java.util.concurrent.TimeoutException;
import org.apache.solr.analytics.util.AnalyticsResponseHeadings; import org.apache.solr.analytics.util.AnalyticsResponseHeadings;
import org.apache.solr.analytics.util.MedianCalculator; import org.apache.solr.analytics.util.MedianCalculator;
@ -29,11 +30,11 @@ import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.QueryRequest; import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.cloud.AbstractDistribZkTestBase;
import org.apache.solr.cloud.SolrCloudTestCase; import org.apache.solr.cloud.SolrCloudTestCase;
import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.junit.BeforeClass; import org.junit.After;
import org.junit.Before;
public class LegacyAbstractAnalyticsCloudTest extends SolrCloudTestCase { public class LegacyAbstractAnalyticsCloudTest extends SolrCloudTestCase {
@ -41,19 +42,23 @@ public class LegacyAbstractAnalyticsCloudTest extends SolrCloudTestCase {
protected static final int TIMEOUT = DEFAULT_TIMEOUT; protected static final int TIMEOUT = DEFAULT_TIMEOUT;
protected static final String id = "id"; protected static final String id = "id";
@BeforeClass @Before
public static void setupCollection() throws Exception { public void setupCollection() throws Exception {
configureCluster(4) configureCluster(4)
.addConfig("conf", configset("cloud-analytics")) .addConfig("conf", configset("cloud-analytics"))
.configure(); .configure();
CollectionAdminRequest.createCollection(COLLECTIONORALIAS, "conf", 2, 1).process(cluster.getSolrClient()); CollectionAdminRequest.createCollection(COLLECTIONORALIAS, "conf", 2, 1).process(cluster.getSolrClient());
AbstractDistribZkTestBase.waitForRecoveriesToFinish(COLLECTIONORALIAS, cluster.getSolrClient().getZkStateReader(), cluster.waitForActiveCollection(COLLECTIONORALIAS, 2, 2);
false, true, TIMEOUT);
cleanIndex();
} }
public static void cleanIndex() throws Exception { @After
public void teardownCollection() throws Exception {
cluster.deleteAllCollections();
shutdownCluster();
}
public void cleanIndex() throws Exception {
new UpdateRequest() new UpdateRequest()
.deleteByQuery("*:*") .deleteByQuery("*:*")
.commit(cluster.getSolrClient(), COLLECTIONORALIAS); .commit(cluster.getSolrClient(), COLLECTIONORALIAS);
@ -81,7 +86,7 @@ public class LegacyAbstractAnalyticsCloudTest extends SolrCloudTestCase {
} }
} }
protected NamedList<Object> queryLegacyCloudAnalytics(String[] testParams) throws SolrServerException, IOException, InterruptedException { protected NamedList<Object> queryLegacyCloudAnalytics(String[] testParams) throws SolrServerException, IOException, InterruptedException, TimeoutException {
ModifiableSolrParams params = new ModifiableSolrParams(); ModifiableSolrParams params = new ModifiableSolrParams();
params.set("q", "*:*"); params.set("q", "*:*");
params.set("indent", "true"); params.set("indent", "true");

View File

@ -21,7 +21,7 @@ import java.util.List;
import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.junit.BeforeClass; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
public class LegacyNoFacetCloudTest extends LegacyAbstractAnalyticsCloudTest { public class LegacyNoFacetCloudTest extends LegacyAbstractAnalyticsCloudTest {
@ -57,16 +57,20 @@ public class LegacyNoFacetCloudTest extends LegacyAbstractAnalyticsCloudTest {
static ArrayList<String> stringTestStart; static ArrayList<String> stringTestStart;
static long stringMissing = 0; static long stringMissing = 0;
@BeforeClass @Before
public static void populate() throws Exception { public void populate() throws Exception {
cleanIndex();
intTestStart = new ArrayList<>(); intTestStart = new ArrayList<>();
longTestStart = new ArrayList<>(); longTestStart = new ArrayList<>();
floatTestStart = new ArrayList<>(); floatTestStart = new ArrayList<>();
doubleTestStart = new ArrayList<>(); doubleTestStart = new ArrayList<>();
dateTestStart = new ArrayList<>(); dateTestStart = new ArrayList<>();
stringTestStart = new ArrayList<>(); stringTestStart = new ArrayList<>();
intMissing = 0;
longMissing = 0;
doubleMissing = 0;
floatMissing = 0;
dateMissing = 0;
stringMissing = 0;
UpdateRequest req = new UpdateRequest(); UpdateRequest req = new UpdateRequest();
for (int j = 0; j < NUM_LOOPS; ++j) { for (int j = 0; j < NUM_LOOPS; ++j) {

View File

@ -24,7 +24,7 @@ import java.util.List;
import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.junit.Assert; import org.junit.Assert;
import org.junit.BeforeClass; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
@ -85,9 +85,8 @@ public class LegacyFieldFacetCloudTest extends LegacyAbstractAnalyticsFacetCloud
private static ArrayList<ArrayList<Integer>> multiDateTestStart; private static ArrayList<ArrayList<Integer>> multiDateTestStart;
private static ArrayList<Long> multiDateTestMissing; private static ArrayList<Long> multiDateTestMissing;
@BeforeClass @Before
public static void beforeClass() throws Exception { public void beforeTest() throws Exception {
cleanIndex();
//INT //INT
intDateTestStart = new ArrayList<>(); intDateTestStart = new ArrayList<>();

View File

@ -24,7 +24,7 @@ import java.util.List;
import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.junit.BeforeClass; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
public class LegacyFieldFacetExtrasCloudTest extends LegacyAbstractAnalyticsFacetCloudTest { public class LegacyFieldFacetExtrasCloudTest extends LegacyAbstractAnalyticsFacetCloudTest {
@ -42,9 +42,8 @@ public class LegacyFieldFacetExtrasCloudTest extends LegacyAbstractAnalyticsFace
static ArrayList<ArrayList<Integer>> intDoubleTestStart; static ArrayList<ArrayList<Integer>> intDoubleTestStart;
static ArrayList<ArrayList<Integer>> intStringTestStart; static ArrayList<ArrayList<Integer>> intStringTestStart;
@BeforeClass @Before
public static void beforeClass() throws Exception { public void beforeTest() throws Exception {
cleanIndex();
//INT //INT
intLongTestStart = new ArrayList<>(); intLongTestStart = new ArrayList<>();

View File

@ -22,7 +22,7 @@ import java.util.List;
import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.junit.BeforeClass; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
public class LegacyQueryFacetCloudTest extends LegacyAbstractAnalyticsFacetCloudTest { public class LegacyQueryFacetCloudTest extends LegacyAbstractAnalyticsFacetCloudTest {
@ -39,9 +39,8 @@ public class LegacyQueryFacetCloudTest extends LegacyAbstractAnalyticsFacetCloud
private static ArrayList<ArrayList<Long>> longTestStart = new ArrayList<>(); private static ArrayList<ArrayList<Long>> longTestStart = new ArrayList<>();
private static ArrayList<ArrayList<Float>> floatTestStart = new ArrayList<>(); private static ArrayList<ArrayList<Float>> floatTestStart = new ArrayList<>();
@BeforeClass @Before
public static void beforeClass() throws Exception { public void beforeTest() throws Exception {
cleanIndex();
//INT //INT
int1TestStart.add(new ArrayList<Integer>()); int1TestStart.add(new ArrayList<Integer>());

View File

@ -21,7 +21,7 @@ import java.util.List;
import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.junit.BeforeClass; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
@ -44,9 +44,8 @@ public class LegacyRangeFacetCloudTest extends LegacyAbstractAnalyticsFacetCloud
static ArrayList<ArrayList<Float>> floatDoubleTestStart; static ArrayList<ArrayList<Float>> floatDoubleTestStart;
static ArrayList<ArrayList<Float>> floatDateTestStart; static ArrayList<ArrayList<Float>> floatDateTestStart;
@BeforeClass @Before
public static void beforeClass() throws Exception { public void beforeTest() throws Exception {
cleanIndex();
//INT //INT
intLongTestStart = new ArrayList<>(); intLongTestStart = new ArrayList<>();

View File

@ -52,7 +52,7 @@ public class TestContentStreamDataSource extends AbstractDataImportHandlerTestCa
super.setUp(); super.setUp();
instance = new SolrInstance("inst", null); instance = new SolrInstance("inst", null);
instance.setUp(); instance.setUp();
jetty = createJetty(instance); jetty = createAndStartJetty(instance);
} }
@Override @Override
@ -173,7 +173,7 @@ public class TestContentStreamDataSource extends AbstractDataImportHandlerTestCa
} }
private JettySolrRunner createJetty(SolrInstance instance) throws Exception { private JettySolrRunner createAndStartJetty(SolrInstance instance) throws Exception {
Properties nodeProperties = new Properties(); Properties nodeProperties = new Properties();
nodeProperties.setProperty("solr.data.dir", instance.getDataDir()); nodeProperties.setProperty("solr.data.dir", instance.getDataDir());
JettySolrRunner jetty = new JettySolrRunner(instance.getHomeDir(), nodeProperties, buildJettyConfig("/solr")); JettySolrRunner jetty = new JettySolrRunner(instance.getHomeDir(), nodeProperties, buildJettyConfig("/solr"));

View File

@ -127,7 +127,7 @@ public class TestSolrEntityProcessorEndToEnd extends AbstractDataImportHandlerTe
// data source solr instance // data source solr instance
instance = new SolrInstance(); instance = new SolrInstance();
instance.setUp(); instance.setUp();
jetty = createJetty(instance); jetty = createAndStartJetty(instance);
} }
@Override @Override
@ -362,7 +362,7 @@ public class TestSolrEntityProcessorEndToEnd extends AbstractDataImportHandlerTe
} }
} }
private JettySolrRunner createJetty(SolrInstance instance) throws Exception { private JettySolrRunner createAndStartJetty(SolrInstance instance) throws Exception {
Properties nodeProperties = new Properties(); Properties nodeProperties = new Properties();
nodeProperties.setProperty("solr.data.dir", instance.getDataDir()); nodeProperties.setProperty("solr.data.dir", instance.getDataDir());
JettySolrRunner jetty = new JettySolrRunner(instance.getHomeDir(), nodeProperties, buildJettyConfig("/solr")); JettySolrRunner jetty = new JettySolrRunner(instance.getHomeDir(), nodeProperties, buildJettyConfig("/solr"));

View File

@ -26,7 +26,6 @@ import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.Map; import java.util.Map;
import org.apache.solr.cloud.AbstractZkTestCase;
import org.apache.solr.cloud.ZkTestServer; import org.apache.solr.cloud.ZkTestServer;
import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.SuppressForbidden; import org.apache.solr.common.util.SuppressForbidden;
@ -62,7 +61,7 @@ public class TestZKPropertiesWriter extends AbstractDataImportHandlerTestCase {
System.setProperty("zkHost", zkServer.getZkAddress()); System.setProperty("zkHost", zkServer.getZkAddress());
System.setProperty("jetty.port", "0000"); System.setProperty("jetty.port", "0000");
AbstractZkTestCase.buildZooKeeper(zkServer.getZkHost(), zkServer.getZkAddress(), getFile("dih/solr"), zkServer.buildZooKeeper(getFile("dih/solr"),
"dataimport-solrconfig.xml", "dataimport-schema.xml"); "dataimport-solrconfig.xml", "dataimport-schema.xml");
//initCore("solrconfig.xml", "schema.xml", getFile("dih/solr").getAbsolutePath()); //initCore("solrconfig.xml", "schema.xml", getFile("dih/solr").getAbsolutePath());

View File

@ -18,14 +18,13 @@ package org.apache.solr.ltr;
import java.util.Iterator; import java.util.Iterator;
import java.util.Map; import java.util.Map;
import java.util.concurrent.Executor; import java.util.concurrent.ExecutorService;
import java.util.concurrent.Semaphore; import java.util.concurrent.Semaphore;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.TimeUnit;
import org.apache.solr.common.util.ExecutorUtil; import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.util.DefaultSolrThreadFactory; import org.apache.solr.core.CloseHook;
import org.apache.solr.core.SolrCore;
import org.apache.solr.util.SolrPluginUtils; import org.apache.solr.util.SolrPluginUtils;
import org.apache.solr.util.plugin.NamedListInitializedPlugin; import org.apache.solr.util.plugin.NamedListInitializedPlugin;
@ -58,7 +57,7 @@ import org.apache.solr.util.plugin.NamedListInitializedPlugin;
* <code>totalPoolThreads</code> imposes a contention between the queries if * <code>totalPoolThreads</code> imposes a contention between the queries if
* <code>(totalPoolThreads &lt; numThreadsPerRequest * total parallel queries)</code>. * <code>(totalPoolThreads &lt; numThreadsPerRequest * total parallel queries)</code>.
*/ */
final public class LTRThreadModule implements NamedListInitializedPlugin { final public class LTRThreadModule extends CloseHook implements NamedListInitializedPlugin {
public static LTRThreadModule getInstance(NamedList args) { public static LTRThreadModule getInstance(NamedList args) {
@ -103,13 +102,10 @@ final public class LTRThreadModule implements NamedListInitializedPlugin {
// settings // settings
private int totalPoolThreads = 1; private int totalPoolThreads = 1;
private int numThreadsPerRequest = 1; private int numThreadsPerRequest = 1;
private int maxPoolSize = Integer.MAX_VALUE;
private long keepAliveTimeSeconds = 10;
private String threadNamePrefix = "ltrExecutor";
// implementation // implementation
private Semaphore ltrSemaphore; private Semaphore ltrSemaphore;
private Executor createWeightScoreExecutor; private volatile ExecutorService createWeightScoreExecutor;
public LTRThreadModule() { public LTRThreadModule() {
} }
@ -132,13 +128,6 @@ final public class LTRThreadModule implements NamedListInitializedPlugin {
} else { } else {
ltrSemaphore = null; ltrSemaphore = null;
} }
createWeightScoreExecutor = new ExecutorUtil.MDCAwareThreadPoolExecutor(
0,
maxPoolSize,
keepAliveTimeSeconds, TimeUnit.SECONDS, // terminate idle threads after 10 sec
new SynchronousQueue<Runnable>(), // directly hand off tasks
new DefaultSolrThreadFactory(threadNamePrefix)
);
} }
private void validate() { private void validate() {
@ -161,18 +150,6 @@ final public class LTRThreadModule implements NamedListInitializedPlugin {
this.numThreadsPerRequest = numThreadsPerRequest; this.numThreadsPerRequest = numThreadsPerRequest;
} }
public void setMaxPoolSize(int maxPoolSize) {
this.maxPoolSize = maxPoolSize;
}
public void setKeepAliveTimeSeconds(long keepAliveTimeSeconds) {
this.keepAliveTimeSeconds = keepAliveTimeSeconds;
}
public void setThreadNamePrefix(String threadNamePrefix) {
this.threadNamePrefix = threadNamePrefix;
}
public Semaphore createQuerySemaphore() { public Semaphore createQuerySemaphore() {
return (numThreadsPerRequest > 1 ? new Semaphore(numThreadsPerRequest) : null); return (numThreadsPerRequest > 1 ? new Semaphore(numThreadsPerRequest) : null);
} }
@ -189,4 +166,18 @@ final public class LTRThreadModule implements NamedListInitializedPlugin {
createWeightScoreExecutor.execute(command); createWeightScoreExecutor.execute(command);
} }
@Override
public void preClose(SolrCore core) {
ExecutorUtil.shutdownAndAwaitTermination(createWeightScoreExecutor);
}
@Override
public void postClose(SolrCore core) {
}
public void setExecutor(ExecutorService sharedExecutor) {
this.createWeightScoreExecutor = sharedExecutor;
}
} }

View File

@ -204,6 +204,9 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
"searcher is null"); "searcher is null");
} }
leafContexts = searcher.getTopReaderContext().leaves(); leafContexts = searcher.getTopReaderContext().leaves();
if (threadManager != null) {
threadManager.setExecutor(context.getRequest().getCore().getCoreContainer().getUpdateShardHandler().getUpdateExecutor());
}
// Setup LTRScoringQuery // Setup LTRScoringQuery
scoringQuery = SolrQueryRequestContextUtils.getScoringQuery(req); scoringQuery = SolrQueryRequestContextUtils.getScoringQuery(req);

View File

@ -162,7 +162,9 @@ public class LTRQParserPlugin extends QParserPlugin implements ResourceLoaderAwa
final String fvStoreName = SolrQueryRequestContextUtils.getFvStoreName(req); final String fvStoreName = SolrQueryRequestContextUtils.getFvStoreName(req);
// Check if features are requested and if the model feature store and feature-transform feature store are the same // Check if features are requested and if the model feature store and feature-transform feature store are the same
final boolean featuresRequestedFromSameStore = (modelFeatureStoreName.equals(fvStoreName) || fvStoreName == null) ? extractFeatures:false; final boolean featuresRequestedFromSameStore = (modelFeatureStoreName.equals(fvStoreName) || fvStoreName == null) ? extractFeatures:false;
if (threadManager != null) {
threadManager.setExecutor(req.getCore().getCoreContainer().getUpdateShardHandler().getUpdateExecutor());
}
final LTRScoringQuery scoringQuery = new LTRScoringQuery(ltrScoringModel, final LTRScoringQuery scoringQuery = new LTRScoringQuery(ltrScoringModel,
extractEFIParams(localParams), extractEFIParams(localParams),
featuresRequestedFromSameStore, threadManager); featuresRequestedFromSameStore, threadManager);

View File

@ -25,7 +25,6 @@ import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.request.CollectionAdminRequest; import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.response.CollectionAdminResponse; import org.apache.solr.client.solrj.response.CollectionAdminResponse;
import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.cloud.AbstractDistribZkTestBase;
import org.apache.solr.cloud.MiniSolrCloudCluster; import org.apache.solr.cloud.MiniSolrCloudCluster;
import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.cloud.ZkStateReader;
@ -232,7 +231,7 @@ public class TestLTROnSolrCloud extends TestRerankBase {
fail("Could not create collection. Response" + response.toString()); fail("Could not create collection. Response" + response.toString());
} }
ZkStateReader zkStateReader = solrCluster.getSolrClient().getZkStateReader(); ZkStateReader zkStateReader = solrCluster.getSolrClient().getZkStateReader();
AbstractDistribZkTestBase.waitForRecoveriesToFinish(name, zkStateReader, false, true, 100); solrCluster.waitForActiveCollection(name, numShards, numShards * numReplicas);
} }

View File

@ -39,7 +39,9 @@ public class JettyConfig {
public final SSLConfig sslConfig; public final SSLConfig sslConfig;
private JettyConfig(int port, String context, boolean stopAtShutdown, Long waitForLoadingCoresToFinishMs, Map<ServletHolder, String> extraServlets, public final int portRetryTime;
private JettyConfig(int port, int portRetryTime, String context, boolean stopAtShutdown, Long waitForLoadingCoresToFinishMs, Map<ServletHolder, String> extraServlets,
Map<Class<? extends Filter>, String> extraFilters, SSLConfig sslConfig) { Map<Class<? extends Filter>, String> extraFilters, SSLConfig sslConfig) {
this.port = port; this.port = port;
this.context = context; this.context = context;
@ -48,6 +50,7 @@ public class JettyConfig {
this.extraServlets = extraServlets; this.extraServlets = extraServlets;
this.extraFilters = extraFilters; this.extraFilters = extraFilters;
this.sslConfig = sslConfig; this.sslConfig = sslConfig;
this.portRetryTime = portRetryTime;
} }
public static Builder builder() { public static Builder builder() {
@ -74,6 +77,7 @@ public class JettyConfig {
Map<ServletHolder, String> extraServlets = new TreeMap<>(); Map<ServletHolder, String> extraServlets = new TreeMap<>();
Map<Class<? extends Filter>, String> extraFilters = new LinkedHashMap<>(); Map<Class<? extends Filter>, String> extraFilters = new LinkedHashMap<>();
SSLConfig sslConfig = null; SSLConfig sslConfig = null;
int portRetryTime = 60;
public Builder setPort(int port) { public Builder setPort(int port) {
this.port = port; this.port = port;
@ -122,8 +126,14 @@ public class JettyConfig {
return this; return this;
} }
public Builder withPortRetryTime(int portRetryTime) {
this.portRetryTime = portRetryTime;
return this;
}
public JettyConfig build() { public JettyConfig build() {
return new JettyConfig(port, context, stopAtShutdown, waitForLoadingCoresToFinishMs, extraServlets, extraFilters, sslConfig); return new JettyConfig(port, portRetryTime, context, stopAtShutdown, waitForLoadingCoresToFinishMs, extraServlets, extraFilters, sslConfig);
} }
} }

View File

@ -16,18 +16,9 @@
*/ */
package org.apache.solr.client.solrj.embedded; package org.apache.solr.client.solrj.embedded;
import javax.servlet.DispatcherType;
import javax.servlet.Filter;
import javax.servlet.FilterChain;
import javax.servlet.FilterConfig;
import javax.servlet.ServletException;
import javax.servlet.ServletRequest;
import javax.servlet.ServletResponse;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException; import java.io.IOException;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
import java.net.BindException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.util.ArrayList; import java.util.ArrayList;
@ -41,10 +32,24 @@ import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicLong;
import javax.servlet.DispatcherType;
import javax.servlet.Filter;
import javax.servlet.FilterChain;
import javax.servlet.FilterConfig;
import javax.servlet.ServletException;
import javax.servlet.ServletRequest;
import javax.servlet.ServletResponse;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.cloud.SocketProxy;
import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.common.util.TimeSource;
import org.apache.solr.core.CoreContainer; import org.apache.solr.core.CoreContainer;
import org.apache.solr.servlet.SolrDispatchFilter; import org.apache.solr.servlet.SolrDispatchFilter;
import org.apache.solr.util.TimeOut;
import org.eclipse.jetty.server.Connector; import org.eclipse.jetty.server.Connector;
import org.eclipse.jetty.server.HttpConfiguration; import org.eclipse.jetty.server.HttpConfiguration;
import org.eclipse.jetty.server.HttpConnectionFactory; import org.eclipse.jetty.server.HttpConnectionFactory;
@ -61,6 +66,7 @@ import org.eclipse.jetty.servlet.Source;
import org.eclipse.jetty.util.component.LifeCycle; import org.eclipse.jetty.util.component.LifeCycle;
import org.eclipse.jetty.util.ssl.SslContextFactory; import org.eclipse.jetty.util.ssl.SslContextFactory;
import org.eclipse.jetty.util.thread.QueuedThreadPool; import org.eclipse.jetty.util.thread.QueuedThreadPool;
import org.eclipse.jetty.util.thread.ReservedThreadExecutor;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.slf4j.MDC; import org.slf4j.MDC;
@ -80,8 +86,8 @@ public class JettySolrRunner {
Server server; Server server;
FilterHolder dispatchFilter; volatile FilterHolder dispatchFilter;
FilterHolder debugFilter; volatile FilterHolder debugFilter;
private boolean waitOnSolr = false; private boolean waitOnSolr = false;
private int jettyPort = -1; private int jettyPort = -1;
@ -98,6 +104,16 @@ public class JettySolrRunner {
private int proxyPort = -1; private int proxyPort = -1;
private final boolean enableProxy;
private SocketProxy proxy;
private String protocol;
private String host;
private volatile boolean started = false;
public static class DebugFilter implements Filter { public static class DebugFilter implements Filter {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@ -200,11 +216,34 @@ public class JettySolrRunner {
* @param config the configuration * @param config the configuration
*/ */
public JettySolrRunner(String solrHome, Properties nodeProperties, JettyConfig config) { public JettySolrRunner(String solrHome, Properties nodeProperties, JettyConfig config) {
this(solrHome, nodeProperties, config, false);
}
/**
* Construct a JettySolrRunner
*
* After construction, you must start the jetty with {@link #start()}
*
* @param solrHome the solrHome to use
* @param nodeProperties the container properties
* @param config the configuration
* @param enableProxy enables proxy feature to disable connections
*/
public JettySolrRunner(String solrHome, Properties nodeProperties, JettyConfig config, boolean enableProxy) {
this.enableProxy = enableProxy;
this.solrHome = solrHome; this.solrHome = solrHome;
this.config = config; this.config = config;
this.nodeProperties = nodeProperties; this.nodeProperties = nodeProperties;
if (enableProxy) {
try {
proxy = new SocketProxy(0, config.sslConfig != null && config.sslConfig.isSSLMode());
} catch (Exception e) {
throw new RuntimeException(e);
}
setProxyPort(proxy.getListenPort());
}
this.init(this.config.port); this.init(this.config.port);
} }
@ -213,7 +252,7 @@ public class JettySolrRunner {
QueuedThreadPool qtp = new QueuedThreadPool(); QueuedThreadPool qtp = new QueuedThreadPool();
qtp.setMaxThreads(THREAD_POOL_MAX_THREADS); qtp.setMaxThreads(THREAD_POOL_MAX_THREADS);
qtp.setIdleTimeout(THREAD_POOL_MAX_IDLE_TIME_MS); qtp.setIdleTimeout(THREAD_POOL_MAX_IDLE_TIME_MS);
qtp.setStopTimeout((int) TimeUnit.MINUTES.toMillis(1)); qtp.setReservedThreads(0);
server = new Server(qtp); server = new Server(qtp);
server.manage(qtp); server.manage(qtp);
server.setStopAtShutdown(config.stopAtShutdown); server.setStopAtShutdown(config.stopAtShutdown);
@ -246,7 +285,7 @@ public class JettySolrRunner {
connector.setPort(port); connector.setPort(port);
connector.setHost("127.0.0.1"); connector.setHost("127.0.0.1");
connector.setIdleTimeout(THREAD_POOL_MAX_IDLE_TIME_MS); connector.setIdleTimeout(THREAD_POOL_MAX_IDLE_TIME_MS);
connector.setStopTimeout(0);
server.setConnectors(new Connector[] {connector}); server.setConnectors(new Connector[] {connector});
server.setSessionIdManager(new DefaultSessionIdManager(server, new Random())); server.setSessionIdManager(new DefaultSessionIdManager(server, new Random()));
} else { } else {
@ -271,10 +310,7 @@ public class JettySolrRunner {
@Override @Override
public void lifeCycleStarting(LifeCycle arg0) { public void lifeCycleStarting(LifeCycle arg0) {
synchronized (JettySolrRunner.this) {
waitOnSolr = true;
JettySolrRunner.this.notify();
}
} }
@Override @Override
@ -306,6 +342,11 @@ public class JettySolrRunner {
dispatchFilter.setHeldClass(SolrDispatchFilter.class); dispatchFilter.setHeldClass(SolrDispatchFilter.class);
dispatchFilter.setInitParameter("excludePatterns", excludePatterns); dispatchFilter.setInitParameter("excludePatterns", excludePatterns);
root.addFilter(dispatchFilter, "*", EnumSet.of(DispatcherType.REQUEST)); root.addFilter(dispatchFilter, "*", EnumSet.of(DispatcherType.REQUEST));
synchronized (JettySolrRunner.this) {
waitOnSolr = true;
JettySolrRunner.this.notify();
}
} }
@Override @Override
@ -344,15 +385,19 @@ public class JettySolrRunner {
} }
public String getNodeName() { public String getNodeName() {
if (getCoreContainer() == null) {
return null;
}
return getCoreContainer().getZkController().getNodeName(); return getCoreContainer().getZkController().getNodeName();
} }
public boolean isRunning() { public boolean isRunning() {
return server.isRunning(); return server.isRunning() && dispatchFilter != null && dispatchFilter.isRunning();
} }
public boolean isStopped() { public boolean isStopped() {
return server.isStopped(); return (server.isStopped() && dispatchFilter == null) || (server.isStopped() && dispatchFilter.isStopped()
&& ((QueuedThreadPool) server.getThreadPool()).isStopped());
} }
// ------------------------------------------------------------------------------------------------ // ------------------------------------------------------------------------------------------------
@ -382,31 +427,53 @@ public class JettySolrRunner {
// Do not let Jetty/Solr pollute the MDC for this thread // Do not let Jetty/Solr pollute the MDC for this thread
Map<String, String> prevContext = MDC.getCopyOfContextMap(); Map<String, String> prevContext = MDC.getCopyOfContextMap();
MDC.clear(); MDC.clear();
log.info("Start Jetty (original configured port={})", this.config.port);
try { try {
int port = reusePort && jettyPort != -1 ? jettyPort : this.config.port;
// if started before, make a new server // if started before, make a new server
if (startedBefore) { if (startedBefore) {
waitOnSolr = false; waitOnSolr = false;
int port = reusePort ? jettyPort : this.config.port;
init(port); init(port);
} else { } else {
startedBefore = true; startedBefore = true;
} }
if (!server.isRunning()) { if (!server.isRunning()) {
if (config.portRetryTime > 0) {
retryOnPortBindFailure(config.portRetryTime, port);
} else {
server.start(); server.start();
} }
}
synchronized (JettySolrRunner.this) { synchronized (JettySolrRunner.this) {
int cnt = 0; int cnt = 0;
while (!waitOnSolr) { while (!waitOnSolr || !dispatchFilter.isRunning() || getCoreContainer() == null) {
this.wait(100); this.wait(100);
if (cnt++ == 5) { if (cnt++ == 15) {
throw new RuntimeException("Jetty/Solr unresponsive"); throw new RuntimeException("Jetty/Solr unresponsive");
} }
} }
} }
if (config.waitForLoadingCoresToFinishMs != null && config.waitForLoadingCoresToFinishMs > 0L) waitForLoadingCoresToFinish(config.waitForLoadingCoresToFinishMs); if (config.waitForLoadingCoresToFinishMs != null && config.waitForLoadingCoresToFinishMs > 0L) {
waitForLoadingCoresToFinish(config.waitForLoadingCoresToFinishMs);
}
setProtocolAndHost();
if (enableProxy) {
if (started) {
proxy.reopen();
} else {
proxy.open(getBaseUrl().toURI());
}
}
} finally { } finally {
started = true;
if (prevContext != null) { if (prevContext != null) {
MDC.setContextMap(prevContext); MDC.setContextMap(prevContext);
} else { } else {
@ -415,6 +482,43 @@ public class JettySolrRunner {
} }
} }
private void setProtocolAndHost() {
String protocol = null;
Connector[] conns = server.getConnectors();
if (0 == conns.length) {
throw new IllegalStateException("Jetty Server has no Connectors");
}
ServerConnector c = (ServerConnector) conns[0];
protocol = c.getDefaultProtocol().startsWith("SSL") ? "https" : "http";
this.protocol = protocol;
this.host = c.getHost();
}
private void retryOnPortBindFailure(int portRetryTime, int port) throws Exception, InterruptedException {
TimeOut timeout = new TimeOut(portRetryTime, TimeUnit.SECONDS, TimeSource.NANO_TIME);
int tryCnt = 1;
while (true) {
try {
log.info("Trying to start Jetty on port {} try number {} ...", port, tryCnt++);
server.start();
break;
} catch (BindException e) {
log.info("Port is in use, will try again until timeout of " + timeout);
server.stop();
Thread.sleep(3000);
if (!timeout.hasTimedOut()) {
continue;
}
throw e;
}
}
}
/** /**
* Stop the Jetty server * Stop the Jetty server
* *
@ -427,6 +531,28 @@ public class JettySolrRunner {
try { try {
Filter filter = dispatchFilter.getFilter(); Filter filter = dispatchFilter.getFilter();
// we want to shutdown outside of jetty cutting us off
SolrDispatchFilter sdf = getSolrDispatchFilter();
Thread shutdownThead = null;
if (sdf != null) {
shutdownThead = new Thread() {
public void run() {
try {
sdf.close();
} catch (Throwable t) {
log.error("Error shutting down Solr", t);
}
}
};
sdf.closeOnDestroy(false);
shutdownThead.start();
}
QueuedThreadPool qtp = (QueuedThreadPool) server.getThreadPool();
ReservedThreadExecutor rte = qtp.getBean(ReservedThreadExecutor.class);
server.stop(); server.stop();
if (server.getState().equals(Server.FAILED)) { if (server.getState().equals(Server.FAILED)) {
@ -438,8 +564,47 @@ public class JettySolrRunner {
} }
} }
// stop timeout is 0, so we will interrupt right away
while(!qtp.isStopped()) {
qtp.stop();
if (qtp.isStopped()) {
Thread.sleep(50);
}
}
// we tried to kill everything, now we wait for executor to stop
qtp.setStopTimeout(Integer.MAX_VALUE);
qtp.stop();
qtp.join();
if (rte != null) {
// we try and wait for the reserved thread executor, but it doesn't always seem to work
// so we actually set 0 reserved threads at creation
rte.stop();
TimeOut timeout = new TimeOut(30, TimeUnit.SECONDS, TimeSource.NANO_TIME);
timeout.waitFor("Timeout waiting for reserved executor to stop.", ()
-> rte.isStopped());
}
if (shutdownThead != null) {
shutdownThead.join();
}
do {
try {
server.join(); server.join();
} catch (InterruptedException e) {
// ignore
}
} while (!server.isStopped());
} finally { } finally {
if (enableProxy) {
proxy.close();
}
if (prevContext != null) { if (prevContext != null) {
MDC.setContextMap(prevContext); MDC.setContextMap(prevContext);
} else { } else {
@ -461,15 +626,30 @@ public class JettySolrRunner {
return ((ServerConnector) conns[0]).getLocalPort(); return ((ServerConnector) conns[0]).getLocalPort();
} }
/** /**
* Returns the Local Port of the jetty Server. * Returns the Local Port of the jetty Server.
* *
* @exception RuntimeException if there is no Connector * @exception RuntimeException if there is no Connector
*/ */
public int getLocalPort() { public int getLocalPort() {
return getLocalPort(false);
}
/**
* Returns the Local Port of the jetty Server.
*
* @param internalPort pass true to get the true jetty port rather than the proxy port if configured
*
* @exception RuntimeException if there is no Connector
*/
public int getLocalPort(boolean internalPort) {
if (jettyPort == -1) { if (jettyPort == -1) {
throw new IllegalStateException("You cannot get the port until this instance has started"); throw new IllegalStateException("You cannot get the port until this instance has started");
} }
if (internalPort ) {
return jettyPort;
}
return (proxyPort != -1) ? proxyPort : jettyPort; return (proxyPort != -1) ? proxyPort : jettyPort;
} }
@ -487,23 +667,21 @@ public class JettySolrRunner {
* Connector in use by the Jetty Server contained in this runner. * Connector in use by the Jetty Server contained in this runner.
*/ */
public URL getBaseUrl() { public URL getBaseUrl() {
String protocol = null;
try { try {
Connector[] conns = server.getConnectors(); return new URL(protocol, host, jettyPort, config.context);
if (0 == conns.length) {
throw new IllegalStateException("Jetty Server has no Connectors");
}
ServerConnector c = (ServerConnector) conns[0];
if (c.getLocalPort() < 0) {
throw new IllegalStateException("Jetty Connector is not open: " +
c.getLocalPort());
}
protocol = c.getDefaultProtocol().startsWith("SSL") ? "https" : "http";
return new URL(protocol, c.getHost(), c.getLocalPort(), config.context);
} catch (MalformedURLException e) { } catch (MalformedURLException e) {
throw new IllegalStateException throw new RuntimeException(e);
("Java could not make sense of protocol: " + protocol, e); }
}
/**
* Returns a base URL consisting of the protocol, host, and port for a
* Connector in use by the Jetty Server contained in this runner.
*/
public URL getProxyBaseUrl() {
try {
return new URL(protocol, host, getLocalPort(), config.context);
} catch (MalformedURLException e) {
throw new RuntimeException(e);
} }
} }
@ -568,7 +746,11 @@ public class JettySolrRunner {
CoreContainer cores = solrFilter.getCores(); CoreContainer cores = solrFilter.getCores();
if (cores != null) { if (cores != null) {
cores.waitForLoadingCoresToFinish(timeoutMs); cores.waitForLoadingCoresToFinish(timeoutMs);
} else {
throw new IllegalStateException("The CoreContainer is not set!");
} }
} else {
throw new IllegalStateException("The dispatchFilter is not set!");
} }
} }
@ -583,4 +765,8 @@ public class JettySolrRunner {
this.delayValue = delay; this.delayValue = delay;
} }
} }
public SocketProxy getProxy() {
return proxy;
}
} }

View File

@ -73,6 +73,7 @@ public abstract class ElectionContext implements Closeable {
public ElectionContext(final String coreNodeName, public ElectionContext(final String coreNodeName,
final String electionPath, final String leaderPath, final ZkNodeProps leaderProps, final SolrZkClient zkClient) { final String electionPath, final String leaderPath, final ZkNodeProps leaderProps, final SolrZkClient zkClient) {
assert zkClient != null;
this.id = coreNodeName; this.id = coreNodeName;
this.electionPath = electionPath; this.electionPath = electionPath;
this.leaderPath = leaderPath; this.leaderPath = leaderPath;
@ -116,6 +117,7 @@ class ShardLeaderElectionContextBase extends ElectionContext {
protected String collection; protected String collection;
protected LeaderElector leaderElector; protected LeaderElector leaderElector;
protected ZkStateReader zkStateReader; protected ZkStateReader zkStateReader;
protected ZkController zkController;
private Integer leaderZkNodeParentVersion; private Integer leaderZkNodeParentVersion;
// Prevents a race between cancelling and becoming leader. // Prevents a race between cancelling and becoming leader.
@ -123,15 +125,29 @@ class ShardLeaderElectionContextBase extends ElectionContext {
public ShardLeaderElectionContextBase(LeaderElector leaderElector, public ShardLeaderElectionContextBase(LeaderElector leaderElector,
final String shardId, final String collection, final String coreNodeName, final String shardId, final String collection, final String coreNodeName,
ZkNodeProps props, ZkStateReader zkStateReader) { ZkNodeProps props, ZkController zkController) {
super(coreNodeName, ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection super(coreNodeName, ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection
+ "/leader_elect/" + shardId, ZkStateReader.getShardLeadersPath( + "/leader_elect/" + shardId, ZkStateReader.getShardLeadersPath(
collection, shardId), props, zkStateReader.getZkClient()); collection, shardId), props, zkController.getZkClient());
this.leaderElector = leaderElector; this.leaderElector = leaderElector;
this.zkStateReader = zkController.getZkStateReader();
this.zkClient = zkStateReader.getZkClient(); this.zkClient = zkStateReader.getZkClient();
this.zkStateReader = zkStateReader; this.zkController = zkController;
this.shardId = shardId; this.shardId = shardId;
this.collection = collection; this.collection = collection;
String parent = new Path(leaderPath).getParent().toString();
ZkCmdExecutor zcmd = new ZkCmdExecutor(30000);
// only if /collections/{collection} exists already do we succeed in creating this path
log.info("make sure parent is created {}", parent);
try {
zcmd.ensureExists(parent, (byte[])null, CreateMode.PERSISTENT, zkClient, 2);
} catch (KeeperException e) {
throw new RuntimeException(e);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new RuntimeException(e);
}
} }
@Override @Override
@ -172,20 +188,11 @@ class ShardLeaderElectionContextBase extends ElectionContext {
throws KeeperException, InterruptedException, IOException { throws KeeperException, InterruptedException, IOException {
// register as leader - if an ephemeral is already there, wait to see if it goes away // register as leader - if an ephemeral is already there, wait to see if it goes away
if (!zkClient.exists(ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection, true)) {
log.info("Will not register as leader because collection appears to be gone.");
return;
}
String parent = new Path(leaderPath).getParent().toString(); String parent = new Path(leaderPath).getParent().toString();
ZkCmdExecutor zcmd = new ZkCmdExecutor(30000);
// only if /collections/{collection} exists already do we succeed in creating this path
zcmd.ensureExists(parent, (byte[])null, CreateMode.PERSISTENT, zkClient, 2);
try { try {
RetryUtil.retryOnThrowable(NodeExistsException.class, 60000, 5000, () -> { RetryUtil.retryOnThrowable(NodeExistsException.class, 60000, 5000, () -> {
synchronized (lock) { synchronized (lock) {
log.debug("Creating leader registration node {} after winning as {}", leaderPath, leaderSeqPath); log.info("Creating leader registration node {} after winning as {}", leaderPath, leaderSeqPath);
List<Op> ops = new ArrayList<>(2); List<Op> ops = new ArrayList<>(2);
// We use a multi operation to get the parent nodes version, which will // We use a multi operation to get the parent nodes version, which will
@ -210,6 +217,9 @@ class ShardLeaderElectionContextBase extends ElectionContext {
assert leaderZkNodeParentVersion != null; assert leaderZkNodeParentVersion != null;
} }
}); });
} catch (NoNodeException e) {
log.info("Will not register as leader because it seems the election is no longer taking place.");
return;
} catch (Throwable t) { } catch (Throwable t) {
if (t instanceof OutOfMemoryError) { if (t instanceof OutOfMemoryError) {
throw (OutOfMemoryError) t; throw (OutOfMemoryError) t;
@ -235,7 +245,9 @@ class ShardLeaderElectionContextBase extends ElectionContext {
ZkStateReader.BASE_URL_PROP, leaderProps.get(ZkStateReader.BASE_URL_PROP), ZkStateReader.BASE_URL_PROP, leaderProps.get(ZkStateReader.BASE_URL_PROP),
ZkStateReader.CORE_NAME_PROP, leaderProps.get(ZkStateReader.CORE_NAME_PROP), ZkStateReader.CORE_NAME_PROP, leaderProps.get(ZkStateReader.CORE_NAME_PROP),
ZkStateReader.STATE_PROP, Replica.State.ACTIVE.toString()); ZkStateReader.STATE_PROP, Replica.State.ACTIVE.toString());
Overseer.getStateUpdateQueue(zkClient).offer(Utils.toJSON(m)); assert zkController != null;
assert zkController.getOverseer() != null;
zkController.getOverseer().offerStateUpdate(Utils.toJSON(m));
} }
} }
@ -254,7 +266,6 @@ class ShardLeaderElectionContextBase extends ElectionContext {
final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase { final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private final ZkController zkController;
private final CoreContainer cc; private final CoreContainer cc;
private final SyncStrategy syncStrategy; private final SyncStrategy syncStrategy;
@ -264,8 +275,7 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
final String shardId, final String collection, final String shardId, final String collection,
final String coreNodeName, ZkNodeProps props, ZkController zkController, CoreContainer cc) { final String coreNodeName, ZkNodeProps props, ZkController zkController, CoreContainer cc) {
super(leaderElector, shardId, collection, coreNodeName, props, super(leaderElector, shardId, collection, coreNodeName, props,
zkController.getZkStateReader()); zkController);
this.zkController = zkController;
this.cc = cc; this.cc = cc;
syncStrategy = new SyncStrategy(cc); syncStrategy = new SyncStrategy(cc);
} }
@ -304,11 +314,8 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
ActionThrottle lt; ActionThrottle lt;
try (SolrCore core = cc.getCore(coreName)) { try (SolrCore core = cc.getCore(coreName)) {
if (core == null ) { if (core == null ) {
if (cc.isShutDown()) { // shutdown or removed
return; return;
} else {
throw new SolrException(ErrorCode.SERVER_ERROR, "SolrCore not found:" + coreName + " in " + cc.getLoadedCoreNames());
}
} }
MDCLoggingContext.setCore(core); MDCLoggingContext.setCore(core);
lt = core.getUpdateHandler().getSolrCoreState().getLeaderThrottle(); lt = core.getUpdateHandler().getSolrCoreState().getLeaderThrottle();
@ -326,7 +333,7 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
// Clear the leader in clusterstate. We only need to worry about this if there is actually more than one replica. // Clear the leader in clusterstate. We only need to worry about this if there is actually more than one replica.
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.LEADER.toLower(), ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.LEADER.toLower(),
ZkStateReader.SHARD_ID_PROP, shardId, ZkStateReader.COLLECTION_PROP, collection); ZkStateReader.SHARD_ID_PROP, shardId, ZkStateReader.COLLECTION_PROP, collection);
Overseer.getStateUpdateQueue(zkClient).offer(Utils.toJSON(m)); zkController.getOverseer().getStateUpdateQueue().offer(Utils.toJSON(m));
} }
boolean allReplicasInLine = false; boolean allReplicasInLine = false;
@ -349,14 +356,8 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
try (SolrCore core = cc.getCore(coreName)) { try (SolrCore core = cc.getCore(coreName)) {
if (core == null) { if (core == null) {
if (!zkController.getCoreContainer().isShutDown()) {
cancelElection();
throw new SolrException(ErrorCode.SERVER_ERROR,
"SolrCore not found:" + coreName + " in " + cc.getLoadedCoreNames());
} else {
return; return;
} }
}
replicaType = core.getCoreDescriptor().getCloudDescriptor().getReplicaType(); replicaType = core.getCoreDescriptor().getCloudDescriptor().getReplicaType();
coreNodeName = core.getCoreDescriptor().getCloudDescriptor().getCoreNodeName(); coreNodeName = core.getCoreDescriptor().getCloudDescriptor().getCoreNodeName();
@ -698,7 +699,8 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
final class OverseerElectionContext extends ElectionContext { final class OverseerElectionContext extends ElectionContext {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private final SolrZkClient zkClient; private final SolrZkClient zkClient;
private Overseer overseer; private final Overseer overseer;
private volatile boolean isClosed = false;
public OverseerElectionContext(SolrZkClient zkClient, Overseer overseer, final String zkNodeName) { public OverseerElectionContext(SolrZkClient zkClient, Overseer overseer, final String zkNodeName) {
super(zkNodeName, Overseer.OVERSEER_ELECT, Overseer.OVERSEER_ELECT + "/leader", null, zkClient); super(zkNodeName, Overseer.OVERSEER_ELECT, Overseer.OVERSEER_ELECT + "/leader", null, zkClient);
@ -732,10 +734,12 @@ final class OverseerElectionContext extends ElectionContext {
log.warn("Wait interrupted ", e); log.warn("Wait interrupted ", e);
} }
} }
if (!overseer.getZkController().isClosed() && !overseer.getZkController().getCoreContainer().isShutDown()) { synchronized (this) {
if (!this.isClosed && !overseer.getZkController().getCoreContainer().isShutDown()) {
overseer.start(id); overseer.start(id);
} }
} }
}
@Override @Override
public void cancelElection() throws InterruptedException, KeeperException { public void cancelElection() throws InterruptedException, KeeperException {
@ -744,7 +748,8 @@ final class OverseerElectionContext extends ElectionContext {
} }
@Override @Override
public void close() { public synchronized void close() {
this.isClosed = true;
overseer.close(); overseer.close();
} }

View File

@ -26,6 +26,7 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.apache.solr.cloud.ZkController.ContextKey; import org.apache.solr.cloud.ZkController.ContextKey;
import org.apache.solr.common.AlreadyClosedException;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.SolrZkClient; import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkCmdExecutor; import org.apache.solr.common.cloud.ZkCmdExecutor;
@ -346,6 +347,8 @@ public class LeaderElector {
try { try {
// am I the next leader? // am I the next leader?
checkIfIamLeader(context, true); checkIfIamLeader(context, true);
} catch (AlreadyClosedException e) {
} catch (Exception e) { } catch (Exception e) {
if (!zkClient.isClosed()) { if (!zkClient.isClosed()) {
log.warn("", e); log.warn("", e);

View File

@ -16,6 +16,8 @@
*/ */
package org.apache.solr.cloud; package org.apache.solr.cloud;
import static org.apache.solr.common.params.CommonParams.ID;
import java.io.Closeable; import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
@ -26,7 +28,6 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import com.codahale.metrics.Timer;
import org.apache.solr.client.solrj.cloud.SolrCloudManager; import org.apache.solr.client.solrj.cloud.SolrCloudManager;
import org.apache.solr.client.solrj.impl.ClusterStateProvider; import org.apache.solr.client.solrj.impl.ClusterStateProvider;
import org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler; import org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler;
@ -39,9 +40,11 @@ import org.apache.solr.cloud.overseer.ReplicaMutator;
import org.apache.solr.cloud.overseer.SliceMutator; import org.apache.solr.cloud.overseer.SliceMutator;
import org.apache.solr.cloud.overseer.ZkStateWriter; import org.apache.solr.cloud.overseer.ZkStateWriter;
import org.apache.solr.cloud.overseer.ZkWriteCommand; import org.apache.solr.cloud.overseer.ZkWriteCommand;
import org.apache.solr.common.AlreadyClosedException;
import org.apache.solr.common.SolrCloseable; import org.apache.solr.common.SolrCloseable;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.ConnectionManager;
import org.apache.solr.common.cloud.SolrZkClient; import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkNodeProps; import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.cloud.ZkStateReader;
@ -53,7 +56,7 @@ import org.apache.solr.common.util.Utils;
import org.apache.solr.core.CloudConfig; import org.apache.solr.core.CloudConfig;
import org.apache.solr.core.CoreContainer; import org.apache.solr.core.CoreContainer;
import org.apache.solr.handler.admin.CollectionsHandler; import org.apache.solr.handler.admin.CollectionsHandler;
import org.apache.solr.handler.component.ShardHandler; import org.apache.solr.handler.component.HttpShardHandler;
import org.apache.solr.logging.MDCLoggingContext; import org.apache.solr.logging.MDCLoggingContext;
import org.apache.solr.update.UpdateShardHandler; import org.apache.solr.update.UpdateShardHandler;
import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.CreateMode;
@ -61,7 +64,7 @@ import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import static org.apache.solr.common.params.CommonParams.ID; import com.codahale.metrics.Timer;
/** /**
* Cluster leader. Responsible for processing state updates, node assignments, creating/deleting * Cluster leader. Responsible for processing state updates, node assignments, creating/deleting
@ -107,7 +110,7 @@ public class Overseer implements SolrCloseable {
public ClusterStateUpdater(final ZkStateReader reader, final String myId, Stats zkStats) { public ClusterStateUpdater(final ZkStateReader reader, final String myId, Stats zkStats) {
this.zkClient = reader.getZkClient(); this.zkClient = reader.getZkClient();
this.zkStats = zkStats; this.zkStats = zkStats;
this.stateUpdateQueue = getStateUpdateQueue(zkClient, zkStats); this.stateUpdateQueue = getStateUpdateQueue(zkStats);
this.workQueue = getInternalWorkQueue(zkClient, zkStats); this.workQueue = getInternalWorkQueue(zkClient, zkStats);
this.failureMap = getFailureMap(zkClient); this.failureMap = getFailureMap(zkClient);
this.runningMap = getRunningMap(zkClient); this.runningMap = getRunningMap(zkClient);
@ -188,6 +191,8 @@ public class Overseer implements SolrCloseable {
// the workQueue is empty now, use stateUpdateQueue as fallback queue // the workQueue is empty now, use stateUpdateQueue as fallback queue
fallbackQueue = stateUpdateQueue; fallbackQueue = stateUpdateQueue;
fallbackQueueSize = 0; fallbackQueueSize = 0;
} catch (AlreadyClosedException e) {
return;
} catch (KeeperException.SessionExpiredException e) { } catch (KeeperException.SessionExpiredException e) {
log.warn("Solr cannot talk to ZK, exiting Overseer work queue loop", e); log.warn("Solr cannot talk to ZK, exiting Overseer work queue loop", e);
return; return;
@ -211,6 +216,8 @@ public class Overseer implements SolrCloseable {
} catch (InterruptedException e) { } catch (InterruptedException e) {
Thread.currentThread().interrupt(); Thread.currentThread().interrupt();
return; return;
} catch (AlreadyClosedException e) {
} catch (Exception e) { } catch (Exception e) {
log.error("Exception in Overseer main queue loop", e); log.error("Exception in Overseer main queue loop", e);
} }
@ -247,6 +254,8 @@ public class Overseer implements SolrCloseable {
} catch (InterruptedException e) { } catch (InterruptedException e) {
Thread.currentThread().interrupt(); Thread.currentThread().interrupt();
return; return;
} catch (AlreadyClosedException e) {
} catch (Exception e) { } catch (Exception e) {
log.error("Exception in Overseer main queue loop", e); log.error("Exception in Overseer main queue loop", e);
refreshClusterState = true; // it might have been a bad version error refreshClusterState = true; // it might have been a bad version error
@ -308,8 +317,10 @@ public class Overseer implements SolrCloseable {
byte[] data; byte[] data;
try { try {
data = zkClient.getData(path, null, stat, true); data = zkClient.getData(path, null, stat, true);
} catch (AlreadyClosedException e) {
return;
} catch (Exception e) { } catch (Exception e) {
log.error("could not read the "+path+" data" ,e); log.warn("Error communicating with ZooKeeper", e);
return; return;
} }
try { try {
@ -437,6 +448,11 @@ public class Overseer implements SolrCloseable {
} catch (InterruptedException e) { } catch (InterruptedException e) {
success = false; success = false;
Thread.currentThread().interrupt(); Thread.currentThread().interrupt();
} catch (AlreadyClosedException e) {
success = false;
} catch (Exception e) {
success = false;
log.warn("Unexpected exception", e);
} finally { } finally {
timerContext.stop(); timerContext.stop();
if (success) { if (success) {
@ -495,7 +511,7 @@ public class Overseer implements SolrCloseable {
private final ZkStateReader reader; private final ZkStateReader reader;
private final ShardHandler shardHandler; private final HttpShardHandler shardHandler;
private final UpdateShardHandler updateShardHandler; private final UpdateShardHandler updateShardHandler;
@ -507,11 +523,11 @@ public class Overseer implements SolrCloseable {
private Stats stats; private Stats stats;
private String id; private String id;
private boolean closed; private volatile boolean closed;
private CloudConfig config; private CloudConfig config;
// overseer not responsible for closing reader // overseer not responsible for closing reader
public Overseer(ShardHandler shardHandler, public Overseer(HttpShardHandler shardHandler,
UpdateShardHandler updateShardHandler, String adminPath, UpdateShardHandler updateShardHandler, String adminPath,
final ZkStateReader reader, ZkController zkController, CloudConfig config) final ZkStateReader reader, ZkController zkController, CloudConfig config)
throws KeeperException, InterruptedException { throws KeeperException, InterruptedException {
@ -541,7 +557,7 @@ public class Overseer implements SolrCloseable {
ThreadGroup ccTg = new ThreadGroup("Overseer collection creation process."); ThreadGroup ccTg = new ThreadGroup("Overseer collection creation process.");
OverseerNodePrioritizer overseerPrioritizer = new OverseerNodePrioritizer(reader, adminPath, shardHandler.getShardHandlerFactory()); OverseerNodePrioritizer overseerPrioritizer = new OverseerNodePrioritizer(reader, getStateUpdateQueue(), adminPath, shardHandler.getShardHandlerFactory(), updateShardHandler.getDefaultHttpClient());
overseerCollectionConfigSetProcessor = new OverseerCollectionConfigSetProcessor(reader, id, shardHandler, adminPath, stats, Overseer.this, overseerPrioritizer); overseerCollectionConfigSetProcessor = new OverseerCollectionConfigSetProcessor(reader, id, shardHandler, adminPath, stats, Overseer.this, overseerPrioritizer);
ccThread = new OverseerThread(ccTg, overseerCollectionConfigSetProcessor, "OverseerCollectionConfigSetProcessor-" + id); ccThread = new OverseerThread(ccTg, overseerCollectionConfigSetProcessor, "OverseerCollectionConfigSetProcessor-" + id);
ccThread.setDaemon(true); ccThread.setDaemon(true);
@ -554,10 +570,9 @@ public class Overseer implements SolrCloseable {
updaterThread.start(); updaterThread.start();
ccThread.start(); ccThread.start();
triggerThread.start(); triggerThread.start();
if (this.id != null) {
assert ObjectReleaseTracker.track(this); assert ObjectReleaseTracker.track(this);
} }
}
public Stats getStats() { public Stats getStats() {
return stats; return stats;
@ -595,17 +610,14 @@ public class Overseer implements SolrCloseable {
} }
public synchronized void close() { public synchronized void close() {
if (closed) return;
if (this.id != null) { if (this.id != null) {
log.info("Overseer (id=" + id + ") closing"); log.info("Overseer (id=" + id + ") closing");
} }
doClose();
this.closed = true; this.closed = true;
if (this.id != null) { doClose();
assert ObjectReleaseTracker.release(this); assert ObjectReleaseTracker.release(this);
} }
}
@Override @Override
public boolean isClosed() { public boolean isClosed() {
@ -660,11 +672,10 @@ public class Overseer implements SolrCloseable {
* <p> * <p>
* This method will create the /overseer znode in ZooKeeper if it does not exist already. * This method will create the /overseer znode in ZooKeeper if it does not exist already.
* *
* @param zkClient the {@link SolrZkClient} to be used for reading/writing to the queue
* @return a {@link ZkDistributedQueue} object * @return a {@link ZkDistributedQueue} object
*/ */
public static ZkDistributedQueue getStateUpdateQueue(final SolrZkClient zkClient) { ZkDistributedQueue getStateUpdateQueue() {
return getStateUpdateQueue(zkClient, new Stats()); return getStateUpdateQueue(new Stats());
} }
/** /**
@ -672,13 +683,15 @@ public class Overseer implements SolrCloseable {
* This method should not be used directly by anyone other than the Overseer itself. * This method should not be used directly by anyone other than the Overseer itself.
* This method will create the /overseer znode in ZooKeeper if it does not exist already. * This method will create the /overseer znode in ZooKeeper if it does not exist already.
* *
* @param zkClient the {@link SolrZkClient} to be used for reading/writing to the queue
* @param zkStats a {@link Stats} object which tracks statistics for all zookeeper operations performed by this queue * @param zkStats a {@link Stats} object which tracks statistics for all zookeeper operations performed by this queue
* @return a {@link ZkDistributedQueue} object * @return a {@link ZkDistributedQueue} object
*/ */
static ZkDistributedQueue getStateUpdateQueue(final SolrZkClient zkClient, Stats zkStats) { ZkDistributedQueue getStateUpdateQueue(Stats zkStats) {
createOverseerNode(zkClient); return new ZkDistributedQueue(reader.getZkClient(), "/overseer/queue", zkStats, STATE_UPDATE_MAX_QUEUE, new ConnectionManager.IsClosed(){
return new ZkDistributedQueue(zkClient, "/overseer/queue", zkStats, STATE_UPDATE_MAX_QUEUE); public boolean isClosed() {
return Overseer.this.isClosed() || zkController.getCoreContainer().isShutDown();
}
});
} }
/** /**
@ -697,31 +710,26 @@ public class Overseer implements SolrCloseable {
* @return a {@link ZkDistributedQueue} object * @return a {@link ZkDistributedQueue} object
*/ */
static ZkDistributedQueue getInternalWorkQueue(final SolrZkClient zkClient, Stats zkStats) { static ZkDistributedQueue getInternalWorkQueue(final SolrZkClient zkClient, Stats zkStats) {
createOverseerNode(zkClient);
return new ZkDistributedQueue(zkClient, "/overseer/queue-work", zkStats); return new ZkDistributedQueue(zkClient, "/overseer/queue-work", zkStats);
} }
/* Internal map for failed tasks, not to be used outside of the Overseer */ /* Internal map for failed tasks, not to be used outside of the Overseer */
static DistributedMap getRunningMap(final SolrZkClient zkClient) { static DistributedMap getRunningMap(final SolrZkClient zkClient) {
createOverseerNode(zkClient);
return new DistributedMap(zkClient, "/overseer/collection-map-running"); return new DistributedMap(zkClient, "/overseer/collection-map-running");
} }
/* Size-limited map for successfully completed tasks*/ /* Size-limited map for successfully completed tasks*/
static DistributedMap getCompletedMap(final SolrZkClient zkClient) { static DistributedMap getCompletedMap(final SolrZkClient zkClient) {
createOverseerNode(zkClient);
return new SizeLimitedDistributedMap(zkClient, "/overseer/collection-map-completed", NUM_RESPONSES_TO_STORE, (child) -> getAsyncIdsMap(zkClient).remove(child)); return new SizeLimitedDistributedMap(zkClient, "/overseer/collection-map-completed", NUM_RESPONSES_TO_STORE, (child) -> getAsyncIdsMap(zkClient).remove(child));
} }
/* Map for failed tasks, not to be used outside of the Overseer */ /* Map for failed tasks, not to be used outside of the Overseer */
static DistributedMap getFailureMap(final SolrZkClient zkClient) { static DistributedMap getFailureMap(final SolrZkClient zkClient) {
createOverseerNode(zkClient);
return new SizeLimitedDistributedMap(zkClient, "/overseer/collection-map-failure", NUM_RESPONSES_TO_STORE, (child) -> getAsyncIdsMap(zkClient).remove(child)); return new SizeLimitedDistributedMap(zkClient, "/overseer/collection-map-failure", NUM_RESPONSES_TO_STORE, (child) -> getAsyncIdsMap(zkClient).remove(child));
} }
/* Map of async IDs currently in use*/ /* Map of async IDs currently in use*/
static DistributedMap getAsyncIdsMap(final SolrZkClient zkClient) { static DistributedMap getAsyncIdsMap(final SolrZkClient zkClient) {
createOverseerNode(zkClient);
return new DistributedMap(zkClient, "/overseer/async_ids"); return new DistributedMap(zkClient, "/overseer/async_ids");
} }
@ -740,7 +748,7 @@ public class Overseer implements SolrCloseable {
* @param zkClient the {@link SolrZkClient} to be used for reading/writing to the queue * @param zkClient the {@link SolrZkClient} to be used for reading/writing to the queue
* @return a {@link ZkDistributedQueue} object * @return a {@link ZkDistributedQueue} object
*/ */
static OverseerTaskQueue getCollectionQueue(final SolrZkClient zkClient) { OverseerTaskQueue getCollectionQueue(final SolrZkClient zkClient) {
return getCollectionQueue(zkClient, new Stats()); return getCollectionQueue(zkClient, new Stats());
} }
@ -758,8 +766,7 @@ public class Overseer implements SolrCloseable {
* @param zkClient the {@link SolrZkClient} to be used for reading/writing to the queue * @param zkClient the {@link SolrZkClient} to be used for reading/writing to the queue
* @return a {@link ZkDistributedQueue} object * @return a {@link ZkDistributedQueue} object
*/ */
static OverseerTaskQueue getCollectionQueue(final SolrZkClient zkClient, Stats zkStats) { OverseerTaskQueue getCollectionQueue(final SolrZkClient zkClient, Stats zkStats) {
createOverseerNode(zkClient);
return new OverseerTaskQueue(zkClient, "/overseer/collection-queue-work", zkStats); return new OverseerTaskQueue(zkClient, "/overseer/collection-queue-work", zkStats);
} }
@ -778,7 +785,7 @@ public class Overseer implements SolrCloseable {
* @param zkClient the {@link SolrZkClient} to be used for reading/writing to the queue * @param zkClient the {@link SolrZkClient} to be used for reading/writing to the queue
* @return a {@link ZkDistributedQueue} object * @return a {@link ZkDistributedQueue} object
*/ */
static OverseerTaskQueue getConfigSetQueue(final SolrZkClient zkClient) { OverseerTaskQueue getConfigSetQueue(final SolrZkClient zkClient) {
return getConfigSetQueue(zkClient, new Stats()); return getConfigSetQueue(zkClient, new Stats());
} }
@ -801,15 +808,14 @@ public class Overseer implements SolrCloseable {
* @param zkClient the {@link SolrZkClient} to be used for reading/writing to the queue * @param zkClient the {@link SolrZkClient} to be used for reading/writing to the queue
* @return a {@link ZkDistributedQueue} object * @return a {@link ZkDistributedQueue} object
*/ */
static OverseerTaskQueue getConfigSetQueue(final SolrZkClient zkClient, Stats zkStats) { OverseerTaskQueue getConfigSetQueue(final SolrZkClient zkClient, Stats zkStats) {
// For now, we use the same queue as the collection queue, but ensure // For now, we use the same queue as the collection queue, but ensure
// that the actions are prefixed with a unique string. // that the actions are prefixed with a unique string.
createOverseerNode(zkClient);
return getCollectionQueue(zkClient, zkStats); return getCollectionQueue(zkClient, zkStats);
} }
private static void createOverseerNode(final SolrZkClient zkClient) { private void createOverseerNode(final SolrZkClient zkClient) {
try { try {
zkClient.create("/overseer", new byte[0], CreateMode.PERSISTENT, true); zkClient.create("/overseer", new byte[0], CreateMode.PERSISTENT, true);
} catch (KeeperException.NodeExistsException e) { } catch (KeeperException.NodeExistsException e) {
@ -823,6 +829,7 @@ public class Overseer implements SolrCloseable {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
} }
public static boolean isLegacy(ZkStateReader stateReader) { public static boolean isLegacy(ZkStateReader stateReader) {
String legacyProperty = stateReader.getClusterProperty(ZkStateReader.LEGACY_CLOUD, "false"); String legacyProperty = stateReader.getClusterProperty(ZkStateReader.LEGACY_CLOUD, "false");
return "true".equals(legacyProperty); return "true".equals(legacyProperty);
@ -837,4 +844,11 @@ public class Overseer implements SolrCloseable {
return reader; return reader;
} }
public void offerStateUpdate(byte[] data) throws KeeperException, InterruptedException {
if (zkController.getZkClient().isClosed()) {
throw new AlreadyClosedException();
}
getStateUpdateQueue().offer(data);
}
} }

View File

@ -16,16 +16,16 @@
*/ */
package org.apache.solr.cloud; package org.apache.solr.cloud;
import static org.apache.solr.cloud.OverseerConfigSetMessageHandler.CONFIGSETS_ACTION_PREFIX;
import java.io.IOException; import java.io.IOException;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler; import org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler;
import org.apache.solr.common.cloud.ZkNodeProps; import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.handler.component.ShardHandler; import org.apache.solr.handler.component.HttpShardHandler;
import org.apache.solr.handler.component.ShardHandlerFactory; import org.apache.solr.handler.component.HttpShardHandlerFactory;
import static org.apache.solr.cloud.OverseerConfigSetMessageHandler.CONFIGSETS_ACTION_PREFIX;
/** /**
* An {@link OverseerTaskProcessor} that handles: * An {@link OverseerTaskProcessor} that handles:
@ -35,18 +35,18 @@ import static org.apache.solr.cloud.OverseerConfigSetMessageHandler.CONFIGSETS_A
public class OverseerCollectionConfigSetProcessor extends OverseerTaskProcessor { public class OverseerCollectionConfigSetProcessor extends OverseerTaskProcessor {
public OverseerCollectionConfigSetProcessor(ZkStateReader zkStateReader, String myId, public OverseerCollectionConfigSetProcessor(ZkStateReader zkStateReader, String myId,
final ShardHandler shardHandler, final HttpShardHandler shardHandler,
String adminPath, Stats stats, Overseer overseer, String adminPath, Stats stats, Overseer overseer,
OverseerNodePrioritizer overseerNodePrioritizer) { OverseerNodePrioritizer overseerNodePrioritizer) {
this( this(
zkStateReader, zkStateReader,
myId, myId,
shardHandler.getShardHandlerFactory(), (HttpShardHandlerFactory) shardHandler.getShardHandlerFactory(),
adminPath, adminPath,
stats, stats,
overseer, overseer,
overseerNodePrioritizer, overseerNodePrioritizer,
Overseer.getCollectionQueue(zkStateReader.getZkClient(), stats), overseer.getCollectionQueue(zkStateReader.getZkClient(), stats),
Overseer.getRunningMap(zkStateReader.getZkClient()), Overseer.getRunningMap(zkStateReader.getZkClient()),
Overseer.getCompletedMap(zkStateReader.getZkClient()), Overseer.getCompletedMap(zkStateReader.getZkClient()),
Overseer.getFailureMap(zkStateReader.getZkClient()) Overseer.getFailureMap(zkStateReader.getZkClient())
@ -54,7 +54,7 @@ public class OverseerCollectionConfigSetProcessor extends OverseerTaskProcessor
} }
protected OverseerCollectionConfigSetProcessor(ZkStateReader zkStateReader, String myId, protected OverseerCollectionConfigSetProcessor(ZkStateReader zkStateReader, String myId,
final ShardHandlerFactory shardHandlerFactory, final HttpShardHandlerFactory shardHandlerFactory,
String adminPath, String adminPath,
Stats stats, Stats stats,
Overseer overseer, Overseer overseer,
@ -79,7 +79,7 @@ public class OverseerCollectionConfigSetProcessor extends OverseerTaskProcessor
private static OverseerMessageHandlerSelector getOverseerMessageHandlerSelector( private static OverseerMessageHandlerSelector getOverseerMessageHandlerSelector(
ZkStateReader zkStateReader, ZkStateReader zkStateReader,
String myId, String myId,
final ShardHandlerFactory shardHandlerFactory, final HttpShardHandlerFactory shardHandlerFactory,
String adminPath, String adminPath,
Stats stats, Stats stats,
Overseer overseer, Overseer overseer,

View File

@ -20,6 +20,7 @@ import java.lang.invoke.MethodHandles;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.apache.http.client.HttpClient;
import org.apache.solr.cloud.overseer.OverseerAction; import org.apache.solr.cloud.overseer.OverseerAction;
import org.apache.solr.common.cloud.SolrZkClient; import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkNodeProps; import org.apache.solr.common.cloud.ZkNodeProps;
@ -28,6 +29,7 @@ import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.common.params.CoreAdminParams.CoreAdminAction; import org.apache.solr.common.params.CoreAdminParams.CoreAdminAction;
import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.Utils; import org.apache.solr.common.util.Utils;
import org.apache.solr.handler.component.HttpShardHandlerFactory;
import org.apache.solr.handler.component.ShardHandler; import org.apache.solr.handler.component.ShardHandler;
import org.apache.solr.handler.component.ShardHandlerFactory; import org.apache.solr.handler.component.ShardHandlerFactory;
import org.apache.solr.handler.component.ShardRequest; import org.apache.solr.handler.component.ShardRequest;
@ -49,10 +51,16 @@ public class OverseerNodePrioritizer {
private final String adminPath; private final String adminPath;
private final ShardHandlerFactory shardHandlerFactory; private final ShardHandlerFactory shardHandlerFactory;
public OverseerNodePrioritizer(ZkStateReader zkStateReader, String adminPath, ShardHandlerFactory shardHandlerFactory) { private ZkDistributedQueue stateUpdateQueue;
private HttpClient httpClient;
public OverseerNodePrioritizer(ZkStateReader zkStateReader, ZkDistributedQueue stateUpdateQueue, String adminPath, ShardHandlerFactory shardHandlerFactory, HttpClient httpClient) {
this.zkStateReader = zkStateReader; this.zkStateReader = zkStateReader;
this.adminPath = adminPath; this.adminPath = adminPath;
this.shardHandlerFactory = shardHandlerFactory; this.shardHandlerFactory = shardHandlerFactory;
this.stateUpdateQueue = stateUpdateQueue;
this.httpClient = httpClient;
} }
public synchronized void prioritizeOverseerNodes(String overseerId) throws Exception { public synchronized void prioritizeOverseerNodes(String overseerId) throws Exception {
@ -88,7 +96,7 @@ public class OverseerNodePrioritizer {
invokeOverseerOp(electionNodes.get(1), "rejoin");//ask second inline to go behind invokeOverseerOp(electionNodes.get(1), "rejoin");//ask second inline to go behind
} }
//now ask the current leader to QUIT , so that the designate can takeover //now ask the current leader to QUIT , so that the designate can takeover
Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer( stateUpdateQueue.offer(
Utils.toJSON(new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.QUIT.toLower(), Utils.toJSON(new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.QUIT.toLower(),
ID, OverseerTaskProcessor.getLeaderId(zkStateReader.getZkClient())))); ID, OverseerTaskProcessor.getLeaderId(zkStateReader.getZkClient()))));
@ -96,7 +104,7 @@ public class OverseerNodePrioritizer {
private void invokeOverseerOp(String electionNode, String op) { private void invokeOverseerOp(String electionNode, String op) {
ModifiableSolrParams params = new ModifiableSolrParams(); ModifiableSolrParams params = new ModifiableSolrParams();
ShardHandler shardHandler = shardHandlerFactory.getShardHandler(); ShardHandler shardHandler = ((HttpShardHandlerFactory)shardHandlerFactory).getShardHandler(httpClient);
params.set(CoreAdminParams.ACTION, CoreAdminAction.OVERSEEROP.toString()); params.set(CoreAdminParams.ACTION, CoreAdminAction.OVERSEEROP.toString());
params.set("op", op); params.set("op", op);
params.set("qt", adminPath); params.set("qt", adminPath);

View File

@ -19,6 +19,7 @@ package org.apache.solr.cloud;
import java.io.Closeable; import java.io.Closeable;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.LinkedHashMap; import java.util.LinkedHashMap;
@ -36,6 +37,7 @@ import org.apache.commons.io.IOUtils;
import org.apache.solr.client.solrj.SolrResponse; import org.apache.solr.client.solrj.SolrResponse;
import org.apache.solr.cloud.Overseer.LeaderStatus; import org.apache.solr.cloud.Overseer.LeaderStatus;
import org.apache.solr.cloud.OverseerTaskQueue.QueueEvent; import org.apache.solr.cloud.OverseerTaskQueue.QueueEvent;
import org.apache.solr.common.AlreadyClosedException;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.SolrZkClient; import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkNodeProps; import org.apache.solr.common.cloud.ZkNodeProps;
@ -86,13 +88,13 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
// List of completed tasks. This is used to clean up workQueue in zk. // List of completed tasks. This is used to clean up workQueue in zk.
final private HashMap<String, QueueEvent> completedTasks; final private HashMap<String, QueueEvent> completedTasks;
private String myId; private volatile String myId;
private ZkStateReader zkStateReader; private volatile ZkStateReader zkStateReader;
private boolean isClosed; private boolean isClosed;
private Stats stats; private volatile Stats stats;
// Set of tasks that have been picked up for processing but not cleaned up from zk work-queue. // Set of tasks that have been picked up for processing but not cleaned up from zk work-queue.
// It may contain tasks that have completed execution, have been entered into the completed/failed map in zk but not // It may contain tasks that have completed execution, have been entered into the completed/failed map in zk but not
@ -102,7 +104,7 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
// be executed because they are blocked or the execution queue is full // be executed because they are blocked or the execution queue is full
// This is an optimization to ensure that we do not read the same tasks // This is an optimization to ensure that we do not read the same tasks
// again and again from ZK. // again and again from ZK.
final private Map<String, QueueEvent> blockedTasks = new LinkedHashMap<>(); final private Map<String, QueueEvent> blockedTasks = Collections.synchronizedMap(new LinkedHashMap<>());
final private Predicate<String> excludedTasks = new Predicate<String>() { final private Predicate<String> excludedTasks = new Predicate<String>() {
@Override @Override
public boolean test(String s) { public boolean test(String s) {
@ -170,6 +172,8 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
// We don't need to handle this. This is just a fail-safe which comes in handy in skipping already processed // We don't need to handle this. This is just a fail-safe which comes in handy in skipping already processed
// async calls. // async calls.
SolrException.log(log, "", e); SolrException.log(log, "", e);
} catch (AlreadyClosedException e) {
return;
} catch (InterruptedException e) { } catch (InterruptedException e) {
Thread.currentThread().interrupt(); Thread.currentThread().interrupt();
} }
@ -181,6 +185,8 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
try { try {
prioritizer.prioritizeOverseerNodes(myId); prioritizer.prioritizeOverseerNodes(myId);
} catch (AlreadyClosedException e) {
return;
} catch (Exception e) { } catch (Exception e) {
if (!zkStateReader.getZkClient().isClosed()) { if (!zkStateReader.getZkClient().isClosed()) {
log.error("Unable to prioritize overseer ", e); log.error("Unable to prioritize overseer ", e);
@ -203,14 +209,14 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
continue; // not a no, not a yes, try asking again continue; // not a no, not a yes, try asking again
} }
log.debug("Cleaning up work-queue. #Running tasks: {}", runningTasks.size()); log.debug("Cleaning up work-queue. #Running tasks: {} #Completed tasks: {}", runningTasksSize(), completedTasks.size());
cleanUpWorkQueue(); cleanUpWorkQueue();
printTrackingMaps(); printTrackingMaps();
boolean waited = false; boolean waited = false;
while (runningTasks.size() > MAX_PARALLEL_TASKS) { while (runningTasksSize() > MAX_PARALLEL_TASKS) {
synchronized (waitLock) { synchronized (waitLock) {
waitLock.wait(100);//wait for 100 ms or till a task is complete waitLock.wait(100);//wait for 100 ms or till a task is complete
} }
@ -229,7 +235,7 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
// to clear out at least a few items in the queue before we read more items // to clear out at least a few items in the queue before we read more items
if (heads.size() < MAX_BLOCKED_TASKS) { if (heads.size() < MAX_BLOCKED_TASKS) {
//instead of reading MAX_PARALLEL_TASKS items always, we should only fetch as much as we can execute //instead of reading MAX_PARALLEL_TASKS items always, we should only fetch as much as we can execute
int toFetch = Math.min(MAX_BLOCKED_TASKS - heads.size(), MAX_PARALLEL_TASKS - runningTasks.size()); int toFetch = Math.min(MAX_BLOCKED_TASKS - heads.size(), MAX_PARALLEL_TASKS - runningTasksSize());
List<QueueEvent> newTasks = workQueue.peekTopN(toFetch, excludedTasks, 2000L); List<QueueEvent> newTasks = workQueue.peekTopN(toFetch, excludedTasks, 2000L);
log.debug("Got {} tasks from work-queue : [{}]", newTasks.size(), newTasks); log.debug("Got {} tasks from work-queue : [{}]", newTasks.size(), newTasks);
heads.addAll(newTasks); heads.addAll(newTasks);
@ -251,7 +257,7 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
for (QueueEvent head : heads) { for (QueueEvent head : heads) {
if (!tooManyTasks) { if (!tooManyTasks) {
synchronized (runningTasks) { synchronized (runningTasks) {
tooManyTasks = runningTasks.size() >= MAX_PARALLEL_TASKS; tooManyTasks = runningTasksSize() >= MAX_PARALLEL_TASKS;
} }
} }
if (tooManyTasks) { if (tooManyTasks) {
@ -260,7 +266,9 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
blockedTasks.put(head.getId(), head); blockedTasks.put(head.getId(), head);
continue; continue;
} }
synchronized (runningZKTasks) {
if (runningZKTasks.contains(head.getId())) continue; if (runningZKTasks.contains(head.getId())) continue;
}
final ZkNodeProps message = ZkNodeProps.load(head.getBytes()); final ZkNodeProps message = ZkNodeProps.load(head.getBytes());
final String asyncId = message.getStr(ASYNC); final String asyncId = message.getStr(ASYNC);
if (hasLeftOverItems) { if (hasLeftOverItems) {
@ -316,6 +324,8 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
} catch (InterruptedException e) { } catch (InterruptedException e) {
Thread.currentThread().interrupt(); Thread.currentThread().interrupt();
return; return;
} catch (AlreadyClosedException e) {
} catch (Exception e) { } catch (Exception e) {
SolrException.log(log, "", e); SolrException.log(log, "", e);
} }
@ -325,12 +335,20 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
} }
} }
private int runningTasksSize() {
synchronized (runningTasks) {
return runningTasks.size();
}
}
private void cleanUpWorkQueue() throws KeeperException, InterruptedException { private void cleanUpWorkQueue() throws KeeperException, InterruptedException {
synchronized (completedTasks) { synchronized (completedTasks) {
for (String id : completedTasks.keySet()) { for (String id : completedTasks.keySet()) {
workQueue.remove(completedTasks.get(id)); workQueue.remove(completedTasks.get(id));
synchronized (runningTasks) {
runningZKTasks.remove(id); runningZKTasks.remove(id);
} }
}
completedTasks.clear(); completedTasks.clear();
} }
} }
@ -502,6 +520,8 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
log.debug(messageHandler.getName() + ": Message id:" + head.getId() + log.debug(messageHandler.getName() + ": Message id:" + head.getId() +
" complete, response:" + response.getResponse().toString()); " complete, response:" + response.getResponse().toString());
success = true; success = true;
} catch (AlreadyClosedException e) {
} catch (KeeperException e) { } catch (KeeperException e) {
SolrException.log(log, "", e); SolrException.log(log, "", e);
} catch (InterruptedException e) { } catch (InterruptedException e) {
@ -513,7 +533,11 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
lock.unlock(); lock.unlock();
if (!success) { if (!success) {
// Reset task from tracking data structures so that it can be retried. // Reset task from tracking data structures so that it can be retried.
try {
resetTaskWithException(messageHandler, head.getId(), asyncId, taskKey, message); resetTaskWithException(messageHandler, head.getId(), asyncId, taskKey, message);
} catch(AlreadyClosedException e) {
}
} }
synchronized (waitLock){ synchronized (waitLock){
waitLock.notifyAll(); waitLock.notifyAll();
@ -587,7 +611,7 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
log.debug("CompletedTasks: {}", completedTasks.keySet().toString()); log.debug("CompletedTasks: {}", completedTasks.keySet().toString());
} }
synchronized (runningZKTasks) { synchronized (runningZKTasks) {
log.debug("RunningZKTasks: {}", runningZKTasks.toString()); log.info("RunningZKTasks: {}", runningZKTasks.toString());
} }
} }
} }

View File

@ -63,7 +63,6 @@ import org.apache.solr.update.CommitUpdateCommand;
import org.apache.solr.update.PeerSyncWithLeader; import org.apache.solr.update.PeerSyncWithLeader;
import org.apache.solr.update.UpdateLog; import org.apache.solr.update.UpdateLog;
import org.apache.solr.update.UpdateLog.RecoveryInfo; import org.apache.solr.update.UpdateLog.RecoveryInfo;
import org.apache.solr.update.processor.DistributedUpdateProcessor;
import org.apache.solr.util.RefCounted; import org.apache.solr.util.RefCounted;
import org.apache.solr.util.SolrPluginUtils; import org.apache.solr.util.SolrPluginUtils;
import org.apache.solr.util.plugin.NamedListInitializedPlugin; import org.apache.solr.util.plugin.NamedListInitializedPlugin;
@ -71,18 +70,21 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
/** /**
* This class may change in future and customisations are not supported * This class may change in future and customisations are not supported between versions in terms of API or back compat
* between versions in terms of API or back compat behaviour. * behaviour.
*
* @lucene.experimental * @lucene.experimental
*/ */
public class RecoveryStrategy implements Runnable, Closeable { public class RecoveryStrategy implements Runnable, Closeable {
public static class Builder implements NamedListInitializedPlugin { public static class Builder implements NamedListInitializedPlugin {
private NamedList args; private NamedList args;
@Override @Override
public void init(NamedList args) { public void init(NamedList args) {
this.args = args; this.args = args;
} }
// this should only be used from SolrCoreState // this should only be used from SolrCoreState
public RecoveryStrategy create(CoreContainer cc, CoreDescriptor cd, public RecoveryStrategy create(CoreContainer cc, CoreDescriptor cd,
RecoveryStrategy.RecoveryListener recoveryListener) { RecoveryStrategy.RecoveryListener recoveryListener) {
@ -90,6 +92,7 @@ public class RecoveryStrategy implements Runnable, Closeable {
SolrPluginUtils.invokeSetters(recoveryStrategy, args); SolrPluginUtils.invokeSetters(recoveryStrategy, args);
return recoveryStrategy; return recoveryStrategy;
} }
protected RecoveryStrategy newRecoveryStrategy(CoreContainer cc, CoreDescriptor cd, protected RecoveryStrategy newRecoveryStrategy(CoreContainer cc, CoreDescriptor cd,
RecoveryStrategy.RecoveryListener recoveryListener) { RecoveryStrategy.RecoveryListener recoveryListener) {
return new RecoveryStrategy(cc, cd, recoveryListener); return new RecoveryStrategy(cc, cd, recoveryListener);
@ -98,12 +101,14 @@ public class RecoveryStrategy implements Runnable, Closeable {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private int waitForUpdatesWithStaleStatePauseMilliSeconds = Integer.getInteger("solr.cloud.wait-for-updates-with-stale-state-pause", 2500); private int waitForUpdatesWithStaleStatePauseMilliSeconds = Integer
.getInteger("solr.cloud.wait-for-updates-with-stale-state-pause", 2500);
private int maxRetries = 500; private int maxRetries = 500;
private int startingRecoveryDelayMilliSeconds = 5000; private int startingRecoveryDelayMilliSeconds = 2000;
public static interface RecoveryListener { public static interface RecoveryListener {
public void recovered(); public void recovered();
public void failed(); public void failed();
} }
@ -121,6 +126,8 @@ public class RecoveryStrategy implements Runnable, Closeable {
private volatile HttpUriRequest prevSendPreRecoveryHttpUriRequest; private volatile HttpUriRequest prevSendPreRecoveryHttpUriRequest;
private final Replica.Type replicaType; private final Replica.Type replicaType;
private CoreDescriptor coreDescriptor;
protected RecoveryStrategy(CoreContainer cc, CoreDescriptor cd, RecoveryListener recoveryListener) { protected RecoveryStrategy(CoreContainer cc, CoreDescriptor cd, RecoveryListener recoveryListener) {
this.cc = cc; this.cc = cc;
this.coreName = cd.getName(); this.coreName = cd.getName();
@ -136,7 +143,8 @@ public class RecoveryStrategy implements Runnable, Closeable {
return waitForUpdatesWithStaleStatePauseMilliSeconds; return waitForUpdatesWithStaleStatePauseMilliSeconds;
} }
final public void setWaitForUpdatesWithStaleStatePauseMilliSeconds(int waitForUpdatesWithStaleStatePauseMilliSeconds) { final public void setWaitForUpdatesWithStaleStatePauseMilliSeconds(
int waitForUpdatesWithStaleStatePauseMilliSeconds) {
this.waitForUpdatesWithStaleStatePauseMilliSeconds = waitForUpdatesWithStaleStatePauseMilliSeconds; this.waitForUpdatesWithStaleStatePauseMilliSeconds = waitForUpdatesWithStaleStatePauseMilliSeconds;
} }
@ -187,8 +195,9 @@ public class RecoveryStrategy implements Runnable, Closeable {
} }
/** /**
* This method may change in future and customisations are not supported * This method may change in future and customisations are not supported between versions in terms of API or back
* between versions in terms of API or back compat behaviour. * compat behaviour.
*
* @lucene.experimental * @lucene.experimental
*/ */
protected String getReplicateLeaderUrl(ZkNodeProps leaderprops) { protected String getReplicateLeaderUrl(ZkNodeProps leaderprops) {
@ -219,7 +228,8 @@ public class RecoveryStrategy implements Runnable, Closeable {
solrParams.set(ReplicationHandler.SKIP_COMMIT_ON_MASTER_VERSION_ZERO, replicaType == Replica.Type.TLOG); solrParams.set(ReplicationHandler.SKIP_COMMIT_ON_MASTER_VERSION_ZERO, replicaType == Replica.Type.TLOG);
// always download the tlogs from the leader when running with cdcr enabled. We need to have all the tlogs // always download the tlogs from the leader when running with cdcr enabled. We need to have all the tlogs
// to ensure leader failover doesn't cause missing docs on the target // to ensure leader failover doesn't cause missing docs on the target
if (core.getUpdateHandler().getUpdateLog() != null && core.getUpdateHandler().getUpdateLog() instanceof CdcrUpdateLog) { if (core.getUpdateHandler().getUpdateLog() != null
&& core.getUpdateHandler().getUpdateLog() instanceof CdcrUpdateLog) {
solrParams.set(ReplicationHandler.TLOG_FILES, true); solrParams.set(ReplicationHandler.TLOG_FILES, true);
} }
@ -245,7 +255,8 @@ public class RecoveryStrategy implements Runnable, Closeable {
+ " from " + " from "
+ leaderUrl + leaderUrl
+ " gen:" + " gen:"
+ (core.getDeletionPolicy().getLatestCommit() != null ? "null" : core.getDeletionPolicy().getLatestCommit().getGeneration()) + (core.getDeletionPolicy().getLatestCommit() != null ? "null"
: core.getDeletionPolicy().getLatestCommit().getGeneration())
+ " data:" + core.getDataDir() + " data:" + core.getDataDir()
+ " index:" + core.getIndexDir() + " index:" + core.getIndexDir()
+ " newIndex:" + core.getNewIndexDir() + " newIndex:" + core.getNewIndexDir()
@ -265,11 +276,13 @@ public class RecoveryStrategy implements Runnable, Closeable {
IOException { IOException {
try (HttpSolrClient client = new HttpSolrClient.Builder(leaderUrl) try (HttpSolrClient client = new HttpSolrClient.Builder(leaderUrl)
.withConnectionTimeout(30000) .withConnectionTimeout(30000)
.withHttpClient(cc.getUpdateShardHandler().getRecoveryOnlyHttpClient())
.build()) { .build()) {
UpdateRequest ureq = new UpdateRequest(); UpdateRequest ureq = new UpdateRequest();
ureq.setParams(new ModifiableSolrParams()); ureq.setParams(new ModifiableSolrParams());
ureq.getParams().set(DistributedUpdateProcessor.COMMIT_END_POINT, true); // ureq.getParams().set(DistributedUpdateProcessor.COMMIT_END_POINT, true);
// ureq.getParams().set(UpdateParams.OPEN_SEARCHER, onlyLeaderIndexes);// Why do we need to open searcher if "onlyLeaderIndexes"? // ureq.getParams().set(UpdateParams.OPEN_SEARCHER, onlyLeaderIndexes);// Why do we need to open searcher if
// "onlyLeaderIndexes"?
ureq.getParams().set(UpdateParams.OPEN_SEARCHER, false); ureq.getParams().set(UpdateParams.OPEN_SEARCHER, false);
ureq.setAction(AbstractUpdateRequest.ACTION.COMMIT, false, true).process( ureq.setAction(AbstractUpdateRequest.ACTION.COMMIT, false, true).process(
client); client);
@ -306,7 +319,10 @@ public class RecoveryStrategy implements Runnable, Closeable {
} }
final public void doRecovery(SolrCore core) throws Exception { final public void doRecovery(SolrCore core) throws Exception {
if (core.getCoreDescriptor().getCloudDescriptor().requiresTransactionLog()) { // we can lose our core descriptor, so store it now
this.coreDescriptor = core.getCoreDescriptor();
if (this.coreDescriptor.getCloudDescriptor().requiresTransactionLog()) {
doSyncOrReplicateRecovery(core); doSyncOrReplicateRecovery(core);
} else { } else {
doReplicateOnlyRecovery(core); doReplicateOnlyRecovery(core);
@ -317,13 +333,16 @@ public class RecoveryStrategy implements Runnable, Closeable {
boolean successfulRecovery = false; boolean successfulRecovery = false;
// if (core.getUpdateHandler().getUpdateLog() != null) { // if (core.getUpdateHandler().getUpdateLog() != null) {
// SolrException.log(log, "'replicate-only' recovery strategy should only be used if no update logs are present, but this core has one: " // SolrException.log(log, "'replicate-only' recovery strategy should only be used if no update logs are present, but
// this core has one: "
// + core.getUpdateHandler().getUpdateLog()); // + core.getUpdateHandler().getUpdateLog());
// return; // return;
// } // }
while (!successfulRecovery && !Thread.currentThread().isInterrupted() && !isClosed()) { // don't use interruption or it will close channels though while (!successfulRecovery && !Thread.currentThread().isInterrupted() && !isClosed()) { // don't use interruption or
// it will close channels
// though
try { try {
CloudDescriptor cloudDesc = core.getCoreDescriptor().getCloudDescriptor(); CloudDescriptor cloudDesc = this.coreDescriptor.getCloudDescriptor();
ZkNodeProps leaderprops = zkStateReader.getLeaderRetry( ZkNodeProps leaderprops = zkStateReader.getLeaderRetry(
cloudDesc.getCollectionName(), cloudDesc.getShardId()); cloudDesc.getCollectionName(), cloudDesc.getShardId());
final String leaderBaseUrl = leaderprops.getStr(ZkStateReader.BASE_URL_PROP); final String leaderBaseUrl = leaderprops.getStr(ZkStateReader.BASE_URL_PROP);
@ -333,7 +352,8 @@ public class RecoveryStrategy implements Runnable, Closeable {
String ourUrl = ZkCoreNodeProps.getCoreUrl(baseUrl, coreName); String ourUrl = ZkCoreNodeProps.getCoreUrl(baseUrl, coreName);
boolean isLeader = leaderUrl.equals(ourUrl); //TODO: We can probably delete most of this code if we say this strategy can only be used for pull replicas boolean isLeader = leaderUrl.equals(ourUrl); // TODO: We can probably delete most of this code if we say this
// strategy can only be used for pull replicas
if (isLeader && !cloudDesc.isLeader()) { if (isLeader && !cloudDesc.isLeader()) {
throw new SolrException(ErrorCode.SERVER_ERROR, "Cloud state still says we are leader."); throw new SolrException(ErrorCode.SERVER_ERROR, "Cloud state still says we are leader.");
} }
@ -342,14 +362,13 @@ public class RecoveryStrategy implements Runnable, Closeable {
// we are now the leader - no one else must have been suitable // we are now the leader - no one else must have been suitable
log.warn("We have not yet recovered - but we are now the leader!"); log.warn("We have not yet recovered - but we are now the leader!");
log.info("Finished recovery process."); log.info("Finished recovery process.");
zkController.publish(core.getCoreDescriptor(), Replica.State.ACTIVE); zkController.publish(this.coreDescriptor, Replica.State.ACTIVE);
return; return;
} }
log.info("Publishing state of core [{}] as recovering, leader is [{}] and I am [{}]", core.getName(), leaderUrl, log.info("Publishing state of core [{}] as recovering, leader is [{}] and I am [{}]", core.getName(), leaderUrl,
ourUrl); ourUrl);
zkController.publish(core.getCoreDescriptor(), Replica.State.RECOVERING); zkController.publish(this.coreDescriptor, Replica.State.RECOVERING);
if (isClosed()) { if (isClosed()) {
log.info("Recovery for core {} has been closed", core.getName()); log.info("Recovery for core {} has been closed", core.getName());
@ -381,7 +400,7 @@ public class RecoveryStrategy implements Runnable, Closeable {
zkController.startReplicationFromLeader(coreName, false); zkController.startReplicationFromLeader(coreName, false);
log.info("Registering as Active after recovery."); log.info("Registering as Active after recovery.");
try { try {
zkController.publish(core.getCoreDescriptor(), Replica.State.ACTIVE); zkController.publish(this.coreDescriptor, Replica.State.ACTIVE);
} catch (Exception e) { } catch (Exception e) {
log.error("Could not publish as ACTIVE after succesful recovery", e); log.error("Could not publish as ACTIVE after succesful recovery", e);
successfulRecovery = false; successfulRecovery = false;
@ -411,7 +430,7 @@ public class RecoveryStrategy implements Runnable, Closeable {
if (retries >= maxRetries) { if (retries >= maxRetries) {
SolrException.log(log, "Recovery failed - max retries exceeded (" + retries + ")."); SolrException.log(log, "Recovery failed - max retries exceeded (" + retries + ").");
try { try {
recoveryFailed(core, zkController, baseUrl, coreZkNodeName, core.getCoreDescriptor()); recoveryFailed(core, zkController, baseUrl, coreZkNodeName, this.coreDescriptor);
} catch (Exception e) { } catch (Exception e) {
SolrException.log(log, "Could not publish that recovery failed", e); SolrException.log(log, "Could not publish that recovery failed", e);
} }
@ -457,7 +476,7 @@ public class RecoveryStrategy implements Runnable, Closeable {
if (ulog == null) { if (ulog == null) {
SolrException.log(log, "No UpdateLog found - cannot recover."); SolrException.log(log, "No UpdateLog found - cannot recover.");
recoveryFailed(core, zkController, baseUrl, coreZkNodeName, recoveryFailed(core, zkController, baseUrl, coreZkNodeName,
core.getCoreDescriptor()); this.coreDescriptor);
return; return;
} }
@ -485,13 +504,15 @@ public class RecoveryStrategy implements Runnable, Closeable {
if (oldIdx > 0) { if (oldIdx > 0) {
log.info("Found new versions added after startup: num=[{}]", oldIdx); log.info("Found new versions added after startup: num=[{}]", oldIdx);
log.info("currentVersions size={} range=[{} to {}]", recentVersions.size(), recentVersions.get(0), recentVersions.get(recentVersions.size()-1)); log.info("currentVersions size={} range=[{} to {}]", recentVersions.size(), recentVersions.get(0),
recentVersions.get(recentVersions.size() - 1));
} }
if (startingVersions.isEmpty()) { if (startingVersions.isEmpty()) {
log.info("startupVersions is empty"); log.info("startupVersions is empty");
} else { } else {
log.info("startupVersions size={} range=[{} to {}]", startingVersions.size(), startingVersions.get(0), startingVersions.get(startingVersions.size()-1)); log.info("startupVersions size={} range=[{} to {}]", startingVersions.size(), startingVersions.get(0),
startingVersions.get(startingVersions.size() - 1));
} }
} catch (Exception e) { } catch (Exception e) {
SolrException.log(log, "Error getting recent versions.", e); SolrException.log(log, "Error getting recent versions.", e);
@ -523,10 +544,12 @@ public class RecoveryStrategy implements Runnable, Closeable {
final String ourUrl = ZkCoreNodeProps.getCoreUrl(baseUrl, coreName); final String ourUrl = ZkCoreNodeProps.getCoreUrl(baseUrl, coreName);
Future<RecoveryInfo> replayFuture = null; Future<RecoveryInfo> replayFuture = null;
while (!successfulRecovery && !Thread.currentThread().isInterrupted() && !isClosed()) { // don't use interruption or it will close channels though while (!successfulRecovery && !Thread.currentThread().isInterrupted() && !isClosed()) { // don't use interruption or
// it will close channels
// though
try { try {
CloudDescriptor cloudDesc = core.getCoreDescriptor().getCloudDescriptor(); CloudDescriptor cloudDesc = this.coreDescriptor.getCloudDescriptor();
final Replica leader = pingLeader(ourUrl, core.getCoreDescriptor(), true); final Replica leader = pingLeader(ourUrl, this.coreDescriptor, true);
if (isClosed()) { if (isClosed()) {
log.info("RecoveryStrategy has been closed"); log.info("RecoveryStrategy has been closed");
break; break;
@ -540,7 +563,7 @@ public class RecoveryStrategy implements Runnable, Closeable {
// we are now the leader - no one else must have been suitable // we are now the leader - no one else must have been suitable
log.warn("We have not yet recovered - but we are now the leader!"); log.warn("We have not yet recovered - but we are now the leader!");
log.info("Finished recovery process."); log.info("Finished recovery process.");
zkController.publish(core.getCoreDescriptor(), Replica.State.ACTIVE); zkController.publish(this.coreDescriptor, Replica.State.ACTIVE);
return; return;
} }
@ -548,10 +571,10 @@ public class RecoveryStrategy implements Runnable, Closeable {
// recalling buffer updates will drop the old buffer tlog // recalling buffer updates will drop the old buffer tlog
ulog.bufferUpdates(); ulog.bufferUpdates();
log.info("Publishing state of core [{}] as recovering, leader is [{}] and I am [{}]", core.getName(), leader.getCoreUrl(), log.info("Publishing state of core [{}] as recovering, leader is [{}] and I am [{}]", core.getName(),
leader.getCoreUrl(),
ourUrl); ourUrl);
zkController.publish(core.getCoreDescriptor(), Replica.State.RECOVERING); zkController.publish(this.coreDescriptor, Replica.State.RECOVERING);
final Slice slice = zkStateReader.getClusterState().getCollection(cloudDesc.getCollectionName()) final Slice slice = zkStateReader.getClusterState().getCollection(cloudDesc.getCollectionName())
.getSlice(cloudDesc.getShardId()); .getSlice(cloudDesc.getShardId());
@ -588,7 +611,8 @@ public class RecoveryStrategy implements Runnable, Closeable {
// first thing we just try to sync // first thing we just try to sync
if (firstTime) { if (firstTime) {
firstTime = false; // only try sync the first time through the loop firstTime = false; // only try sync the first time through the loop
log.info("Attempting to PeerSync from [{}] - recoveringAfterStartup=[{}]", leader.getCoreUrl(), recoveringAfterStartup); log.info("Attempting to PeerSync from [{}] - recoveringAfterStartup=[{}]", leader.getCoreUrl(),
recoveringAfterStartup);
// System.out.println("Attempting to PeerSync from " + leaderUrl // System.out.println("Attempting to PeerSync from " + leaderUrl
// + " i am:" + zkController.getNodeName()); // + " i am:" + zkController.getNodeName());
PeerSyncWithLeader peerSyncWithLeader = new PeerSyncWithLeader(core, PeerSyncWithLeader peerSyncWithLeader = new PeerSyncWithLeader(core,
@ -658,7 +682,7 @@ public class RecoveryStrategy implements Runnable, Closeable {
if (replicaType == Replica.Type.TLOG) { if (replicaType == Replica.Type.TLOG) {
zkController.startReplicationFromLeader(coreName, true); zkController.startReplicationFromLeader(coreName, true);
} }
zkController.publish(core.getCoreDescriptor(), Replica.State.ACTIVE); zkController.publish(this.coreDescriptor, Replica.State.ACTIVE);
} catch (Exception e) { } catch (Exception e) {
log.error("Could not publish as ACTIVE after succesful recovery", e); log.error("Could not publish as ACTIVE after succesful recovery", e);
successfulRecovery = false; successfulRecovery = false;
@ -688,7 +712,7 @@ public class RecoveryStrategy implements Runnable, Closeable {
if (retries >= maxRetries) { if (retries >= maxRetries) {
SolrException.log(log, "Recovery failed - max retries exceeded (" + retries + ")."); SolrException.log(log, "Recovery failed - max retries exceeded (" + retries + ").");
try { try {
recoveryFailed(core, zkController, baseUrl, coreZkNodeName, core.getCoreDescriptor()); recoveryFailed(core, zkController, baseUrl, coreZkNodeName, this.coreDescriptor);
} catch (Exception e) { } catch (Exception e) {
SolrException.log(log, "Could not publish that recovery failed", e); SolrException.log(log, "Could not publish that recovery failed", e);
} }
@ -699,12 +723,12 @@ public class RecoveryStrategy implements Runnable, Closeable {
} }
try { try {
// Wait an exponential interval between retries, start at 5 seconds and work up to a minute. // Wait an exponential interval between retries, start at 2 seconds and work up to a minute.
// If we're at attempt >= 4, there's no point computing pow(2, retries) because the result // Since we sleep at 2 seconds sub-intervals in
// will always be the minimum of the two (12). Since we sleep at 5 seconds sub-intervals in // order to check if we were closed, 30 is chosen as the maximum loopCount (2s * 30 = 1m).
// order to check if we were closed, 12 is chosen as the maximum loopCount (5s * 12 = 1m). double loopCount = Math.min(Math.pow(2, retries - 1), 30);
double loopCount = retries < 4 ? Math.min(Math.pow(2, retries), 12) : 12; log.info("Wait [{}] seconds before trying to recover again (attempt={})",
log.info("Wait [{}] seconds before trying to recover again (attempt={})", loopCount, retries); loopCount * startingRecoveryDelayMilliSeconds, retries);
for (int i = 0; i < loopCount; i++) { for (int i = 0; i < loopCount; i++) {
if (isClosed()) { if (isClosed()) {
log.info("RecoveryStrategy has been closed"); log.info("RecoveryStrategy has been closed");
@ -731,13 +755,15 @@ public class RecoveryStrategy implements Runnable, Closeable {
log.info("Finished recovery process, successful=[{}]", Boolean.toString(successfulRecovery)); log.info("Finished recovery process, successful=[{}]", Boolean.toString(successfulRecovery));
} }
private final Replica pingLeader(String ourUrl, CoreDescriptor coreDesc, boolean mayPutReplicaAsDown) throws Exception { private final Replica pingLeader(String ourUrl, CoreDescriptor coreDesc, boolean mayPutReplicaAsDown)
throws Exception {
int numTried = 0; int numTried = 0;
while (true) { while (true) {
CloudDescriptor cloudDesc = coreDesc.getCloudDescriptor(); CloudDescriptor cloudDesc = coreDesc.getCloudDescriptor();
DocCollection docCollection = zkStateReader.getClusterState().getCollection(cloudDesc.getCollectionName()); DocCollection docCollection = zkStateReader.getClusterState().getCollection(cloudDesc.getCollectionName());
if (!isClosed() && mayPutReplicaAsDown && numTried == 1 && if (!isClosed() && mayPutReplicaAsDown && numTried == 1 &&
docCollection.getReplica(coreDesc.getCloudDescriptor().getCoreNodeName()).getState() == Replica.State.ACTIVE) { docCollection.getReplica(coreDesc.getCloudDescriptor().getCoreNodeName())
.getState() == Replica.State.ACTIVE) {
// this operation may take a long time, by putting replica into DOWN state, client won't query this replica // this operation may take a long time, by putting replica into DOWN state, client won't query this replica
zkController.publish(coreDesc, Replica.State.DOWN); zkController.publish(coreDesc, Replica.State.DOWN);
} }
@ -763,6 +789,7 @@ public class RecoveryStrategy implements Runnable, Closeable {
try (HttpSolrClient httpSolrClient = new HttpSolrClient.Builder(leaderReplica.getCoreUrl()) try (HttpSolrClient httpSolrClient = new HttpSolrClient.Builder(leaderReplica.getCoreUrl())
.withSocketTimeout(1000) .withSocketTimeout(1000)
.withConnectionTimeout(1000) .withConnectionTimeout(1000)
.withHttpClient(cc.getUpdateShardHandler().getRecoveryOnlyHttpClient())
.build()) { .build()) {
SolrPingResponse resp = httpSolrClient.ping(); SolrPingResponse resp = httpSolrClient.ping();
return leaderReplica; return leaderReplica;
@ -838,7 +865,7 @@ public class RecoveryStrategy implements Runnable, Closeable {
} }
final public boolean isClosed() { final public boolean isClosed() {
return close; return close || cc.isShutDown();
} }
final private void sendPrepRecoveryCmd(String leaderBaseUrl, String leaderCoreName, Slice slice) final private void sendPrepRecoveryCmd(String leaderBaseUrl, String leaderCoreName, Slice slice)
@ -858,8 +885,9 @@ public class RecoveryStrategy implements Runnable, Closeable {
int conflictWaitMs = zkController.getLeaderConflictResolveWait(); int conflictWaitMs = zkController.getLeaderConflictResolveWait();
// timeout after 5 seconds more than the max timeout (conflictWait + 3 seconds) on the server side // timeout after 5 seconds more than the max timeout (conflictWait + 3 seconds) on the server side
int readTimeout = conflictWaitMs + 8000; int readTimeout = conflictWaitMs + Integer.parseInt(System.getProperty("prepRecoveryReadTimeoutExtraWait", "8000"));
try (HttpSolrClient client = new HttpSolrClient.Builder(leaderBaseUrl).build()) { try (HttpSolrClient client = new HttpSolrClient.Builder(leaderBaseUrl)
.withHttpClient(cc.getUpdateShardHandler().getRecoveryOnlyHttpClient()).build()) {
client.setConnectionTimeout(10000); client.setConnectionTimeout(10000);
client.setSoTimeout(readTimeout); client.setSoTimeout(readTimeout);
HttpUriRequestResponse mrr = client.httpUriRequest(prepCmd); HttpUriRequestResponse mrr = client.httpUriRequest(prepCmd);

View File

@ -39,11 +39,11 @@ import org.slf4j.LoggerFactory;
public class ReplicateFromLeader { public class ReplicateFromLeader {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private CoreContainer cc; private final CoreContainer cc;
private String coreName; private final String coreName;
private ReplicationHandler replicationProcess; private volatile ReplicationHandler replicationProcess;
private long lastVersion = 0; private volatile long lastVersion = 0;
public ReplicateFromLeader(CoreContainer cc, String coreName) { public ReplicateFromLeader(CoreContainer cc, String coreName) {
this.cc = cc; this.cc = cc;

View File

@ -35,6 +35,7 @@ import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.CoreContainer; import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.CoreDescriptor; import org.apache.solr.core.CoreDescriptor;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.HttpShardHandlerFactory;
import org.apache.solr.handler.component.ShardHandler; import org.apache.solr.handler.component.ShardHandler;
import org.apache.solr.handler.component.ShardRequest; import org.apache.solr.handler.component.ShardRequest;
import org.apache.solr.handler.component.ShardResponse; import org.apache.solr.handler.component.ShardResponse;
@ -70,7 +71,7 @@ public class SyncStrategy {
public SyncStrategy(CoreContainer cc) { public SyncStrategy(CoreContainer cc) {
UpdateShardHandler updateShardHandler = cc.getUpdateShardHandler(); UpdateShardHandler updateShardHandler = cc.getUpdateShardHandler();
client = updateShardHandler.getDefaultHttpClient(); client = updateShardHandler.getDefaultHttpClient();
shardHandler = cc.getShardHandlerFactory().getShardHandler(); shardHandler = ((HttpShardHandlerFactory)cc.getShardHandlerFactory()).getShardHandler(cc.getUpdateShardHandler().getDefaultHttpClient());
updateExecutor = updateShardHandler.getUpdateExecutor(); updateExecutor = updateShardHandler.getUpdateExecutor();
} }
@ -113,16 +114,17 @@ public class SyncStrategy {
private PeerSync.PeerSyncResult syncReplicas(ZkController zkController, SolrCore core, private PeerSync.PeerSyncResult syncReplicas(ZkController zkController, SolrCore core,
ZkNodeProps leaderProps, boolean peerSyncOnlyWithActive) { ZkNodeProps leaderProps, boolean peerSyncOnlyWithActive) {
boolean success = false;
PeerSync.PeerSyncResult result = null;
CloudDescriptor cloudDesc = core.getCoreDescriptor().getCloudDescriptor();
String collection = cloudDesc.getCollectionName();
String shardId = cloudDesc.getShardId();
if (isClosed) { if (isClosed) {
log.info("We have been closed, won't sync with replicas"); log.info("We have been closed, won't sync with replicas");
return PeerSync.PeerSyncResult.failure(); return PeerSync.PeerSyncResult.failure();
} }
boolean success = false;
PeerSync.PeerSyncResult result = null;
assert core != null;
assert core.getCoreDescriptor() != null;
CloudDescriptor cloudDesc = core.getCoreDescriptor().getCloudDescriptor();
String collection = cloudDesc.getCollectionName();
String shardId = cloudDesc.getShardId();
// first sync ourselves - we are the potential leader after all // first sync ourselves - we are the potential leader after all
try { try {
@ -160,6 +162,11 @@ public class SyncStrategy {
List<ZkCoreNodeProps> nodes = zkController.getZkStateReader() List<ZkCoreNodeProps> nodes = zkController.getZkStateReader()
.getReplicaProps(collection, shardId,core.getCoreDescriptor().getCloudDescriptor().getCoreNodeName()); .getReplicaProps(collection, shardId,core.getCoreDescriptor().getCloudDescriptor().getCoreNodeName());
if (isClosed) {
log.info("We have been closed, won't sync with replicas");
return PeerSync.PeerSyncResult.failure();
}
if (nodes == null) { if (nodes == null) {
// I have no replicas // I have no replicas
return PeerSync.PeerSyncResult.success(); return PeerSync.PeerSyncResult.success();
@ -184,6 +191,11 @@ public class SyncStrategy {
String shardId, ZkNodeProps leaderProps, CoreDescriptor cd, String shardId, ZkNodeProps leaderProps, CoreDescriptor cd,
int nUpdates) { int nUpdates) {
if (isClosed) {
log.info("We have been closed, won't sync replicas to me.");
return;
}
// sync everyone else // sync everyone else
// TODO: we should do this in parallel at least // TODO: we should do this in parallel at least
List<ZkCoreNodeProps> nodes = zkController List<ZkCoreNodeProps> nodes = zkController
@ -289,6 +301,11 @@ public class SyncStrategy {
} }
@Override @Override
public void run() { public void run() {
if (isClosed) {
log.info("We have been closed, won't request recovery");
return;
}
RequestRecovery recoverRequestCmd = new RequestRecovery(); RequestRecovery recoverRequestCmd = new RequestRecovery();
recoverRequestCmd.setAction(CoreAdminAction.REQUESTRECOVERY); recoverRequestCmd.setAction(CoreAdminAction.REQUESTRECOVERY);
recoverRequestCmd.setCoreName(coreName); recoverRequestCmd.setCoreName(coreName);

View File

@ -16,6 +16,7 @@
*/ */
package org.apache.solr.cloud; package org.apache.solr.cloud;
import java.io.Closeable;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
@ -46,6 +47,7 @@ import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch; import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.Future; import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException; import java.util.concurrent.TimeoutException;
@ -62,11 +64,13 @@ import org.apache.solr.client.solrj.request.CoreAdminRequest.WaitForState;
import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType; import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType;
import org.apache.solr.cloud.overseer.OverseerAction; import org.apache.solr.cloud.overseer.OverseerAction;
import org.apache.solr.cloud.overseer.SliceMutator; import org.apache.solr.cloud.overseer.SliceMutator;
import org.apache.solr.common.AlreadyClosedException;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.cloud.BeforeReconnect; import org.apache.solr.common.cloud.BeforeReconnect;
import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.CollectionStateWatcher; import org.apache.solr.common.cloud.CollectionStateWatcher;
import org.apache.solr.common.cloud.ConnectionManager;
import org.apache.solr.common.cloud.DefaultConnectionStrategy; import org.apache.solr.common.cloud.DefaultConnectionStrategy;
import org.apache.solr.common.cloud.DefaultZkACLProvider; import org.apache.solr.common.cloud.DefaultZkACLProvider;
import org.apache.solr.common.cloud.DefaultZkCredentialsProvider; import org.apache.solr.common.cloud.DefaultZkCredentialsProvider;
@ -90,6 +94,7 @@ import org.apache.solr.common.params.CollectionParams;
import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.CoreAdminParams; import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.IOUtils; import org.apache.solr.common.util.IOUtils;
import org.apache.solr.common.util.ObjectReleaseTracker; import org.apache.solr.common.util.ObjectReleaseTracker;
import org.apache.solr.common.util.StrUtils; import org.apache.solr.common.util.StrUtils;
@ -102,6 +107,7 @@ import org.apache.solr.core.CoreDescriptor;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrCoreInitializationException; import org.apache.solr.core.SolrCoreInitializationException;
import org.apache.solr.handler.admin.ConfigSetsHandlerApi; import org.apache.solr.handler.admin.ConfigSetsHandlerApi;
import org.apache.solr.handler.component.HttpShardHandler;
import org.apache.solr.logging.MDCLoggingContext; import org.apache.solr.logging.MDCLoggingContext;
import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.servlet.SolrDispatchFilter; import org.apache.solr.servlet.SolrDispatchFilter;
@ -137,7 +143,7 @@ import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP;
* <p> * <p>
* TODO: exceptions during close on attempts to update cloud state * TODO: exceptions during close on attempts to update cloud state
*/ */
public class ZkController { public class ZkController implements Closeable {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
static final int WAIT_DOWN_STATES_TIMEOUT_SECONDS = 60; static final int WAIT_DOWN_STATES_TIMEOUT_SECONDS = 60;
@ -433,11 +439,14 @@ public class ZkController {
closeOutstandingElections(registerOnReconnect); closeOutstandingElections(registerOnReconnect);
markAllAsNotLeader(registerOnReconnect); markAllAsNotLeader(registerOnReconnect);
} }
}, zkACLProvider); }, zkACLProvider, new ConnectionManager.IsClosed() {
@Override
public boolean isClosed() {
return cc.isShutDown();
}});
this.overseerJobQueue = Overseer.getStateUpdateQueue(zkClient);
this.overseerCollectionQueue = Overseer.getCollectionQueue(zkClient);
this.overseerConfigSetQueue = Overseer.getConfigSetQueue(zkClient);
this.overseerRunningMap = Overseer.getRunningMap(zkClient); this.overseerRunningMap = Overseer.getRunningMap(zkClient);
this.overseerCompletedMap = Overseer.getCompletedMap(zkClient); this.overseerCompletedMap = Overseer.getCompletedMap(zkClient);
this.overseerFailureMap = Overseer.getFailureMap(zkClient); this.overseerFailureMap = Overseer.getFailureMap(zkClient);
@ -449,6 +458,10 @@ public class ZkController {
init(registerOnReconnect); init(registerOnReconnect);
this.overseerJobQueue = overseer.getStateUpdateQueue();
this.overseerCollectionQueue = overseer.getCollectionQueue(zkClient);
this.overseerConfigSetQueue = overseer.getConfigSetQueue(zkClient);
assert ObjectReleaseTracker.track(this); assert ObjectReleaseTracker.track(this);
} }
@ -554,28 +567,41 @@ public class ZkController {
*/ */
public void close() { public void close() {
this.isClosed = true; this.isClosed = true;
ForkJoinPool customThreadPool = new ForkJoinPool(10);
customThreadPool.submit(() -> Collections.singleton(overseerElector.getContext()).parallelStream().forEach(c -> {
IOUtils.closeQuietly(c);
}));
customThreadPool.submit(() -> Collections.singleton(overseer).parallelStream().forEach(c -> {
IOUtils.closeQuietly(c);
}));
synchronized (collectionToTerms) { synchronized (collectionToTerms) {
collectionToTerms.values().forEach(ZkCollectionTerms::close); customThreadPool.submit(() -> collectionToTerms.values().parallelStream().forEach(c -> {
c.close();
}));
} }
try { try {
for (ElectionContext context : electionContexts.values()) {
try { customThreadPool.submit(() -> replicateFromLeaders.values().parallelStream().forEach(c -> {
context.close(); c.stopReplication();
} catch (Exception e) { }));
log.error("Error closing overseer", e);
} customThreadPool.submit(() -> electionContexts.values().parallelStream().forEach(c -> {
} IOUtils.closeQuietly(c);
}));
} finally { } finally {
try {
IOUtils.closeQuietly(overseerElector.getContext()); customThreadPool.submit(() -> Collections.singleton(cloudSolrClient).parallelStream().forEach(c -> {
IOUtils.closeQuietly(overseer); IOUtils.closeQuietly(c);
} finally { }));
if (cloudSolrClient != null) { customThreadPool.submit(() -> Collections.singleton(cloudManager).parallelStream().forEach(c -> {
IOUtils.closeQuietly(cloudSolrClient); IOUtils.closeQuietly(c);
} }));
if (cloudManager != null) {
IOUtils.closeQuietly(cloudManager);
}
try { try {
try { try {
zkStateReader.close(); zkStateReader.close();
@ -587,9 +613,16 @@ public class ZkController {
zkClient.close(); zkClient.close();
} catch (Exception e) { } catch (Exception e) {
log.error("Error closing zkClient", e); log.error("Error closing zkClient", e);
} finally {
// just in case the OverseerElectionContext managed to start another Overseer
IOUtils.closeQuietly(overseer);
ExecutorUtil.shutdownAndAwaitTermination(customThreadPool);
} }
} }
}
} }
assert ObjectReleaseTracker.release(this); assert ObjectReleaseTracker.release(this);
} }
@ -669,9 +702,11 @@ public class ZkController {
if (cloudManager != null) { if (cloudManager != null) {
return cloudManager; return cloudManager;
} }
cloudSolrClient = new CloudSolrClient.Builder(Collections.singletonList(zkServerAddress), Optional.empty()) cloudSolrClient = new CloudSolrClient.Builder(Collections.singletonList(zkServerAddress), Optional.empty()).withSocketTimeout(30000).withConnectionTimeout(15000)
.withHttpClient(cc.getUpdateShardHandler().getDefaultHttpClient()).build(); .withHttpClient(cc.getUpdateShardHandler().getDefaultHttpClient())
.withConnectionTimeout(15000).withSocketTimeout(30000).build();
cloudManager = new SolrClientCloudManager(new ZkDistributedQueueFactory(zkClient), cloudSolrClient); cloudManager = new SolrClientCloudManager(new ZkDistributedQueueFactory(zkClient), cloudSolrClient);
cloudManager.getClusterStateProvider().connect();
} }
return cloudManager; return cloudManager;
} }
@ -764,7 +799,8 @@ public class ZkController {
* @throws KeeperException if there is a Zookeeper error * @throws KeeperException if there is a Zookeeper error
* @throws InterruptedException on interrupt * @throws InterruptedException on interrupt
*/ */
public static void createClusterZkNodes(SolrZkClient zkClient) throws KeeperException, InterruptedException, IOException { public static void createClusterZkNodes(SolrZkClient zkClient)
throws KeeperException, InterruptedException, IOException {
ZkCmdExecutor cmdExecutor = new ZkCmdExecutor(zkClient.getZkClientTimeout()); ZkCmdExecutor cmdExecutor = new ZkCmdExecutor(zkClient.getZkClientTimeout());
cmdExecutor.ensureExists(ZkStateReader.LIVE_NODES_ZKNODE, zkClient); cmdExecutor.ensureExists(ZkStateReader.LIVE_NODES_ZKNODE, zkClient);
cmdExecutor.ensureExists(ZkStateReader.COLLECTIONS_ZKNODE, zkClient); cmdExecutor.ensureExists(ZkStateReader.COLLECTIONS_ZKNODE, zkClient);
@ -839,7 +875,7 @@ public class ZkController {
// start the overseer first as following code may need it's processing // start the overseer first as following code may need it's processing
if (!zkRunOnly) { if (!zkRunOnly) {
overseerElector = new LeaderElector(zkClient); overseerElector = new LeaderElector(zkClient);
this.overseer = new Overseer(cc.getShardHandlerFactory().getShardHandler(), cc.getUpdateShardHandler(), this.overseer = new Overseer((HttpShardHandler) cc.getShardHandlerFactory().getShardHandler(), cc.getUpdateShardHandler(),
CommonParams.CORES_HANDLER_PATH, zkStateReader, this, cloudConfig); CommonParams.CORES_HANDLER_PATH, zkStateReader, this, cloudConfig);
ElectionContext context = new OverseerElectionContext(zkClient, ElectionContext context = new OverseerElectionContext(zkClient,
overseer, getNodeName()); overseer, getNodeName());
@ -911,10 +947,10 @@ public class ZkController {
LiveNodesListener listener = (oldNodes, newNodes) -> { LiveNodesListener listener = (oldNodes, newNodes) -> {
oldNodes.removeAll(newNodes); oldNodes.removeAll(newNodes);
if (oldNodes.isEmpty()) { // only added nodes if (oldNodes.isEmpty()) { // only added nodes
return; return false;
} }
if (isClosed) { if (isClosed) {
return; return true;
} }
// if this node is in the top three then attempt to create nodeLost message // if this node is in the top three then attempt to create nodeLost message
int i = 0; int i = 0;
@ -923,7 +959,7 @@ public class ZkController {
break; break;
} }
if (i > 2) { if (i > 2) {
return; // this node is not in the top three return false; // this node is not in the top three
} }
i++; i++;
} }
@ -948,12 +984,18 @@ public class ZkController {
} }
} }
} }
return false;
}; };
zkStateReader.registerLiveNodesListener(listener); zkStateReader.registerLiveNodesListener(listener);
} }
public void publishAndWaitForDownStates() throws KeeperException, public void publishAndWaitForDownStates() throws KeeperException,
InterruptedException { InterruptedException {
publishAndWaitForDownStates(WAIT_DOWN_STATES_TIMEOUT_SECONDS);
}
public void publishAndWaitForDownStates(int timeoutSeconds) throws KeeperException,
InterruptedException {
publishNodeAsDown(getNodeName()); publishNodeAsDown(getNodeName());
@ -983,7 +1025,7 @@ public class ZkController {
}); });
} }
boolean allPublishedDown = latch.await(WAIT_DOWN_STATES_TIMEOUT_SECONDS, TimeUnit.SECONDS); boolean allPublishedDown = latch.await(timeoutSeconds, TimeUnit.SECONDS);
if (!allPublishedDown) { if (!allPublishedDown) {
log.warn("Timed out waiting to see all nodes published as DOWN in our cluster state."); log.warn("Timed out waiting to see all nodes published as DOWN in our cluster state.");
} }
@ -1051,10 +1093,13 @@ public class ZkController {
log.info("Remove node as live in ZooKeeper:" + nodePath); log.info("Remove node as live in ZooKeeper:" + nodePath);
List<Op> ops = new ArrayList<>(2); List<Op> ops = new ArrayList<>(2);
ops.add(Op.delete(nodePath, -1)); ops.add(Op.delete(nodePath, -1));
if (zkClient.exists(nodeAddedPath, true)) {
ops.add(Op.delete(nodeAddedPath, -1)); ops.add(Op.delete(nodeAddedPath, -1));
}
try {
zkClient.multi(ops, true); zkClient.multi(ops, true);
} catch (NoNodeException e) {
}
} }
public String getNodeName() { public String getNodeName() {
@ -1158,6 +1203,10 @@ public class ZkController {
// TODO: should this actually be done earlier, before (or as part of) // TODO: should this actually be done earlier, before (or as part of)
// leader election perhaps? // leader election perhaps?
if (core == null) {
throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "SolrCore is no longer available to register");
}
UpdateLog ulog = core.getUpdateHandler().getUpdateLog(); UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
boolean isTlogReplicaAndNotLeader = replica.getType() == Replica.Type.TLOG && !isLeader; boolean isTlogReplicaAndNotLeader = replica.getType() == Replica.Type.TLOG && !isLeader;
if (isTlogReplicaAndNotLeader) { if (isTlogReplicaAndNotLeader) {
@ -1270,6 +1319,7 @@ public class ZkController {
final long msInSec = 1000L; final long msInSec = 1000L;
int maxTries = (int) Math.floor(leaderConflictResolveWait / msInSec); int maxTries = (int) Math.floor(leaderConflictResolveWait / msInSec);
while (!leaderUrl.equals(clusterStateLeaderUrl)) { while (!leaderUrl.equals(clusterStateLeaderUrl)) {
if (cc.isShutDown()) throw new AlreadyClosedException();
if (tries > maxTries) { if (tries > maxTries) {
throw new SolrException(ErrorCode.SERVER_ERROR, throw new SolrException(ErrorCode.SERVER_ERROR,
"There is conflicting information about the leader of shard: " "There is conflicting information about the leader of shard: "
@ -1290,6 +1340,8 @@ public class ZkController {
.getCoreUrl(); .getCoreUrl();
} }
} catch (AlreadyClosedException e) {
throw e;
} catch (Exception e) { } catch (Exception e) {
log.error("Error getting leader from zk", e); log.error("Error getting leader from zk", e);
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
@ -1336,7 +1388,7 @@ public class ZkController {
Thread.sleep(1000); Thread.sleep(1000);
} }
if (cc.isShutDown()) { if (cc.isShutDown()) {
throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "CoreContainer is closed"); throw new AlreadyClosedException();
} }
} }
throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "Could not get leader props", exp); throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "Could not get leader props", exp);
@ -2392,6 +2444,9 @@ public class ZkController {
} }
private boolean fireEventListeners(String zkDir) { private boolean fireEventListeners(String zkDir) {
if (isClosed || cc.isShutDown()) {
return false;
}
synchronized (confDirectoryListeners) { synchronized (confDirectoryListeners) {
// if this is not among directories to be watched then don't set the watcher anymore // if this is not among directories to be watched then don't set the watcher anymore
if (!confDirectoryListeners.containsKey(zkDir)) { if (!confDirectoryListeners.containsKey(zkDir)) {
@ -2527,15 +2582,17 @@ public class ZkController {
* @param nodeName to operate on * @param nodeName to operate on
*/ */
public void publishNodeAsDown(String nodeName) { public void publishNodeAsDown(String nodeName) {
log.debug("Publish node={} as DOWN", nodeName); log.info("Publish node={} as DOWN", nodeName);
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.DOWNNODE.toLower(), ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.DOWNNODE.toLower(),
ZkStateReader.NODE_NAME_PROP, nodeName); ZkStateReader.NODE_NAME_PROP, nodeName);
try { try {
Overseer.getStateUpdateQueue(getZkClient()).offer(Utils.toJSON(m)); overseer.getStateUpdateQueue().offer(Utils.toJSON(m));
} catch (AlreadyClosedException e) {
log.info("Not publishing node as DOWN because a resource required to do so is already closed.");
} catch (InterruptedException e) { } catch (InterruptedException e) {
Thread.interrupted(); Thread.currentThread().interrupt();
log.debug("Publish node as down was interrupted."); log.debug("Publish node as down was interrupted.");
} catch (Exception e) { } catch (KeeperException e) {
log.warn("Could not publish node as down: " + e.getMessage()); log.warn("Could not publish node as down: " + e.getMessage());
} }
} }

View File

@ -39,6 +39,7 @@ import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.cloud.SolrZkClient; import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkCmdExecutor; import org.apache.solr.common.cloud.ZkCmdExecutor;
import org.apache.solr.common.cloud.ConnectionManager.IsClosed;
import org.apache.solr.common.util.Pair; import org.apache.solr.common.util.Pair;
import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException;
@ -115,9 +116,13 @@ public class ZkDistributedQueue implements DistributedQueue {
} }
public ZkDistributedQueue(SolrZkClient zookeeper, String dir, Stats stats, int maxQueueSize) { public ZkDistributedQueue(SolrZkClient zookeeper, String dir, Stats stats, int maxQueueSize) {
this(zookeeper, dir, stats, maxQueueSize, null);
}
public ZkDistributedQueue(SolrZkClient zookeeper, String dir, Stats stats, int maxQueueSize, IsClosed higherLevelIsClosed) {
this.dir = dir; this.dir = dir;
ZkCmdExecutor cmdExecutor = new ZkCmdExecutor(zookeeper.getZkClientTimeout()); ZkCmdExecutor cmdExecutor = new ZkCmdExecutor(zookeeper.getZkClientTimeout(), higherLevelIsClosed);
try { try {
cmdExecutor.ensureExists(dir, zookeeper); cmdExecutor.ensureExists(dir, zookeeper);
} catch (KeeperException e) { } catch (KeeperException e) {

View File

@ -315,27 +315,22 @@ public class ZkShardTerms implements AutoCloseable{
private void ensureTermNodeExist() { private void ensureTermNodeExist() {
String path = "/collections/" + collection + "/terms"; String path = "/collections/" + collection + "/terms";
try { try {
if (!zkClient.exists(path, true)) {
try {
zkClient.makePath(path, true);
} catch (KeeperException.NodeExistsException e) {
// it's okay if another beats us creating the node
}
}
path += "/" + shard; path += "/" + shard;
if (!zkClient.exists(path, true)) {
try { try {
Map<String,Long> initialTerms = new HashMap<>(); Map<String,Long> initialTerms = new HashMap<>();
zkClient.create(path, Utils.toJSON(initialTerms), CreateMode.PERSISTENT, true); zkClient.makePath(path, Utils.toJSON(initialTerms), CreateMode.PERSISTENT, true);
} catch (KeeperException.NodeExistsException e) { } catch (KeeperException.NodeExistsException e) {
// it's okay if another beats us creating the node // it's okay if another beats us creating the node
} }
}
} catch (InterruptedException e) { } catch (InterruptedException e) {
Thread.interrupted(); Thread.interrupted();
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error creating shard term node in Zookeeper for collection: " + collection, e); throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"Error creating shard term node in Zookeeper for collection: " + collection, e);
} catch (KeeperException e) { } catch (KeeperException e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error creating shard term node in Zookeeper for collection: " + collection, e); throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"Error creating shard term node in Zookeeper for collection: " + collection, e);
} }
} }

View File

@ -245,7 +245,7 @@ public class AddReplicaCmd implements OverseerCollectionMessageHandler.Cmd {
props = props.plus(ZkStateReader.CORE_NODE_NAME_PROP, createReplica.coreNodeName); props = props.plus(ZkStateReader.CORE_NODE_NAME_PROP, createReplica.coreNodeName);
} }
try { try {
Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(props)); ocmh.overseer.offerStateUpdate(Utils.toJSON(props));
} catch (Exception e) { } catch (Exception e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Exception updating Overseer state queue", e); throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Exception updating Overseer state queue", e);
} }
@ -328,6 +328,7 @@ public class AddReplicaCmd implements OverseerCollectionMessageHandler.Cmd {
} }
} }
} }
log.info("Returning CreateReplica command.");
return new CreateReplica(collection, shard, node, replicaType, coreName, coreNodeName); return new CreateReplica(collection, shard, node, replicaType, coreName, coreNodeName);
} }

View File

@ -115,7 +115,7 @@ public class Assign {
} catch (IOException | KeeperException e) { } catch (IOException | KeeperException e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error inc and get counter from Zookeeper for collection:"+collection, e); throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error inc and get counter from Zookeeper for collection:"+collection, e);
} catch (InterruptedException e) { } catch (InterruptedException e) {
Thread.interrupted(); Thread.currentThread().interrupt();
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error inc and get counter from Zookeeper for collection:" + collection, e); throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error inc and get counter from Zookeeper for collection:" + collection, e);
} }
} }
@ -182,21 +182,34 @@ public class Assign {
return String.format(Locale.ROOT, "%s_%s_replica_%s%s", collectionName, shard, type.name().substring(0,1).toLowerCase(Locale.ROOT), replicaNum); return String.format(Locale.ROOT, "%s_%s_replica_%s%s", collectionName, shard, type.name().substring(0,1).toLowerCase(Locale.ROOT), replicaNum);
} }
private static int defaultCounterValue(DocCollection collection, boolean newCollection) { private static int defaultCounterValue(DocCollection collection, boolean newCollection, String shard) {
if (newCollection) return 0; if (newCollection) return 0;
int defaultValue = collection.getReplicas().size();
int defaultValue;
if (collection.getSlice(shard) != null && collection.getSlice(shard).getReplicas().isEmpty()) {
return 0;
} else {
defaultValue = collection.getReplicas().size() * 2;
}
if (collection.getReplicationFactor() != null) { if (collection.getReplicationFactor() != null) {
// numReplicas and replicationFactor * numSlices can be not equals, // numReplicas and replicationFactor * numSlices can be not equals,
// in case of many addReplicas or deleteReplicas are executed // in case of many addReplicas or deleteReplicas are executed
defaultValue = Math.max(defaultValue, defaultValue = Math.max(defaultValue,
collection.getReplicationFactor() * collection.getSlices().size()); collection.getReplicationFactor() * collection.getSlices().size());
} }
return defaultValue * 20; return defaultValue;
}
private static int defaultCounterValue(DocCollection collection, boolean newCollection) {
if (newCollection) return 0;
int defaultValue = collection.getReplicas().size();
return defaultValue;
} }
public static String buildSolrCoreName(DistribStateManager stateManager, DocCollection collection, String shard, Replica.Type type, boolean newCollection) { public static String buildSolrCoreName(DistribStateManager stateManager, DocCollection collection, String shard, Replica.Type type, boolean newCollection) {
Slice slice = collection.getSlice(shard); Slice slice = collection.getSlice(shard);
int defaultValue = defaultCounterValue(collection, newCollection); int defaultValue = defaultCounterValue(collection, newCollection, shard);
int replicaNum = incAndGetId(stateManager, collection.getName(), defaultValue); int replicaNum = incAndGetId(stateManager, collection.getName(), defaultValue);
String coreName = buildSolrCoreName(collection.getName(), shard, type, replicaNum); String coreName = buildSolrCoreName(collection.getName(), shard, type, replicaNum);
while (existCoreName(coreName, slice)) { while (existCoreName(coreName, slice)) {

View File

@ -160,7 +160,7 @@ public class BackupCmd implements OverseerCollectionMessageHandler.Cmd {
String backupName = request.getStr(NAME); String backupName = request.getStr(NAME);
String asyncId = request.getStr(ASYNC); String asyncId = request.getStr(ASYNC);
String repoName = request.getStr(CoreAdminParams.BACKUP_REPOSITORY); String repoName = request.getStr(CoreAdminParams.BACKUP_REPOSITORY);
ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler(); ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler(ocmh.overseer.getCoreContainer().getUpdateShardHandler().getDefaultHttpClient());
Map<String, String> requestMap = new HashMap<>(); Map<String, String> requestMap = new HashMap<>();
String commitName = request.getStr(CoreAdminParams.COMMIT_NAME); String commitName = request.getStr(CoreAdminParams.COMMIT_NAME);

View File

@ -156,7 +156,7 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
createCollectionZkNode(stateManager, collectionName, collectionParams); createCollectionZkNode(stateManager, collectionName, collectionParams);
Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(message)); ocmh.overseer.offerStateUpdate(Utils.toJSON(message));
// wait for a while until we see the collection // wait for a while until we see the collection
TimeOut waitUntil = new TimeOut(30, TimeUnit.SECONDS, timeSource); TimeOut waitUntil = new TimeOut(30, TimeUnit.SECONDS, timeSource);
@ -195,7 +195,7 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
log.debug(formatString("Creating SolrCores for new collection {0}, shardNames {1} , message : {2}", log.debug(formatString("Creating SolrCores for new collection {0}, shardNames {1} , message : {2}",
collectionName, shardNames, message)); collectionName, shardNames, message));
Map<String,ShardRequest> coresToCreate = new LinkedHashMap<>(); Map<String,ShardRequest> coresToCreate = new LinkedHashMap<>();
ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler(); ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler(ocmh.overseer.getCoreContainer().getUpdateShardHandler().getDefaultHttpClient());
for (ReplicaPosition replicaPosition : replicaPositions) { for (ReplicaPosition replicaPosition : replicaPositions) {
String nodeName = replicaPosition.node; String nodeName = replicaPosition.node;
@ -235,7 +235,7 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
ZkStateReader.BASE_URL_PROP, baseUrl, ZkStateReader.BASE_URL_PROP, baseUrl,
ZkStateReader.REPLICA_TYPE, replicaPosition.type.name(), ZkStateReader.REPLICA_TYPE, replicaPosition.type.name(),
CommonAdminParams.WAIT_FOR_FINAL_STATE, Boolean.toString(waitForFinalState)); CommonAdminParams.WAIT_FOR_FINAL_STATE, Boolean.toString(waitForFinalState));
Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(props)); ocmh.overseer.offerStateUpdate(Utils.toJSON(props));
} }
// Need to create new params for each request // Need to create new params for each request
@ -308,7 +308,7 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
Overseer.QUEUE_OPERATION, MODIFYCOLLECTION.toString(), Overseer.QUEUE_OPERATION, MODIFYCOLLECTION.toString(),
ZkStateReader.COLLECTION_PROP, withCollection, ZkStateReader.COLLECTION_PROP, withCollection,
CollectionAdminParams.COLOCATED_WITH, collectionName); CollectionAdminParams.COLOCATED_WITH, collectionName);
Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(props)); ocmh.overseer.offerStateUpdate(Utils.toJSON(props));
try { try {
zkStateReader.waitForState(withCollection, 5, TimeUnit.SECONDS, (liveNodes, collectionState) -> collectionName.equals(collectionState.getStr(COLOCATED_WITH))); zkStateReader.waitForState(withCollection, 5, TimeUnit.SECONDS, (liveNodes, collectionState) -> collectionName.equals(collectionState.getStr(COLOCATED_WITH)));
} catch (TimeoutException e) { } catch (TimeoutException e) {

View File

@ -21,7 +21,6 @@ import java.lang.invoke.MethodHandles;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import org.apache.solr.cloud.Overseer;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.DocCollection;
@ -71,7 +70,7 @@ public class CreateShardCmd implements OverseerCollectionMessageHandler.Cmd {
} }
ZkStateReader zkStateReader = ocmh.zkStateReader; ZkStateReader zkStateReader = ocmh.zkStateReader;
Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(message)); ocmh.overseer.offerStateUpdate(Utils.toJSON(message));
// wait for a while until we see the shard // wait for a while until we see the shard
ocmh.waitForNewShard(collectionName, sliceName); ocmh.waitForNewShard(collectionName, sliceName);
String async = message.getStr(ASYNC); String async = message.getStr(ASYNC);

View File

@ -84,7 +84,7 @@ public class CreateSnapshotCmd implements OverseerCollectionMessageHandler.Cmd {
Map<String, String> requestMap = new HashMap<>(); Map<String, String> requestMap = new HashMap<>();
NamedList shardRequestResults = new NamedList(); NamedList shardRequestResults = new NamedList();
Map<String, Slice> shardByCoreName = new HashMap<>(); Map<String, Slice> shardByCoreName = new HashMap<>();
ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler(); ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler(ocmh.overseer.getCoreContainer().getUpdateShardHandler().getDefaultHttpClient());
for (Slice slice : ocmh.zkStateReader.getClusterState().getCollection(collectionName).getSlices()) { for (Slice slice : ocmh.zkStateReader.getClusterState().getCollection(collectionName).getSlices()) {
for (Replica replica : slice.getReplicas()) { for (Replica replica : slice.getReplicas()) {

View File

@ -46,7 +46,6 @@ import org.apache.solr.core.SolrInfoBean;
import org.apache.solr.core.snapshots.SolrSnapshotManager; import org.apache.solr.core.snapshots.SolrSnapshotManager;
import org.apache.solr.handler.admin.MetricsHistoryHandler; import org.apache.solr.handler.admin.MetricsHistoryHandler;
import org.apache.solr.metrics.SolrMetricManager; import org.apache.solr.metrics.SolrMetricManager;
import org.apache.solr.util.TimeOut;
import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -127,24 +126,26 @@ public class DeleteCollectionCmd implements OverseerCollectionMessageHandler.Cmd
} }
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, DELETE.toLower(), NAME, collection); ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, DELETE.toLower(), NAME, collection);
Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(m)); ocmh.overseer.offerStateUpdate(Utils.toJSON(m));
// wait for a while until we don't see the collection // wait for a while until we don't see the collection
TimeOut timeout = new TimeOut(30, TimeUnit.SECONDS, timeSource); zkStateReader.waitForState(collection, 60, TimeUnit.SECONDS, (liveNodes, collectionState) -> collectionState == null);
boolean removed = false;
while (! timeout.hasTimedOut()) { // TimeOut timeout = new TimeOut(60, TimeUnit.SECONDS, timeSource);
timeout.sleep(100); // boolean removed = false;
removed = !zkStateReader.getClusterState().hasCollection(collection); // while (! timeout.hasTimedOut()) {
if (removed) { // timeout.sleep(100);
timeout.sleep(500); // just a bit of time so it's more likely other // removed = !zkStateReader.getClusterState().hasCollection(collection);
// readers see on return // if (removed) {
break; // timeout.sleep(500); // just a bit of time so it's more likely other
} // // readers see on return
} // break;
if (!removed) { // }
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, // }
"Could not fully remove collection: " + collection); // if (!removed) {
} // throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
// "Could not fully remove collection: " + collection);
// }
} finally { } finally {
try { try {

View File

@ -218,7 +218,7 @@ public class DeleteReplicaCmd implements Cmd {
" with onlyIfDown='true', but state is '" + replica.getStr(ZkStateReader.STATE_PROP) + "'"); " with onlyIfDown='true', but state is '" + replica.getStr(ZkStateReader.STATE_PROP) + "'");
} }
ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler(); ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler(ocmh.overseer.getCoreContainer().getUpdateShardHandler().getDefaultHttpClient());
String core = replica.getStr(ZkStateReader.CORE_NAME_PROP); String core = replica.getStr(ZkStateReader.CORE_NAME_PROP);
String asyncId = message.getStr(ASYNC); String asyncId = message.getStr(ASYNC);
AtomicReference<Map<String, String>> requestMap = new AtomicReference<>(null); AtomicReference<Map<String, String>> requestMap = new AtomicReference<>(null);
@ -246,7 +246,7 @@ public class DeleteReplicaCmd implements Cmd {
ocmh.processResponses(results, shardHandler, false, null, asyncId, requestMap.get()); ocmh.processResponses(results, shardHandler, false, null, asyncId, requestMap.get());
//check if the core unload removed the corenode zk entry //check if the core unload removed the corenode zk entry
if (ocmh.waitForCoreNodeGone(collectionName, shard, replicaName, 5000)) return Boolean.TRUE; if (ocmh.waitForCoreNodeGone(collectionName, shard, replicaName, 30000)) return Boolean.TRUE;
} }
// try and ensure core info is removed from cluster state // try and ensure core info is removed from cluster state

View File

@ -17,6 +17,13 @@
*/ */
package org.apache.solr.cloud.api.collections; package org.apache.solr.cloud.api.collections;
import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.NODE_NAME_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETEREPLICA;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETESHARD;
import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
@ -26,12 +33,10 @@ import java.util.Map;
import java.util.concurrent.CountDownLatch; import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import org.apache.solr.client.solrj.cloud.DistributedQueue;
import org.apache.solr.cloud.Overseer; import org.apache.solr.cloud.Overseer;
import org.apache.solr.cloud.overseer.OverseerAction; import org.apache.solr.cloud.overseer.OverseerAction;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica; import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice; import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkNodeProps; import org.apache.solr.common.cloud.ZkNodeProps;
@ -41,18 +46,10 @@ import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.TimeSource; import org.apache.solr.common.util.TimeSource;
import org.apache.solr.common.util.Utils; import org.apache.solr.common.util.Utils;
import org.apache.solr.util.TimeOut;
import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.NODE_NAME_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETEREPLICA;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETESHARD;
import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
public class DeleteShardCmd implements OverseerCollectionMessageHandler.Cmd { public class DeleteShardCmd implements OverseerCollectionMessageHandler.Cmd {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private final OverseerCollectionMessageHandler ocmh; private final OverseerCollectionMessageHandler ocmh;
@ -85,13 +82,12 @@ public class DeleteShardCmd implements OverseerCollectionMessageHandler.Cmd {
if (state == Slice.State.RECOVERY) { if (state == Slice.State.RECOVERY) {
// mark the slice as 'construction' and only then try to delete the cores // mark the slice as 'construction' and only then try to delete the cores
// see SOLR-9455 // see SOLR-9455
DistributedQueue inQueue = Overseer.getStateUpdateQueue(ocmh.zkStateReader.getZkClient());
Map<String, Object> propMap = new HashMap<>(); Map<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower()); propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
propMap.put(sliceId, Slice.State.CONSTRUCTION.toString()); propMap.put(sliceId, Slice.State.CONSTRUCTION.toString());
propMap.put(ZkStateReader.COLLECTION_PROP, collectionName); propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
ZkNodeProps m = new ZkNodeProps(propMap); ZkNodeProps m = new ZkNodeProps(propMap);
inQueue.offer(Utils.toJSON(m)); ocmh.overseer.offerStateUpdate(Utils.toJSON(m));
} }
String asyncId = message.getStr(ASYNC); String asyncId = message.getStr(ASYNC);
@ -129,29 +125,14 @@ public class DeleteShardCmd implements OverseerCollectionMessageHandler.Cmd {
} }
} }
log.debug("Waiting for delete shard action to complete"); log.debug("Waiting for delete shard action to complete");
cleanupLatch.await(5, TimeUnit.MINUTES); cleanupLatch.await(1, TimeUnit.MINUTES);
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, DELETESHARD.toLower(), ZkStateReader.COLLECTION_PROP, ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, DELETESHARD.toLower(), ZkStateReader.COLLECTION_PROP,
collectionName, ZkStateReader.SHARD_ID_PROP, sliceId); collectionName, ZkStateReader.SHARD_ID_PROP, sliceId);
ZkStateReader zkStateReader = ocmh.zkStateReader; ZkStateReader zkStateReader = ocmh.zkStateReader;
Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(m)); ocmh.overseer.offerStateUpdate(Utils.toJSON(m));
// wait for a while until we don't see the shard zkStateReader.waitForState(collectionName, 45, TimeUnit.SECONDS, (l, c) -> c.getSlice(sliceId) == null);
TimeOut timeout = new TimeOut(30, TimeUnit.SECONDS, timeSource);
boolean removed = false;
while (!timeout.hasTimedOut()) {
timeout.sleep(100);
DocCollection collection = zkStateReader.getClusterState().getCollection(collectionName);
removed = collection.getSlice(sliceId) == null;
if (removed) {
timeout.sleep(100); // just a bit of time so it's more likely other readers see on return
break;
}
}
if (!removed) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"Could not fully remove collection: " + collectionName + " shard: " + sliceId);
}
log.info("Successfully deleted collection: " + collectionName + ", shard: " + sliceId); log.info("Successfully deleted collection: " + collectionName + ", shard: " + sliceId);
} catch (SolrException e) { } catch (SolrException e) {

View File

@ -69,7 +69,7 @@ public class DeleteSnapshotCmd implements OverseerCollectionMessageHandler.Cmd {
String asyncId = message.getStr(ASYNC); String asyncId = message.getStr(ASYNC);
Map<String, String> requestMap = new HashMap<>(); Map<String, String> requestMap = new HashMap<>();
NamedList shardRequestResults = new NamedList(); NamedList shardRequestResults = new NamedList();
ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler(); ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler(ocmh.overseer.getCoreContainer().getUpdateShardHandler().getDefaultHttpClient());
SolrZkClient zkClient = ocmh.zkStateReader.getZkClient(); SolrZkClient zkClient = ocmh.zkStateReader.getZkClient();
Optional<CollectionSnapshotMetaData> meta = SolrSnapshotManager.getCollectionLevelSnapshot(zkClient, collectionName, commitName); Optional<CollectionSnapshotMetaData> meta = SolrSnapshotManager.getCollectionLevelSnapshot(zkClient, collectionName, commitName);

View File

@ -42,6 +42,7 @@ import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.TimeSource; import org.apache.solr.common.util.TimeSource;
import org.apache.solr.common.util.Utils; import org.apache.solr.common.util.Utils;
import org.apache.solr.handler.component.HttpShardHandlerFactory;
import org.apache.solr.handler.component.ShardHandler; import org.apache.solr.handler.component.ShardHandler;
import org.apache.solr.handler.component.ShardHandlerFactory; import org.apache.solr.handler.component.ShardHandlerFactory;
import org.apache.solr.update.SolrIndexSplitter; import org.apache.solr.update.SolrIndexSplitter;
@ -146,7 +147,7 @@ public class MigrateCmd implements OverseerCollectionMessageHandler.Cmd {
DocRouter.Range keyHashRange = sourceRouter.keyHashRange(splitKey); DocRouter.Range keyHashRange = sourceRouter.keyHashRange(splitKey);
ShardHandlerFactory shardHandlerFactory = ocmh.shardHandlerFactory; ShardHandlerFactory shardHandlerFactory = ocmh.shardHandlerFactory;
ShardHandler shardHandler = shardHandlerFactory.getShardHandler(); ShardHandler shardHandler = ((HttpShardHandlerFactory)shardHandlerFactory).getShardHandler(ocmh.overseer.getCoreContainer().getUpdateShardHandler().getDefaultHttpClient());
log.info("Hash range for split.key: {} is: {}", splitKey, keyHashRange); log.info("Hash range for split.key: {} is: {}", splitKey, keyHashRange);
// intersect source range, keyHashRange and target range // intersect source range, keyHashRange and target range
@ -181,7 +182,7 @@ public class MigrateCmd implements OverseerCollectionMessageHandler.Cmd {
"targetCollection", targetCollection.getName(), "targetCollection", targetCollection.getName(),
"expireAt", RoutingRule.makeExpiryAt(timeout)); "expireAt", RoutingRule.makeExpiryAt(timeout));
log.info("Adding routing rule: " + m); log.info("Adding routing rule: " + m);
Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(m)); ocmh.overseer.offerStateUpdate(Utils.toJSON(m));
// wait for a while until we see the new rule // wait for a while until we see the new rule
log.info("Waiting to see routing rule updated in clusterstate"); log.info("Waiting to see routing rule updated in clusterstate");

View File

@ -16,6 +16,58 @@
*/ */
package org.apache.solr.cloud.api.collections; package org.apache.solr.cloud.api.collections;
import static org.apache.solr.client.solrj.cloud.autoscaling.Policy.POLICY;
import static org.apache.solr.common.cloud.DocCollection.SNITCH;
import static org.apache.solr.common.cloud.ZkStateReader.BASE_URL_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.CORE_NAME_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.CORE_NODE_NAME_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.ELECTION_NODE_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.PROPERTY_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.PROPERTY_VALUE_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.REJOIN_AT_HEAD_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.REPLICA_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP;
import static org.apache.solr.common.params.CollectionAdminParams.COLLECTION;
import static org.apache.solr.common.params.CollectionAdminParams.COLOCATED_WITH;
import static org.apache.solr.common.params.CollectionAdminParams.WITH_COLLECTION;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.ADDREPLICA;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.ADDREPLICAPROP;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.ADDROLE;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.ALIASPROP;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.BACKUP;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.BALANCESHARDUNIQUE;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.CREATE;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.CREATEALIAS;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.CREATESHARD;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.CREATESNAPSHOT;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETE;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETEALIAS;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETENODE;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETEREPLICA;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETEREPLICAPROP;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETESHARD;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETESNAPSHOT;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.MAINTAINROUTEDALIAS;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.MIGRATE;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.MIGRATESTATEFORMAT;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.MOCK_COLL_TASK;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.MOCK_REPLICA_TASK;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.MOCK_SHARD_TASK;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.MODIFYCOLLECTION;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.MOVEREPLICA;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.OVERSEERSTATUS;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.REBALANCELEADERS;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.RELOAD;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.REMOVEROLE;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.REPLACENODE;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.RESTORE;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.SPLITSHARD;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.UTILIZENODE;
import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
import static org.apache.solr.common.params.CommonParams.NAME;
import static org.apache.solr.common.util.Utils.makeMap;
import java.io.IOException; import java.io.IOException;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
import java.util.ArrayList; import java.util.ArrayList;
@ -30,13 +82,12 @@ import java.util.Set;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
import java.util.concurrent.SynchronousQueue; import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import com.google.common.collect.ImmutableMap;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
import org.apache.solr.client.solrj.SolrResponse; import org.apache.solr.client.solrj.SolrResponse;
import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.cloud.DistribStateManager; import org.apache.solr.client.solrj.cloud.DistribStateManager;
import org.apache.solr.client.solrj.cloud.DistributedQueue;
import org.apache.solr.client.solrj.cloud.SolrCloudManager; import org.apache.solr.client.solrj.cloud.SolrCloudManager;
import org.apache.solr.client.solrj.cloud.autoscaling.AlreadyExistsException; import org.apache.solr.client.solrj.cloud.autoscaling.AlreadyExistsException;
import org.apache.solr.client.solrj.cloud.autoscaling.BadVersionException; import org.apache.solr.client.solrj.cloud.autoscaling.BadVersionException;
@ -79,8 +130,8 @@ import org.apache.solr.common.util.StrUtils;
import org.apache.solr.common.util.SuppressForbidden; import org.apache.solr.common.util.SuppressForbidden;
import org.apache.solr.common.util.TimeSource; import org.apache.solr.common.util.TimeSource;
import org.apache.solr.common.util.Utils; import org.apache.solr.common.util.Utils;
import org.apache.solr.handler.component.HttpShardHandlerFactory;
import org.apache.solr.handler.component.ShardHandler; import org.apache.solr.handler.component.ShardHandler;
import org.apache.solr.handler.component.ShardHandlerFactory;
import org.apache.solr.handler.component.ShardRequest; import org.apache.solr.handler.component.ShardRequest;
import org.apache.solr.handler.component.ShardResponse; import org.apache.solr.handler.component.ShardResponse;
import org.apache.solr.logging.MDCLoggingContext; import org.apache.solr.logging.MDCLoggingContext;
@ -92,25 +143,7 @@ import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import static org.apache.solr.client.solrj.cloud.autoscaling.Policy.POLICY; import com.google.common.collect.ImmutableMap;
import static org.apache.solr.common.cloud.DocCollection.SNITCH;
import static org.apache.solr.common.cloud.ZkStateReader.BASE_URL_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.CORE_NAME_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.CORE_NODE_NAME_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.ELECTION_NODE_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.PROPERTY_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.PROPERTY_VALUE_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.REJOIN_AT_HEAD_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.REPLICA_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP;
import static org.apache.solr.common.params.CollectionAdminParams.COLLECTION;
import static org.apache.solr.common.params.CollectionAdminParams.COLOCATED_WITH;
import static org.apache.solr.common.params.CollectionAdminParams.WITH_COLLECTION;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.*;
import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
import static org.apache.solr.common.params.CommonParams.NAME;
import static org.apache.solr.common.util.Utils.makeMap;
/** /**
* A {@link OverseerMessageHandler} that handles Collections API related * A {@link OverseerMessageHandler} that handles Collections API related
@ -158,7 +191,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
Overseer overseer; Overseer overseer;
ShardHandlerFactory shardHandlerFactory; HttpShardHandlerFactory shardHandlerFactory;
String adminPath; String adminPath;
ZkStateReader zkStateReader; ZkStateReader zkStateReader;
SolrCloudManager cloudManager; SolrCloudManager cloudManager;
@ -191,7 +224,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
private volatile boolean isClosed; private volatile boolean isClosed;
public OverseerCollectionMessageHandler(ZkStateReader zkStateReader, String myId, public OverseerCollectionMessageHandler(ZkStateReader zkStateReader, String myId,
final ShardHandlerFactory shardHandlerFactory, final HttpShardHandlerFactory shardHandlerFactory,
String adminPath, String adminPath,
Stats stats, Stats stats,
Overseer overseer, Overseer overseer,
@ -334,7 +367,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
sreq.shards = new String[] {baseUrl}; sreq.shards = new String[] {baseUrl};
sreq.actualShards = sreq.shards; sreq.actualShards = sreq.shards;
sreq.params = params; sreq.params = params;
ShardHandler shardHandler = shardHandlerFactory.getShardHandler(); ShardHandler shardHandler = shardHandlerFactory.getShardHandler(overseer.getCoreContainer().getUpdateShardHandler().getDefaultHttpClient());
shardHandler.submit(sreq, baseUrl, sreq.params); shardHandler.submit(sreq, baseUrl, sreq.params);
} }
@ -343,24 +376,22 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
throws Exception { throws Exception {
checkRequired(message, COLLECTION_PROP, SHARD_ID_PROP, REPLICA_PROP, PROPERTY_PROP, PROPERTY_VALUE_PROP); checkRequired(message, COLLECTION_PROP, SHARD_ID_PROP, REPLICA_PROP, PROPERTY_PROP, PROPERTY_VALUE_PROP);
SolrZkClient zkClient = zkStateReader.getZkClient(); SolrZkClient zkClient = zkStateReader.getZkClient();
DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkClient);
Map<String, Object> propMap = new HashMap<>(); Map<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, ADDREPLICAPROP.toLower()); propMap.put(Overseer.QUEUE_OPERATION, ADDREPLICAPROP.toLower());
propMap.putAll(message.getProperties()); propMap.putAll(message.getProperties());
ZkNodeProps m = new ZkNodeProps(propMap); ZkNodeProps m = new ZkNodeProps(propMap);
inQueue.offer(Utils.toJSON(m)); overseer.offerStateUpdate(Utils.toJSON(m));
} }
private void processReplicaDeletePropertyCommand(ClusterState clusterState, ZkNodeProps message, NamedList results) private void processReplicaDeletePropertyCommand(ClusterState clusterState, ZkNodeProps message, NamedList results)
throws Exception { throws Exception {
checkRequired(message, COLLECTION_PROP, SHARD_ID_PROP, REPLICA_PROP, PROPERTY_PROP); checkRequired(message, COLLECTION_PROP, SHARD_ID_PROP, REPLICA_PROP, PROPERTY_PROP);
SolrZkClient zkClient = zkStateReader.getZkClient(); SolrZkClient zkClient = zkStateReader.getZkClient();
DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkClient);
Map<String, Object> propMap = new HashMap<>(); Map<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, DELETEREPLICAPROP.toLower()); propMap.put(Overseer.QUEUE_OPERATION, DELETEREPLICAPROP.toLower());
propMap.putAll(message.getProperties()); propMap.putAll(message.getProperties());
ZkNodeProps m = new ZkNodeProps(propMap); ZkNodeProps m = new ZkNodeProps(propMap);
inQueue.offer(Utils.toJSON(m)); overseer.offerStateUpdate(Utils.toJSON(m));
} }
private void balanceProperty(ClusterState clusterState, ZkNodeProps message, NamedList results) throws Exception { private void balanceProperty(ClusterState clusterState, ZkNodeProps message, NamedList results) throws Exception {
@ -370,11 +401,10 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
"' parameters are required for the BALANCESHARDUNIQUE operation, no action taken"); "' parameters are required for the BALANCESHARDUNIQUE operation, no action taken");
} }
SolrZkClient zkClient = zkStateReader.getZkClient(); SolrZkClient zkClient = zkStateReader.getZkClient();
DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkClient); Map<String, Object> m = new HashMap<>();
Map<String, Object> propMap = new HashMap<>(); m.put(Overseer.QUEUE_OPERATION, BALANCESHARDUNIQUE.toLower());
propMap.put(Overseer.QUEUE_OPERATION, BALANCESHARDUNIQUE.toLower()); m.putAll(message.getProperties());
propMap.putAll(message.getProperties()); overseer.offerStateUpdate(Utils.toJSON(m));
inQueue.offer(Utils.toJSON(new ZkNodeProps(propMap)));
} }
/** /**
@ -417,20 +447,21 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
} }
boolean waitForCoreNodeGone(String collectionName, String shard, String replicaName, int timeoutms) throws InterruptedException { boolean waitForCoreNodeGone(String collectionName, String shard, String replicaName, int timeoutms) throws InterruptedException {
TimeOut timeout = new TimeOut(timeoutms, TimeUnit.MILLISECONDS, timeSource); try {
while (! timeout.hasTimedOut()) { zkStateReader.waitForState(collectionName, timeoutms, TimeUnit.MILLISECONDS, (n, c) -> {
timeout.sleep(100); if (c == null)
DocCollection docCollection = zkStateReader.getClusterState().getCollection(collectionName);
if (docCollection == null) { // someone already deleted the collection
return true; return true;
} Slice slice = c.getSlice(shard);
Slice slice = docCollection.getSlice(shard);
if(slice == null || slice.getReplica(replicaName) == null) { if(slice == null || slice.getReplica(replicaName) == null) {
return true; return true;
} }
}
// replica still exists after the timeout
return false; return false;
});
} catch (TimeoutException e) {
return false;
}
return true;
} }
void deleteCoreNode(String collectionName, String replicaName, Replica replica, String core) throws Exception { void deleteCoreNode(String collectionName, String replicaName, Replica replica, String core) throws Exception {
@ -441,7 +472,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
ZkStateReader.COLLECTION_PROP, collectionName, ZkStateReader.COLLECTION_PROP, collectionName,
ZkStateReader.CORE_NODE_NAME_PROP, replicaName, ZkStateReader.CORE_NODE_NAME_PROP, replicaName,
ZkStateReader.BASE_URL_PROP, replica.getStr(ZkStateReader.BASE_URL_PROP)); ZkStateReader.BASE_URL_PROP, replica.getStr(ZkStateReader.BASE_URL_PROP));
Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(m)); overseer.offerStateUpdate(Utils.toJSON(m));
} }
void checkRequired(ZkNodeProps message, String... props) { void checkRequired(ZkNodeProps message, String... props) {
@ -475,7 +506,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
// Actually queue the migration command. // Actually queue the migration command.
firstLoop = false; firstLoop = false;
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, MIGRATESTATEFORMAT.toLower(), COLLECTION_PROP, collectionName); ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, MIGRATESTATEFORMAT.toLower(), COLLECTION_PROP, collectionName);
Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(m)); overseer.offerStateUpdate(Utils.toJSON(m));
} }
timeout.sleep(100); timeout.sleep(100);
} }
@ -584,7 +615,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
} }
public static void sendShardRequest(String nodeName, ModifiableSolrParams params, ShardHandler shardHandler, public void sendShardRequest(String nodeName, ModifiableSolrParams params, ShardHandler shardHandler,
String asyncId, Map<String, String> requestMap, String adminPath, String asyncId, Map<String, String> requestMap, String adminPath,
ZkStateReader zkStateReader) { ZkStateReader zkStateReader) {
if (asyncId != null) { if (asyncId != null) {
@ -640,7 +671,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
reloadCollection(null, new ZkNodeProps(NAME, collectionName), results); reloadCollection(null, new ZkNodeProps(NAME, collectionName), results);
} }
Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(message)); overseer.offerStateUpdate(Utils.toJSON(message));
TimeOut timeout = new TimeOut(30, TimeUnit.SECONDS, timeSource); TimeOut timeout = new TimeOut(30, TimeUnit.SECONDS, timeSource);
boolean areChangesVisible = true; boolean areChangesVisible = true;
@ -680,8 +711,9 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
} }
Map<String, Replica> waitToSeeReplicasInState(String collectionName, Collection<String> coreNames) throws InterruptedException { Map<String, Replica> waitToSeeReplicasInState(String collectionName, Collection<String> coreNames) throws InterruptedException {
assert coreNames.size() > 0;
Map<String, Replica> result = new HashMap<>(); Map<String, Replica> result = new HashMap<>();
TimeOut timeout = new TimeOut(30, TimeUnit.SECONDS, timeSource); TimeOut timeout = new TimeOut(Integer.getInteger("solr.waitToSeeReplicasInStateTimeoutSeconds", 120), TimeUnit.SECONDS, timeSource); // could be a big cluster
while (true) { while (true) {
DocCollection coll = zkStateReader.getClusterState().getCollection(collectionName); DocCollection coll = zkStateReader.getClusterState().getCollection(collectionName);
for (String coreName : coreNames) { for (String coreName : coreNames) {
@ -791,7 +823,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
NamedList results, Replica.State stateMatcher, String asyncId, Map<String, String> requestMap, Set<String> okayExceptions) { NamedList results, Replica.State stateMatcher, String asyncId, Map<String, String> requestMap, Set<String> okayExceptions) {
log.info("Executing Collection Cmd={}, asyncId={}", params, asyncId); log.info("Executing Collection Cmd={}, asyncId={}", params, asyncId);
String collectionName = message.getStr(NAME); String collectionName = message.getStr(NAME);
ShardHandler shardHandler = shardHandlerFactory.getShardHandler(); ShardHandler shardHandler = shardHandlerFactory.getShardHandler(overseer.getCoreContainer().getUpdateShardHandler().getDefaultHttpClient());
ClusterState clusterState = zkStateReader.getClusterState(); ClusterState clusterState = zkStateReader.getClusterState();
DocCollection coll = clusterState.getCollection(collectionName); DocCollection coll = clusterState.getCollection(collectionName);

View File

@ -18,6 +18,20 @@
package org.apache.solr.cloud.api.collections; package org.apache.solr.cloud.api.collections;
import static org.apache.solr.common.cloud.DocCollection.STATE_FORMAT;
import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.MAX_SHARDS_PER_NODE;
import static org.apache.solr.common.cloud.ZkStateReader.NRT_REPLICAS;
import static org.apache.solr.common.cloud.ZkStateReader.PULL_REPLICAS;
import static org.apache.solr.common.cloud.ZkStateReader.REPLICATION_FACTOR;
import static org.apache.solr.common.cloud.ZkStateReader.REPLICA_TYPE;
import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.TLOG_REPLICAS;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.CREATE;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.CREATESHARD;
import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
import static org.apache.solr.common.params.CommonParams.NAME;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
import java.net.URI; import java.net.URI;
import java.util.ArrayList; import java.util.ArrayList;
@ -33,7 +47,6 @@ import java.util.Optional;
import java.util.Properties; import java.util.Properties;
import java.util.Set; import java.util.Set;
import org.apache.solr.client.solrj.cloud.DistributedQueue;
import org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper; import org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper;
import org.apache.solr.cloud.Overseer; import org.apache.solr.cloud.Overseer;
import org.apache.solr.cloud.overseer.OverseerAction; import org.apache.solr.cloud.overseer.OverseerAction;
@ -60,20 +73,6 @@ import org.apache.solr.handler.component.ShardHandler;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import static org.apache.solr.common.cloud.DocCollection.STATE_FORMAT;
import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.MAX_SHARDS_PER_NODE;
import static org.apache.solr.common.cloud.ZkStateReader.NRT_REPLICAS;
import static org.apache.solr.common.cloud.ZkStateReader.PULL_REPLICAS;
import static org.apache.solr.common.cloud.ZkStateReader.REPLICATION_FACTOR;
import static org.apache.solr.common.cloud.ZkStateReader.REPLICA_TYPE;
import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.TLOG_REPLICAS;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.CREATE;
import static org.apache.solr.common.params.CollectionParams.CollectionAction.CREATESHARD;
import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
import static org.apache.solr.common.params.CommonParams.NAME;
public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd { public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@ -89,7 +88,7 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
String restoreCollectionName = message.getStr(COLLECTION_PROP); String restoreCollectionName = message.getStr(COLLECTION_PROP);
String backupName = message.getStr(NAME); // of backup String backupName = message.getStr(NAME); // of backup
ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler(); ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler(ocmh.overseer.getCoreContainer().getUpdateShardHandler().getDefaultHttpClient());
String asyncId = message.getStr(ASYNC); String asyncId = message.getStr(ASYNC);
String repo = message.getStr(CoreAdminParams.BACKUP_REPOSITORY); String repo = message.getStr(CoreAdminParams.BACKUP_REPOSITORY);
Map<String, String> requestMap = new HashMap<>(); Map<String, String> requestMap = new HashMap<>();
@ -209,8 +208,6 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
DocCollection restoreCollection = zkStateReader.getClusterState().getCollection(restoreCollectionName); DocCollection restoreCollection = zkStateReader.getClusterState().getCollection(restoreCollectionName);
DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
//Mark all shards in CONSTRUCTION STATE while we restore the data //Mark all shards in CONSTRUCTION STATE while we restore the data
{ {
//TODO might instead createCollection accept an initial state? Is there a race? //TODO might instead createCollection accept an initial state? Is there a race?
@ -220,7 +217,7 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
propMap.put(shard.getName(), Slice.State.CONSTRUCTION.toString()); propMap.put(shard.getName(), Slice.State.CONSTRUCTION.toString());
} }
propMap.put(ZkStateReader.COLLECTION_PROP, restoreCollectionName); propMap.put(ZkStateReader.COLLECTION_PROP, restoreCollectionName);
inQueue.offer(Utils.toJSON(new ZkNodeProps(propMap))); ocmh.overseer.offerStateUpdate(Utils.toJSON(new ZkNodeProps(propMap)));
} }
// TODO how do we leverage the RULE / SNITCH logic in createCollection? // TODO how do we leverage the RULE / SNITCH logic in createCollection?
@ -323,7 +320,7 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
for (Slice shard : restoreCollection.getSlices()) { for (Slice shard : restoreCollection.getSlices()) {
propMap.put(shard.getName(), Slice.State.ACTIVE.toString()); propMap.put(shard.getName(), Slice.State.ACTIVE.toString());
} }
inQueue.offer(Utils.toJSON(new ZkNodeProps(propMap))); ocmh.overseer.offerStateUpdate((Utils.toJSON(new ZkNodeProps(propMap))));
} }
if (totalReplicasPerShard > 1) { if (totalReplicasPerShard > 1) {

View File

@ -30,7 +30,6 @@ import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.atomic.AtomicReference;
import org.apache.solr.client.solrj.cloud.DistributedQueue;
import org.apache.solr.client.solrj.cloud.NodeStateProvider; import org.apache.solr.client.solrj.cloud.NodeStateProvider;
import org.apache.solr.client.solrj.cloud.SolrCloudManager; import org.apache.solr.client.solrj.cloud.SolrCloudManager;
import org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper; import org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper;
@ -249,8 +248,8 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
propMap.put(ZkStateReader.SHARD_PARENT_PROP, parentSlice.getName()); propMap.put(ZkStateReader.SHARD_PARENT_PROP, parentSlice.getName());
propMap.put("shard_parent_node", nodeName); propMap.put("shard_parent_node", nodeName);
propMap.put("shard_parent_zk_session", leaderZnodeStat.getEphemeralOwner()); propMap.put("shard_parent_zk_session", leaderZnodeStat.getEphemeralOwner());
DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
inQueue.offer(Utils.toJSON(new ZkNodeProps(propMap))); ocmh.overseer.offerStateUpdate(Utils.toJSON(new ZkNodeProps(propMap)));
// wait until we are able to see the new shard in cluster state // wait until we are able to see the new shard in cluster state
ocmh.waitForNewShard(collectionName, subSlice); ocmh.waitForNewShard(collectionName, subSlice);
@ -281,7 +280,7 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
ocmh.addReplica(clusterState, new ZkNodeProps(propMap), results, null); ocmh.addReplica(clusterState, new ZkNodeProps(propMap), results, null);
} }
ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler(); ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler(ocmh.overseer.getCoreContainer().getUpdateShardHandler().getDefaultHttpClient());
ocmh.processResponses(results, shardHandler, true, "SPLITSHARD failed to create subshard leaders", asyncId, requestMap); ocmh.processResponses(results, shardHandler, true, "SPLITSHARD failed to create subshard leaders", asyncId, requestMap);
@ -412,7 +411,7 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
ZkStateReader.BASE_URL_PROP, zkStateReader.getBaseUrlForNodeName(subShardNodeName), ZkStateReader.BASE_URL_PROP, zkStateReader.getBaseUrlForNodeName(subShardNodeName),
ZkStateReader.NODE_NAME_PROP, subShardNodeName, ZkStateReader.NODE_NAME_PROP, subShardNodeName,
CommonAdminParams.WAIT_FOR_FINAL_STATE, Boolean.toString(waitForFinalState)); CommonAdminParams.WAIT_FOR_FINAL_STATE, Boolean.toString(waitForFinalState));
Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(props)); ocmh.overseer.offerStateUpdate(Utils.toJSON(props));
HashMap<String, Object> propMap = new HashMap<>(); HashMap<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower()); propMap.put(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower());
@ -446,7 +445,7 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
leaderZnodeStat = zkStateReader.getZkClient().exists(ZkStateReader.LIVE_NODES_ZKNODE + "/" + parentShardLeader.getNodeName(), null, true); leaderZnodeStat = zkStateReader.getZkClient().exists(ZkStateReader.LIVE_NODES_ZKNODE + "/" + parentShardLeader.getNodeName(), null, true);
if (leaderZnodeStat == null || ephemeralOwner != leaderZnodeStat.getEphemeralOwner()) { if (leaderZnodeStat == null || ephemeralOwner != leaderZnodeStat.getEphemeralOwner()) {
// put sub-shards in recovery_failed state // put sub-shards in recovery_failed state
DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
Map<String, Object> propMap = new HashMap<>(); Map<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower()); propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
for (String subSlice : subSlices) { for (String subSlice : subSlices) {
@ -454,7 +453,7 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
} }
propMap.put(ZkStateReader.COLLECTION_PROP, collectionName); propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
ZkNodeProps m = new ZkNodeProps(propMap); ZkNodeProps m = new ZkNodeProps(propMap);
inQueue.offer(Utils.toJSON(m)); ocmh.overseer.offerStateUpdate(Utils.toJSON(m));
if (leaderZnodeStat == null) { if (leaderZnodeStat == null) {
// the leader is not live anymore, fail the split! // the leader is not live anymore, fail the split!
@ -473,8 +472,7 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
if (repFactor == 1) { if (repFactor == 1) {
// switch sub shard states to 'active' // switch sub shard states to 'active'
log.debug("Replication factor is 1 so switching shard states"); log.info("Replication factor is 1 so switching shard states");
DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
Map<String, Object> propMap = new HashMap<>(); Map<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower()); propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
propMap.put(slice.get(), Slice.State.INACTIVE.toString()); propMap.put(slice.get(), Slice.State.INACTIVE.toString());
@ -483,10 +481,9 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
} }
propMap.put(ZkStateReader.COLLECTION_PROP, collectionName); propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
ZkNodeProps m = new ZkNodeProps(propMap); ZkNodeProps m = new ZkNodeProps(propMap);
inQueue.offer(Utils.toJSON(m)); ocmh.overseer.offerStateUpdate(Utils.toJSON(m));
} else { } else {
log.debug("Requesting shard state be set to 'recovery'"); log.info("Requesting shard state be set to 'recovery'");
DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
Map<String, Object> propMap = new HashMap<>(); Map<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower()); propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
for (String subSlice : subSlices) { for (String subSlice : subSlices) {
@ -494,7 +491,7 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
} }
propMap.put(ZkStateReader.COLLECTION_PROP, collectionName); propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
ZkNodeProps m = new ZkNodeProps(propMap); ZkNodeProps m = new ZkNodeProps(propMap);
inQueue.offer(Utils.toJSON(m)); ocmh.overseer.offerStateUpdate(Utils.toJSON(m));
} }
t = timings.sub("createCoresForReplicas"); t = timings.sub("createCoresForReplicas");
@ -590,7 +587,6 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
// set already created sub shards states to CONSTRUCTION - this prevents them // set already created sub shards states to CONSTRUCTION - this prevents them
// from entering into RECOVERY or ACTIVE (SOLR-9455) // from entering into RECOVERY or ACTIVE (SOLR-9455)
DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
final Map<String, Object> propMap = new HashMap<>(); final Map<String, Object> propMap = new HashMap<>();
boolean sendUpdateState = false; boolean sendUpdateState = false;
propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower()); propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
@ -618,7 +614,7 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
if (sendUpdateState) { if (sendUpdateState) {
try { try {
ZkNodeProps m = new ZkNodeProps(propMap); ZkNodeProps m = new ZkNodeProps(propMap);
inQueue.offer(Utils.toJSON(m)); ocmh.overseer.offerStateUpdate(Utils.toJSON(m));
} catch (Exception e) { } catch (Exception e) {
// don't give up yet - just log the error, we may still be able to clean up // don't give up yet - just log the error, we may still be able to clean up
log.warn("Cleanup failed after failed split of " + collectionName + "/" + parentShard + ": (slice state changes)", e); log.warn("Cleanup failed after failed split of " + collectionName + "/" + parentShard + ": (slice state changes)", e);

View File

@ -32,6 +32,7 @@ import java.util.concurrent.TimeUnit;
import org.apache.solr.client.solrj.cloud.SolrCloudManager; import org.apache.solr.client.solrj.cloud.SolrCloudManager;
import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType; import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType;
import org.apache.solr.common.AlreadyClosedException;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.CollectionParams; import org.apache.solr.common.params.CollectionParams;
@ -62,7 +63,7 @@ public class NodeLostTrigger extends TriggerBase {
public void init() throws Exception { public void init() throws Exception {
super.init(); super.init();
lastLiveNodes = new HashSet<>(cloudManager.getClusterStateProvider().getLiveNodes()); lastLiveNodes = new HashSet<>(cloudManager.getClusterStateProvider().getLiveNodes());
log.debug("NodeLostTrigger {} - Initial livenodes: {}", name, lastLiveNodes); log.info("NodeLostTrigger {} - Initial livenodes: {}", name, lastLiveNodes);
// pick up lost nodes for which marker paths were created // pick up lost nodes for which marker paths were created
try { try {
List<String> lost = stateManager.listData(ZkStateReader.SOLR_AUTOSCALING_NODE_LOST_PATH); List<String> lost = stateManager.listData(ZkStateReader.SOLR_AUTOSCALING_NODE_LOST_PATH);
@ -147,7 +148,7 @@ public class NodeLostTrigger extends TriggerBase {
} }
Set<String> newLiveNodes = new HashSet<>(cloudManager.getClusterStateProvider().getLiveNodes()); Set<String> newLiveNodes = new HashSet<>(cloudManager.getClusterStateProvider().getLiveNodes());
log.debug("Running NodeLostTrigger: {} with currently live nodes: {}", name, newLiveNodes.size()); log.info("Running NodeLostTrigger: {} with currently live nodes: {} and last live nodes: {}", name, newLiveNodes.size(), lastLiveNodes.size());
// have any nodes that we were tracking been added to the cluster? // have any nodes that we were tracking been added to the cluster?
// if so, remove them from the tracking map // if so, remove them from the tracking map
@ -158,7 +159,7 @@ public class NodeLostTrigger extends TriggerBase {
Set<String> copyOfLastLiveNodes = new HashSet<>(lastLiveNodes); Set<String> copyOfLastLiveNodes = new HashSet<>(lastLiveNodes);
copyOfLastLiveNodes.removeAll(newLiveNodes); copyOfLastLiveNodes.removeAll(newLiveNodes);
copyOfLastLiveNodes.forEach(n -> { copyOfLastLiveNodes.forEach(n -> {
log.debug("Tracking lost node: {}", n); log.info("Tracking lost node: {}", n);
nodeNameVsTimeRemoved.put(n, cloudManager.getTimeSource().getTimeNs()); nodeNameVsTimeRemoved.put(n, cloudManager.getTimeSource().getTimeNs());
}); });
@ -170,7 +171,8 @@ public class NodeLostTrigger extends TriggerBase {
String nodeName = entry.getKey(); String nodeName = entry.getKey();
Long timeRemoved = entry.getValue(); Long timeRemoved = entry.getValue();
long now = cloudManager.getTimeSource().getTimeNs(); long now = cloudManager.getTimeSource().getTimeNs();
if (TimeUnit.SECONDS.convert(now - timeRemoved, TimeUnit.NANOSECONDS) >= getWaitForSecond()) { long te = TimeUnit.SECONDS.convert(now - timeRemoved, TimeUnit.NANOSECONDS);
if (te >= getWaitForSecond()) {
nodeNames.add(nodeName); nodeNames.add(nodeName);
times.add(timeRemoved); times.add(timeRemoved);
} }
@ -197,6 +199,8 @@ public class NodeLostTrigger extends TriggerBase {
} }
} }
lastLiveNodes = new HashSet<>(newLiveNodes); lastLiveNodes = new HashSet<>(newLiveNodes);
} catch (AlreadyClosedException e) {
} catch (RuntimeException e) { } catch (RuntimeException e) {
log.error("Unexpected exception in NodeLostTrigger", e); log.error("Unexpected exception in NodeLostTrigger", e);
} }

View File

@ -29,12 +29,12 @@ import java.util.Set;
import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock; import java.util.concurrent.locks.ReentrantLock;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig; import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
import org.apache.solr.client.solrj.cloud.autoscaling.BadVersionException; import org.apache.solr.client.solrj.cloud.autoscaling.BadVersionException;
import org.apache.solr.client.solrj.cloud.DistribStateManager; import org.apache.solr.client.solrj.cloud.DistribStateManager;
import org.apache.solr.client.solrj.cloud.SolrCloudManager; import org.apache.solr.client.solrj.cloud.SolrCloudManager;
import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType; import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType;
import org.apache.solr.common.AlreadyClosedException;
import org.apache.solr.common.SolrCloseable; import org.apache.solr.common.SolrCloseable;
import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.util.IOUtils; import org.apache.solr.common.util.IOUtils;
@ -135,6 +135,8 @@ public class OverseerTriggerThread implements Runnable, SolrCloseable {
log.debug("Adding .auto_add_replicas and .scheduled_maintenance triggers"); log.debug("Adding .auto_add_replicas and .scheduled_maintenance triggers");
cloudManager.getDistribStateManager().setData(SOLR_AUTOSCALING_CONF_PATH, Utils.toJSON(updatedConfig), updatedConfig.getZkVersion()); cloudManager.getDistribStateManager().setData(SOLR_AUTOSCALING_CONF_PATH, Utils.toJSON(updatedConfig), updatedConfig.getZkVersion());
break; break;
} catch (AlreadyClosedException e) {
break;
} catch (BadVersionException bve) { } catch (BadVersionException bve) {
// somebody else has changed the configuration so we must retry // somebody else has changed the configuration so we must retry
} catch (InterruptedException e) { } catch (InterruptedException e) {
@ -178,7 +180,7 @@ public class OverseerTriggerThread implements Runnable, SolrCloseable {
// must check for close here before we await on the condition otherwise we can only be woken up on interruption // must check for close here before we await on the condition otherwise we can only be woken up on interruption
if (isClosed) { if (isClosed) {
log.warn("OverseerTriggerThread has been closed, exiting."); log.info("OverseerTriggerThread has been closed, exiting.");
break; break;
} }
@ -190,7 +192,7 @@ public class OverseerTriggerThread implements Runnable, SolrCloseable {
// are we closed? // are we closed?
if (isClosed) { if (isClosed) {
log.warn("OverseerTriggerThread woken up but we are closed, exiting."); log.info("OverseerTriggerThread woken up but we are closed, exiting.");
break; break;
} }
@ -211,7 +213,6 @@ public class OverseerTriggerThread implements Runnable, SolrCloseable {
} catch (InterruptedException e) { } catch (InterruptedException e) {
// Restore the interrupted status // Restore the interrupted status
Thread.currentThread().interrupt(); Thread.currentThread().interrupt();
log.warn("Interrupted", e);
break; break;
} }
@ -240,6 +241,8 @@ public class OverseerTriggerThread implements Runnable, SolrCloseable {
} }
try { try {
scheduledTriggers.add(entry.getValue()); scheduledTriggers.add(entry.getValue());
} catch (AlreadyClosedException e) {
} catch (Exception e) { } catch (Exception e) {
log.warn("Exception initializing trigger " + entry.getKey() + ", configuration ignored", e); log.warn("Exception initializing trigger " + entry.getKey() + ", configuration ignored", e);
} }
@ -275,6 +278,8 @@ public class OverseerTriggerThread implements Runnable, SolrCloseable {
}); });
} catch (NoSuchElementException e) { } catch (NoSuchElementException e) {
// ignore // ignore
} catch (AlreadyClosedException e) {
} catch (Exception e) { } catch (Exception e) {
log.warn("Error removing old nodeAdded markers", e); log.warn("Error removing old nodeAdded markers", e);
} }

View File

@ -151,8 +151,8 @@ public class ScheduledTrigger extends TriggerBase {
public void run() { public void run() {
synchronized (this) { synchronized (this) {
if (isClosed) { if (isClosed) {
log.warn("ScheduledTrigger ran but was already closed"); log.debug("ScheduledTrigger ran but was already closed");
throw new RuntimeException("Trigger has been closed"); return;
} }
} }

View File

@ -42,7 +42,6 @@ import java.util.concurrent.locks.ReentrantLock;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig; import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
import org.apache.solr.client.solrj.cloud.DistribStateManager; import org.apache.solr.client.solrj.cloud.DistribStateManager;
import org.apache.solr.client.solrj.cloud.SolrCloudManager; import org.apache.solr.client.solrj.cloud.SolrCloudManager;
@ -51,6 +50,7 @@ import org.apache.solr.client.solrj.cloud.autoscaling.VersionedData;
import org.apache.solr.client.solrj.request.CollectionAdminRequest.RequestStatusResponse; import org.apache.solr.client.solrj.request.CollectionAdminRequest.RequestStatusResponse;
import org.apache.solr.client.solrj.response.RequestStatusState; import org.apache.solr.client.solrj.response.RequestStatusState;
import org.apache.solr.cloud.Stats; import org.apache.solr.cloud.Stats;
import org.apache.solr.common.AlreadyClosedException;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.util.ExecutorUtil; import org.apache.solr.common.util.ExecutorUtil;
@ -205,7 +205,7 @@ public class ScheduledTriggers implements Closeable {
try { try {
st = new TriggerWrapper(newTrigger, cloudManager, queueStats); st = new TriggerWrapper(newTrigger, cloudManager, queueStats);
} catch (Exception e) { } catch (Exception e) {
if (isClosed) { if (isClosed || e instanceof AlreadyClosedException) {
throw new AlreadyClosedException("ScheduledTriggers has been closed and cannot be used anymore"); throw new AlreadyClosedException("ScheduledTriggers has been closed and cannot be used anymore");
} }
if (cloudManager.isClosed()) { if (cloudManager.isClosed()) {
@ -567,6 +567,8 @@ public class ScheduledTriggers implements Closeable {
// execution of the same trigger instance // execution of the same trigger instance
synchronized (TriggerWrapper.this) { synchronized (TriggerWrapper.this) {
// replay accumulated events on first run, if any // replay accumulated events on first run, if any
try {
if (replay) { if (replay) {
TriggerEvent event; TriggerEvent event;
// peek first without removing - we may crash before calling the listener // peek first without removing - we may crash before calling the listener
@ -587,8 +589,15 @@ public class ScheduledTriggers implements Closeable {
} }
replay = false; replay = false;
} }
} catch (AlreadyClosedException e) {
} catch (Exception e) {
log.error("Unexpected exception from trigger: " + trigger.getName(), e);
}
try { try {
trigger.run(); trigger.run();
} catch (AlreadyClosedException e) {
} catch (Exception e) { } catch (Exception e) {
// log but do not propagate exception because an exception thrown from a scheduled operation // log but do not propagate exception because an exception thrown from a scheduled operation
// will suppress future executions // will suppress future executions

View File

@ -36,6 +36,7 @@ import org.apache.solr.client.solrj.cloud.SolrCloudManager;
import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType; import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType;
import org.apache.solr.client.solrj.cloud.autoscaling.VersionedData; import org.apache.solr.client.solrj.cloud.autoscaling.VersionedData;
import org.apache.solr.common.AlreadyClosedException;
import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.util.Utils; import org.apache.solr.common.util.Utils;
import org.apache.solr.core.SolrResourceLoader; import org.apache.solr.core.SolrResourceLoader;
@ -239,7 +240,9 @@ public abstract class TriggerBase implements AutoScaling.Trigger {
stateManager.createData(path, data, CreateMode.PERSISTENT); stateManager.createData(path, data, CreateMode.PERSISTENT);
} }
lastState = state; lastState = state;
} catch (InterruptedException | BadVersionException | AlreadyExistsException | IOException | KeeperException e) { } catch (AlreadyExistsException e) {
} catch (InterruptedException | BadVersionException | IOException | KeeperException e) {
log.warn("Exception updating trigger state '" + path + "'", e); log.warn("Exception updating trigger state '" + path + "'", e);
} }
} }
@ -253,6 +256,8 @@ public abstract class TriggerBase implements AutoScaling.Trigger {
VersionedData versionedData = stateManager.getData(path); VersionedData versionedData = stateManager.getData(path);
data = versionedData.getData(); data = versionedData.getData();
} }
} catch (AlreadyClosedException e) {
} catch (Exception e) { } catch (Exception e) {
log.warn("Exception getting trigger state '" + path + "'", e); log.warn("Exception getting trigger state '" + path + "'", e);
} }

View File

@ -24,6 +24,7 @@ import java.util.Map;
import org.apache.solr.client.solrj.cloud.DistributedQueue; import org.apache.solr.client.solrj.cloud.DistributedQueue;
import org.apache.solr.client.solrj.cloud.SolrCloudManager; import org.apache.solr.client.solrj.cloud.SolrCloudManager;
import org.apache.solr.cloud.Stats; import org.apache.solr.cloud.Stats;
import org.apache.solr.common.AlreadyClosedException;
import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.util.Utils; import org.apache.solr.common.util.Utils;
import org.apache.solr.common.util.TimeSource; import org.apache.solr.common.util.TimeSource;
@ -78,7 +79,11 @@ public class TriggerEventQueue {
continue; continue;
} }
} }
} catch (Exception e) { }
catch (AlreadyClosedException e) {
}
catch (Exception e) {
log.warn("Exception peeking queue of trigger " + triggerName, e); log.warn("Exception peeking queue of trigger " + triggerName, e);
} }
return null; return null;

View File

@ -124,10 +124,10 @@ public class CloudConfig {
public static class CloudConfigBuilder { public static class CloudConfigBuilder {
private static final int DEFAULT_ZK_CLIENT_TIMEOUT = 15000; private static final int DEFAULT_ZK_CLIENT_TIMEOUT = 45000;
private static final int DEFAULT_LEADER_VOTE_WAIT = 180000; // 3 minutes private static final int DEFAULT_LEADER_VOTE_WAIT = 180000; // 3 minutes
private static final int DEFAULT_LEADER_CONFLICT_RESOLVE_WAIT = 180000; private static final int DEFAULT_LEADER_CONFLICT_RESOLVE_WAIT = 180000;
private static final int DEFAULT_CREATE_COLLECTION_ACTIVE_WAIT = 30; // 30 seconds private static final int DEFAULT_CREATE_COLLECTION_ACTIVE_WAIT = 45; // 45 seconds
private static final boolean DEFAULT_CREATE_COLLECTION_CHECK_LEADER_ACTIVE = false; private static final boolean DEFAULT_CREATE_COLLECTION_CHECK_LEADER_ACTIVE = false;
private static final int DEFAULT_AUTO_REPLICA_FAILOVER_WAIT_AFTER_EXPIRATION = 120000; private static final int DEFAULT_AUTO_REPLICA_FAILOVER_WAIT_AFTER_EXPIRATION = 120000;

View File

@ -16,6 +16,22 @@
*/ */
package org.apache.solr.core; package org.apache.solr.core;
import static java.util.Objects.requireNonNull;
import static org.apache.solr.common.params.CommonParams.AUTHC_PATH;
import static org.apache.solr.common.params.CommonParams.AUTHZ_PATH;
import static org.apache.solr.common.params.CommonParams.AUTOSCALING_HISTORY_PATH;
import static org.apache.solr.common.params.CommonParams.COLLECTIONS_HANDLER_PATH;
import static org.apache.solr.common.params.CommonParams.HEALTH_CHECK_HANDLER_PATH;
import static org.apache.solr.common.params.CommonParams.CONFIGSETS_HANDLER_PATH;
import static org.apache.solr.common.params.CommonParams.CORES_HANDLER_PATH;
import static org.apache.solr.common.params.CommonParams.INFO_HANDLER_PATH;
import static org.apache.solr.common.params.CommonParams.METRICS_HISTORY_PATH;
import static org.apache.solr.common.params.CommonParams.METRICS_PATH;
import static org.apache.solr.common.params.CommonParams.ZK_PATH;
import static org.apache.solr.common.params.CommonParams.ZK_STATUS_PATH;
import static org.apache.solr.core.CorePropertiesLocator.PROPERTIES_FILENAME;
import static org.apache.solr.security.AuthenticationPlugin.AUTHENTICATION_PLUGIN_PROP;
import java.io.IOException; import java.io.IOException;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
import java.nio.file.Path; import java.nio.file.Path;
@ -35,10 +51,9 @@ import java.util.Properties;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.Future; import java.util.concurrent.Future;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import org.apache.http.auth.AuthSchemeProvider; import org.apache.http.auth.AuthSchemeProvider;
import org.apache.http.client.CredentialsProvider; import org.apache.http.client.CredentialsProvider;
import org.apache.http.config.Lookup; import org.apache.http.config.Lookup;
@ -58,6 +73,7 @@ import org.apache.solr.cloud.CloudDescriptor;
import org.apache.solr.cloud.Overseer; import org.apache.solr.cloud.Overseer;
import org.apache.solr.cloud.ZkController; import org.apache.solr.cloud.ZkController;
import org.apache.solr.cloud.autoscaling.AutoScalingHandler; import org.apache.solr.cloud.autoscaling.AutoScalingHandler;
import org.apache.solr.common.AlreadyClosedException;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.DocCollection;
@ -106,24 +122,13 @@ import org.apache.solr.util.DefaultSolrThreadFactory;
import org.apache.solr.util.OrderedExecutor; import org.apache.solr.util.OrderedExecutor;
import org.apache.solr.util.stats.MetricUtils; import org.apache.solr.util.stats.MetricUtils;
import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.ConnectionLossException;
import org.apache.zookeeper.KeeperException.SessionExpiredException;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import static java.util.Objects.requireNonNull; import com.google.common.collect.ImmutableMap;
import static org.apache.solr.common.params.CommonParams.AUTHC_PATH; import com.google.common.collect.Maps;
import static org.apache.solr.common.params.CommonParams.AUTHZ_PATH;
import static org.apache.solr.common.params.CommonParams.AUTOSCALING_HISTORY_PATH;
import static org.apache.solr.common.params.CommonParams.COLLECTIONS_HANDLER_PATH;
import static org.apache.solr.common.params.CommonParams.CONFIGSETS_HANDLER_PATH;
import static org.apache.solr.common.params.CommonParams.CORES_HANDLER_PATH;
import static org.apache.solr.common.params.CommonParams.HEALTH_CHECK_HANDLER_PATH;
import static org.apache.solr.common.params.CommonParams.INFO_HANDLER_PATH;
import static org.apache.solr.common.params.CommonParams.METRICS_HISTORY_PATH;
import static org.apache.solr.common.params.CommonParams.METRICS_PATH;
import static org.apache.solr.common.params.CommonParams.ZK_PATH;
import static org.apache.solr.common.params.CommonParams.ZK_STATUS_PATH;
import static org.apache.solr.core.CorePropertiesLocator.PROPERTIES_FILENAME;
import static org.apache.solr.security.AuthenticationPlugin.AUTHENTICATION_PLUGIN_PROP;
/** /**
* *
@ -148,32 +153,32 @@ public class CoreContainer {
protected final Map<String, CoreLoadFailure> coreInitFailures = new ConcurrentHashMap<>(); protected final Map<String, CoreLoadFailure> coreInitFailures = new ConcurrentHashMap<>();
protected CoreAdminHandler coreAdminHandler = null; protected volatile CoreAdminHandler coreAdminHandler = null;
protected CollectionsHandler collectionsHandler = null; protected volatile CollectionsHandler collectionsHandler = null;
protected HealthCheckHandler healthCheckHandler = null; protected volatile HealthCheckHandler healthCheckHandler = null;
private InfoHandler infoHandler; private volatile InfoHandler infoHandler;
protected ConfigSetsHandler configSetsHandler = null; protected volatile ConfigSetsHandler configSetsHandler = null;
private PKIAuthenticationPlugin pkiAuthenticationPlugin; private volatile PKIAuthenticationPlugin pkiAuthenticationPlugin;
protected Properties containerProperties; protected volatile Properties containerProperties;
private ConfigSetService coreConfigService; private volatile ConfigSetService coreConfigService;
protected ZkContainer zkSys = new ZkContainer(); protected final ZkContainer zkSys = new ZkContainer();
protected ShardHandlerFactory shardHandlerFactory; protected volatile ShardHandlerFactory shardHandlerFactory;
private UpdateShardHandler updateShardHandler; private volatile UpdateShardHandler updateShardHandler;
private ExecutorService coreContainerWorkExecutor = ExecutorUtil.newMDCAwareCachedThreadPool( private volatile ExecutorService coreContainerWorkExecutor = ExecutorUtil.newMDCAwareCachedThreadPool(
new DefaultSolrThreadFactory("coreContainerWorkExecutor") ); new DefaultSolrThreadFactory("coreContainerWorkExecutor") );
private final OrderedExecutor replayUpdatesExecutor; private final OrderedExecutor replayUpdatesExecutor;
protected LogWatcher logging = null; protected volatile LogWatcher logging = null;
private CloserThread backgroundCloser = null; private volatile CloserThread backgroundCloser = null;
protected final NodeConfig cfg; protected final NodeConfig cfg;
protected final SolrResourceLoader loader; protected final SolrResourceLoader loader;
@ -181,33 +186,33 @@ public class CoreContainer {
protected final CoresLocator coresLocator; protected final CoresLocator coresLocator;
private String hostName; private volatile String hostName;
private final BlobRepository blobRepository = new BlobRepository(this); private final BlobRepository blobRepository = new BlobRepository(this);
private PluginBag<SolrRequestHandler> containerHandlers = new PluginBag<>(SolrRequestHandler.class, null); private volatile PluginBag<SolrRequestHandler> containerHandlers = new PluginBag<>(SolrRequestHandler.class, null);
private boolean asyncSolrCoreLoad; private volatile boolean asyncSolrCoreLoad;
protected SecurityConfHandler securityConfHandler; protected volatile SecurityConfHandler securityConfHandler;
private SecurityPluginHolder<AuthorizationPlugin> authorizationPlugin; private volatile SecurityPluginHolder<AuthorizationPlugin> authorizationPlugin;
private SecurityPluginHolder<AuthenticationPlugin> authenticationPlugin; private volatile SecurityPluginHolder<AuthenticationPlugin> authenticationPlugin;
private BackupRepositoryFactory backupRepoFactory; private volatile BackupRepositoryFactory backupRepoFactory;
protected SolrMetricManager metricManager; protected volatile SolrMetricManager metricManager;
protected String metricTag = Integer.toHexString(hashCode()); protected volatile String metricTag = Integer.toHexString(hashCode());
protected MetricsHandler metricsHandler; protected MetricsHandler metricsHandler;
protected MetricsHistoryHandler metricsHistoryHandler; protected volatile MetricsHistoryHandler metricsHistoryHandler;
protected MetricsCollectorHandler metricsCollectorHandler; protected volatile MetricsCollectorHandler metricsCollectorHandler;
protected AutoscalingHistoryHandler autoscalingHistoryHandler; protected volatile AutoscalingHistoryHandler autoscalingHistoryHandler;
// Bits for the state variable. // Bits for the state variable.
@ -216,7 +221,7 @@ public class CoreContainer {
public final static long INITIAL_CORE_LOAD_COMPLETE = 0x4L; public final static long INITIAL_CORE_LOAD_COMPLETE = 0x4L;
private volatile long status = 0L; private volatile long status = 0L;
protected AutoScalingHandler autoScalingHandler; protected volatile AutoScalingHandler autoScalingHandler;
private enum CoreInitFailedAction { fromleader, none } private enum CoreInitFailedAction { fromleader, none }
@ -759,6 +764,7 @@ public class CoreContainer {
name = getZkController().getNodeName(); name = getZkController().getNodeName();
cloudManager = getZkController().getSolrCloudManager(); cloudManager = getZkController().getSolrCloudManager();
client = new CloudSolrClient.Builder(Collections.singletonList(getZkController().getZkServerAddress()), Optional.empty()) client = new CloudSolrClient.Builder(Collections.singletonList(getZkController().getZkServerAddress()), Optional.empty())
.withSocketTimeout(30000).withConnectionTimeout(15000)
.withHttpClient(updateShardHandler.getDefaultHttpClient()).build(); .withHttpClient(updateShardHandler.getDefaultHttpClient()).build();
} else { } else {
name = getNodeConfig().getNodeName(); name = getNodeConfig().getNodeName();
@ -818,53 +824,40 @@ public class CoreContainer {
return isShutDown; return isShutDown;
} }
/**
* Stops all cores.
*/
public void shutdown() { public void shutdown() {
log.info("Shutting down CoreContainer instance=" log.info("Shutting down CoreContainer instance="
+ System.identityHashCode(this)); + System.identityHashCode(this));
ForkJoinPool customThreadPool = new ForkJoinPool(6);
isShutDown = true; isShutDown = true;
try {
ExecutorUtil.shutdownAndAwaitTermination(coreContainerWorkExecutor); if (isZooKeeperAware()) {
replayUpdatesExecutor.shutdownAndAwaitTermination(); cancelCoreRecoveries();
if (metricsHistoryHandler != null) {
IOUtils.closeQuietly(metricsHistoryHandler.getSolrClient());
metricsHistoryHandler.close();
}
if (metricManager != null) {
metricManager.closeReporters(SolrMetricManager.getRegistryName(SolrInfoBean.Group.node));
metricManager.closeReporters(SolrMetricManager.getRegistryName(SolrInfoBean.Group.jvm));
metricManager.closeReporters(SolrMetricManager.getRegistryName(SolrInfoBean.Group.jetty));
metricManager.unregisterGauges(SolrMetricManager.getRegistryName(SolrInfoBean.Group.node), metricTag);
metricManager.unregisterGauges(SolrMetricManager.getRegistryName(SolrInfoBean.Group.jvm), metricTag);
metricManager.unregisterGauges(SolrMetricManager.getRegistryName(SolrInfoBean.Group.jetty), metricTag);
}
if (isZooKeeperAware()) { if (isZooKeeperAware()) {
cancelCoreRecoveries(); cancelCoreRecoveries();
zkSys.zkController.publishNodeAsDown(zkSys.zkController.getNodeName());
try { try {
zkSys.zkController.removeEphemeralLiveNode(); zkSys.zkController.removeEphemeralLiveNode();
} catch (AlreadyClosedException | SessionExpiredException | ConnectionLossException e) {
} catch (Exception e) { } catch (Exception e) {
log.warn("Error removing live node. Continuing to close CoreContainer", e); log.warn("Error removing live node. Continuing to close CoreContainer", e);
} }
if (metricManager != null) {
metricManager.closeReporters(SolrMetricManager.getRegistryName(SolrInfoBean.Group.cluster));
}
} }
try { try {
if (coreAdminHandler != null) coreAdminHandler.shutdown(); if (zkSys.zkController.getZkClient().getConnectionManager().isConnected()) {
log.info("Publish this node as DOWN...");
zkSys.zkController.publishNodeAsDown(zkSys.zkController.getNodeName());
}
} catch (Exception e) { } catch (Exception e) {
log.warn("Error shutting down CoreAdminHandler. Continuing to close CoreContainer.", e); log.warn("Error publishing nodes as down. Continuing to close CoreContainer", e);
}
} }
try { ExecutorUtil.shutdownAndAwaitTermination(coreContainerWorkExecutor);
// First wake up the closer thread, it'll terminate almost immediately since it checks isShutDown. // First wake up the closer thread, it'll terminate almost immediately since it checks isShutDown.
synchronized (solrCores.getModifyLock()) { synchronized (solrCores.getModifyLock()) {
solrCores.getModifyLock().notifyAll(); // wake up anyone waiting solrCores.getModifyLock().notifyAll(); // wake up anyone waiting
@ -897,21 +890,71 @@ public class CoreContainer {
solrCores.getModifyLock().notifyAll(); // wake up the thread solrCores.getModifyLock().notifyAll(); // wake up the thread
} }
customThreadPool.submit(() -> Collections.singleton(replayUpdatesExecutor).parallelStream().forEach(c -> {
c.shutdownAndAwaitTermination();
}));
if (metricsHistoryHandler != null) {
customThreadPool.submit(() -> Collections.singleton(metricsHistoryHandler).parallelStream().forEach(c -> {
IOUtils.closeQuietly(c);
}));
customThreadPool.submit(() -> Collections.singleton(metricsHistoryHandler.getSolrClient()).parallelStream().forEach(c -> {
IOUtils.closeQuietly(c);
}));
}
if (metricManager != null) {
metricManager.closeReporters(SolrMetricManager.getRegistryName(SolrInfoBean.Group.node));
metricManager.closeReporters(SolrMetricManager.getRegistryName(SolrInfoBean.Group.jvm));
metricManager.closeReporters(SolrMetricManager.getRegistryName(SolrInfoBean.Group.jetty));
metricManager.unregisterGauges(SolrMetricManager.getRegistryName(SolrInfoBean.Group.node), metricTag);
metricManager.unregisterGauges(SolrMetricManager.getRegistryName(SolrInfoBean.Group.jvm), metricTag);
metricManager.unregisterGauges(SolrMetricManager.getRegistryName(SolrInfoBean.Group.jetty), metricTag);
}
if (isZooKeeperAware()) {
cancelCoreRecoveries();
if (metricManager != null) {
metricManager.closeReporters(SolrMetricManager.getRegistryName(SolrInfoBean.Group.cluster));
}
}
try {
if (coreAdminHandler != null) {
customThreadPool.submit(() -> Collections.singleton(coreAdminHandler).parallelStream().forEach(c -> {
c.shutdown();
}));
}
} catch (Exception e) {
log.warn("Error shutting down CoreAdminHandler. Continuing to close CoreContainer.", e);
}
} finally { } finally {
try { try {
if (shardHandlerFactory != null) { if (shardHandlerFactory != null) {
shardHandlerFactory.close(); customThreadPool.submit(() -> Collections.singleton(shardHandlerFactory).parallelStream().forEach(c -> {
c.close();
}));
} }
} finally { } finally {
try { try {
if (updateShardHandler != null) { if (updateShardHandler != null) {
customThreadPool.submit(() -> Collections.singleton(shardHandlerFactory).parallelStream().forEach(c -> {
updateShardHandler.close(); updateShardHandler.close();
}));
} }
} finally { } finally {
try {
// we want to close zk stuff last // we want to close zk stuff last
zkSys.close(); zkSys.close();
} finally {
ExecutorUtil.shutdownAndAwaitTermination(customThreadPool);
} }
} }
}
} }
// It should be safe to close the authorization plugin at this point. // It should be safe to close the authorization plugin at this point.
@ -1384,6 +1427,9 @@ public class CoreContainer {
* @param name the name of the SolrCore to reload * @param name the name of the SolrCore to reload
*/ */
public void reload(String name) { public void reload(String name) {
if (isShutDown) {
throw new AlreadyClosedException();
}
SolrCore core = solrCores.getCoreFromAnyList(name, false); SolrCore core = solrCores.getCoreFromAnyList(name, false);
if (core != null) { if (core != null) {

View File

@ -162,6 +162,7 @@ import org.apache.solr.util.NumberUtils;
import org.apache.solr.util.PropertiesInputStream; import org.apache.solr.util.PropertiesInputStream;
import org.apache.solr.util.PropertiesOutputStream; import org.apache.solr.util.PropertiesOutputStream;
import org.apache.solr.util.RefCounted; import org.apache.solr.util.RefCounted;
import org.apache.solr.util.TestInjection;
import org.apache.solr.util.plugin.NamedListInitializedPlugin; import org.apache.solr.util.plugin.NamedListInitializedPlugin;
import org.apache.solr.util.plugin.PluginInfoInitialized; import org.apache.solr.util.plugin.PluginInfoInitialized;
import org.apache.solr.util.plugin.SolrCoreAware; import org.apache.solr.util.plugin.SolrCoreAware;
@ -764,10 +765,14 @@ public final class SolrCore implements SolrInfoBean, SolrMetricProducer, Closeab
// Create the index if it doesn't exist. // Create the index if it doesn't exist.
if (!indexExists) { if (!indexExists) {
log.debug("{}Solr index directory '{}' doesn't exist. Creating new index...", logid, indexDir); log.debug("{}Solr index directory '{}' doesn't exist. Creating new index...", logid, indexDir);
SolrIndexWriter writer = null;
SolrIndexWriter writer = SolrIndexWriter.create(this, "SolrCore.initIndex", indexDir, getDirectoryFactory(), true, try {
writer = SolrIndexWriter.create(this, "SolrCore.initIndex", indexDir, getDirectoryFactory(), true,
getLatestSchema(), solrConfig.indexConfig, solrDelPolicy, codec); getLatestSchema(), solrConfig.indexConfig, solrDelPolicy, codec);
writer.close(); } finally {
IOUtils.closeQuietly(writer);
}
} }
cleanupOldIndexDirectories(reload); cleanupOldIndexDirectories(reload);
@ -992,6 +997,33 @@ public final class SolrCore implements SolrInfoBean, SolrMetricProducer, Closeab
resourceLoader.inform(resourceLoader); resourceLoader.inform(resourceLoader);
resourceLoader.inform(this); // last call before the latch is released. resourceLoader.inform(this); // last call before the latch is released.
this.updateHandler.informEventListeners(this); this.updateHandler.informEventListeners(this);
infoRegistry.put("core", this);
// register any SolrInfoMBeans SolrResourceLoader initialized
//
// this must happen after the latch is released, because a JMX server impl may
// choose to block on registering until properties can be fetched from an MBean,
// and a SolrCoreAware MBean may have properties that depend on getting a Searcher
// from the core.
resourceLoader.inform(infoRegistry);
// Allow the directory factory to report metrics
if (directoryFactory instanceof SolrMetricProducer) {
((SolrMetricProducer) directoryFactory).initializeMetrics(metricManager, coreMetricManager.getRegistryName(),
metricTag, "directoryFactory");
}
// seed version buckets with max from index during core initialization ... requires a searcher!
seedVersionBuckets();
bufferUpdatesIfConstructing(coreDescriptor);
this.ruleExpiryLock = new ReentrantLock();
this.snapshotDelLock = new ReentrantLock();
registerConfListener();
} catch (Throwable e) { } catch (Throwable e) {
// release the latch, otherwise we block trying to do the close. This // release the latch, otherwise we block trying to do the close. This
// should be fine, since counting down on a latch of 0 is still fine // should be fine, since counting down on a latch of 0 is still fine
@ -1017,31 +1049,6 @@ public final class SolrCore implements SolrInfoBean, SolrMetricProducer, Closeab
latch.countDown(); latch.countDown();
} }
infoRegistry.put("core", this);
// register any SolrInfoMBeans SolrResourceLoader initialized
//
// this must happen after the latch is released, because a JMX server impl may
// choose to block on registering until properties can be fetched from an MBean,
// and a SolrCoreAware MBean may have properties that depend on getting a Searcher
// from the core.
resourceLoader.inform(infoRegistry);
// Allow the directory factory to report metrics
if (directoryFactory instanceof SolrMetricProducer) {
((SolrMetricProducer)directoryFactory).initializeMetrics(metricManager, coreMetricManager.getRegistryName(), metricTag, "directoryFactory");
}
// seed version buckets with max from index during core initialization ... requires a searcher!
seedVersionBuckets();
bufferUpdatesIfConstructing(coreDescriptor);
this.ruleExpiryLock = new ReentrantLock();
this.snapshotDelLock = new ReentrantLock();
registerConfListener();
assert ObjectReleaseTracker.track(this); assert ObjectReleaseTracker.track(this);
} }
@ -1999,7 +2006,7 @@ public final class SolrCore implements SolrInfoBean, SolrMetricProducer, Closeab
*/ */
public RefCounted<SolrIndexSearcher> openNewSearcher(boolean updateHandlerReopens, boolean realtime) { public RefCounted<SolrIndexSearcher> openNewSearcher(boolean updateHandlerReopens, boolean realtime) {
if (isClosed()) { // catch some errors quicker if (isClosed()) { // catch some errors quicker
throw new SolrException(ErrorCode.SERVER_ERROR, "openNewSearcher called on closed core"); throw new SolrCoreState.CoreIsClosedException();
} }
SolrIndexSearcher tmp; SolrIndexSearcher tmp;
@ -2372,7 +2379,7 @@ public final class SolrCore implements SolrInfoBean, SolrMetricProducer, Closeab
return returnSearcher ? newSearchHolder : null; return returnSearcher ? newSearchHolder : null;
} catch (Exception e) { } catch (Exception e) {
if (e instanceof SolrException) throw (SolrException)e; if (e instanceof RuntimeException) throw (RuntimeException)e;
throw new SolrException(ErrorCode.SERVER_ERROR, e); throw new SolrException(ErrorCode.SERVER_ERROR, e);
} finally { } finally {
@ -2491,6 +2498,7 @@ public final class SolrCore implements SolrInfoBean, SolrMetricProducer, Closeab
// even in the face of errors. // even in the face of errors.
onDeckSearchers--; onDeckSearchers--;
searcherLock.notifyAll(); searcherLock.notifyAll();
assert TestInjection.injectSearcherHooks(getCoreDescriptor() != null && getCoreDescriptor().getCloudDescriptor() != null ? getCoreDescriptor().getCloudDescriptor().getCollectionName() : null);
} }
} }
} }
@ -3008,7 +3016,7 @@ public final class SolrCore implements SolrInfoBean, SolrMetricProducer, Closeab
int solrConfigversion, overlayVersion, managedSchemaVersion = 0; int solrConfigversion, overlayVersion, managedSchemaVersion = 0;
SolrConfig cfg = null; SolrConfig cfg = null;
try (SolrCore solrCore = cc.solrCores.getCoreFromAnyList(coreName, true)) { try (SolrCore solrCore = cc.solrCores.getCoreFromAnyList(coreName, true)) {
if (solrCore == null || solrCore.isClosed()) return; if (solrCore == null || solrCore.isClosed() || solrCore.getCoreContainer().isShutDown()) return;
cfg = solrCore.getSolrConfig(); cfg = solrCore.getSolrConfig();
solrConfigversion = solrCore.getSolrConfig().getOverlay().getZnodeVersion(); solrConfigversion = solrCore.getSolrConfig().getOverlay().getZnodeVersion();
overlayVersion = solrCore.getSolrConfig().getZnodeVersion(); overlayVersion = solrCore.getSolrConfig().getZnodeVersion();
@ -3042,7 +3050,7 @@ public final class SolrCore implements SolrInfoBean, SolrMetricProducer, Closeab
} }
//some files in conf directory may have other than managedschema, overlay, params //some files in conf directory may have other than managedschema, overlay, params
try (SolrCore solrCore = cc.solrCores.getCoreFromAnyList(coreName, true)) { try (SolrCore solrCore = cc.solrCores.getCoreFromAnyList(coreName, true)) {
if (solrCore == null || solrCore.isClosed()) return; if (solrCore == null || solrCore.isClosed() || cc.isShutDown()) return;
for (Runnable listener : solrCore.confListeners) { for (Runnable listener : solrCore.confListeners) {
try { try {
listener.run(); listener.run();

View File

@ -31,7 +31,7 @@ import org.slf4j.LoggerFactory;
public abstract class TransientSolrCoreCacheFactory { public abstract class TransientSolrCoreCacheFactory {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private CoreContainer coreContainer = null; private volatile CoreContainer coreContainer = null;
public abstract TransientSolrCoreCache getTransientSolrCoreCache(); public abstract TransientSolrCoreCache getTransientSolrCoreCache();
/** /**

View File

@ -18,7 +18,7 @@ package org.apache.solr.core;
public class TransientSolrCoreCacheFactoryDefault extends TransientSolrCoreCacheFactory { public class TransientSolrCoreCacheFactoryDefault extends TransientSolrCoreCacheFactory {
TransientSolrCoreCache transientSolrCoreCache = null; volatile TransientSolrCoreCache transientSolrCoreCache = null;
@Override @Override
public TransientSolrCoreCache getTransientSolrCoreCache() { public TransientSolrCoreCache getTransientSolrCoreCache() {

View File

@ -31,6 +31,7 @@ import java.util.function.Predicate;
import org.apache.solr.cloud.CurrentCoreDescriptorProvider; import org.apache.solr.cloud.CurrentCoreDescriptorProvider;
import org.apache.solr.cloud.SolrZkServer; import org.apache.solr.cloud.SolrZkServer;
import org.apache.solr.cloud.ZkController; import org.apache.solr.cloud.ZkController;
import org.apache.solr.common.AlreadyClosedException;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.Replica; import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.ZkConfigManager; import org.apache.solr.common.cloud.ZkConfigManager;
@ -174,24 +175,31 @@ public class ZkContainer {
return zkRun.substring(0, zkRun.lastIndexOf('/')); return zkRun.substring(0, zkRun.lastIndexOf('/'));
} }
public static Predicate<CoreDescriptor> testing_beforeRegisterInZk; public static volatile Predicate<CoreDescriptor> testing_beforeRegisterInZk;
public void registerInZk(final SolrCore core, boolean background, boolean skipRecovery) { public void registerInZk(final SolrCore core, boolean background, boolean skipRecovery) {
CoreDescriptor cd = core.getCoreDescriptor(); // save this here - the core may not have it later
Runnable r = () -> { Runnable r = () -> {
MDCLoggingContext.setCore(core); MDCLoggingContext.setCore(core);
try { try {
try { try {
if (testing_beforeRegisterInZk != null) { if (testing_beforeRegisterInZk != null) {
testing_beforeRegisterInZk.test(core.getCoreDescriptor()); testing_beforeRegisterInZk.test(cd);
}
if (!core.getCoreContainer().isShutDown()) {
zkController.register(core.getName(), cd, skipRecovery);
} }
zkController.register(core.getName(), core.getCoreDescriptor(), skipRecovery);
} catch (InterruptedException e) { } catch (InterruptedException e) {
// Restore the interrupted status // Restore the interrupted status
Thread.currentThread().interrupt(); Thread.currentThread().interrupt();
SolrException.log(log, "", e); SolrException.log(log, "", e);
} catch (KeeperException e) {
SolrException.log(log, "", e);
} catch (AlreadyClosedException e) {
} catch (Exception e) { } catch (Exception e) {
try { try {
zkController.publish(core.getCoreDescriptor(), Replica.State.DOWN); zkController.publish(cd, Replica.State.DOWN);
} catch (InterruptedException e1) { } catch (InterruptedException e1) {
Thread.currentThread().interrupt(); Thread.currentThread().interrupt();
log.error("", e1); log.error("", e1);

View File

@ -97,6 +97,7 @@ class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver {
String targetCollection = params.get(CdcrParams.TARGET_COLLECTION_PARAM); String targetCollection = params.get(CdcrParams.TARGET_COLLECTION_PARAM);
CloudSolrClient client = new Builder(Collections.singletonList(zkHost), Optional.empty()) CloudSolrClient client = new Builder(Collections.singletonList(zkHost), Optional.empty())
.withSocketTimeout(30000).withConnectionTimeout(15000)
.sendUpdatesOnlyToShardLeaders() .sendUpdatesOnlyToShardLeaders()
.build(); .build();
client.setDefaultCollection(targetCollection); client.setDefaultCollection(targetCollection);

View File

@ -222,7 +222,7 @@ public class IndexFetcher {
httpClientParams.set(HttpClientUtil.PROP_BASIC_AUTH_PASS, httpBasicAuthPassword); httpClientParams.set(HttpClientUtil.PROP_BASIC_AUTH_PASS, httpBasicAuthPassword);
httpClientParams.set(HttpClientUtil.PROP_ALLOW_COMPRESSION, useCompression); httpClientParams.set(HttpClientUtil.PROP_ALLOW_COMPRESSION, useCompression);
return HttpClientUtil.createClient(httpClientParams, core.getCoreContainer().getUpdateShardHandler().getDefaultConnectionManager(), true); return HttpClientUtil.createClient(httpClientParams, core.getCoreContainer().getUpdateShardHandler().getRecoveryOnlyConnectionManager(), true);
} }
public IndexFetcher(final NamedList initArgs, final ReplicationHandler handler, final SolrCore sc) { public IndexFetcher(final NamedList initArgs, final ReplicationHandler handler, final SolrCore sc) {

View File

@ -197,7 +197,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
private boolean replicateOnStart = false; private boolean replicateOnStart = false;
private ScheduledExecutorService executorService; private volatile ScheduledExecutorService executorService;
private volatile long executorStartTime; private volatile long executorStartTime;
@ -1369,6 +1369,8 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
if (restoreFuture != null) { if (restoreFuture != null) {
restoreFuture.cancel(false); restoreFuture.cancel(false);
} }
ExecutorUtil.shutdownAndAwaitTermination(executorService);
} }
/** /**

View File

@ -125,7 +125,7 @@ public class AutoscalingHistoryHandler extends RequestHandlerBase implements Per
} }
} }
} }
try (CloudSolrClient cloudSolrClient = new CloudSolrClient.Builder(Collections.singletonList(coreContainer.getZkController().getZkServerAddress()), Optional.empty()) try (CloudSolrClient cloudSolrClient = new CloudSolrClient.Builder(Collections.singletonList(coreContainer.getZkController().getZkServerAddress()), Optional.empty()).withSocketTimeout(30000).withConnectionTimeout(15000)
.withHttpClient(coreContainer.getUpdateShardHandler().getDefaultHttpClient()) .withHttpClient(coreContainer.getUpdateShardHandler().getDefaultHttpClient())
.build()) { .build()) {
QueryResponse qr = cloudSolrClient.query(collection, params); QueryResponse qr = cloudSolrClient.query(collection, params);

View File

@ -31,6 +31,7 @@ import java.util.Map;
import java.util.Optional; import java.util.Optional;
import java.util.Set; import java.util.Set;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList;
@ -45,10 +46,10 @@ import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.CoreAdminRequest.RequestSyncShard; import org.apache.solr.client.solrj.request.CoreAdminRequest.RequestSyncShard;
import org.apache.solr.client.solrj.response.RequestStatusState; import org.apache.solr.client.solrj.response.RequestStatusState;
import org.apache.solr.client.solrj.util.SolrIdentifierValidator; import org.apache.solr.client.solrj.util.SolrIdentifierValidator;
import org.apache.solr.cloud.Overseer;
import org.apache.solr.cloud.OverseerSolrResponse; import org.apache.solr.cloud.OverseerSolrResponse;
import org.apache.solr.cloud.OverseerTaskQueue; import org.apache.solr.cloud.OverseerTaskQueue;
import org.apache.solr.cloud.OverseerTaskQueue.QueueEvent; import org.apache.solr.cloud.OverseerTaskQueue.QueueEvent;
import org.apache.solr.cloud.ZkController.NotInClusterStateException;
import org.apache.solr.cloud.ZkController; import org.apache.solr.cloud.ZkController;
import org.apache.solr.cloud.ZkShardTerms; import org.apache.solr.cloud.ZkShardTerms;
import org.apache.solr.cloud.overseer.SliceMutator; import org.apache.solr.cloud.overseer.SliceMutator;
@ -285,7 +286,7 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
} else { } else {
// submits and doesn't wait for anything (no response) // submits and doesn't wait for anything (no response)
Overseer.getStateUpdateQueue(coreContainer.getZkController().getZkClient()).offer(Utils.toJSON(props)); coreContainer.getZkController().getOverseer().offerStateUpdate(Utils.toJSON(props));
} }
} }
@ -1249,28 +1250,30 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
return; return;
} }
int replicaFailCount;
if (createCollResponse.getResponse().get("failure") != null) { if (createCollResponse.getResponse().get("failure") != null) {
// TODO: we should not wait for Replicas we know failed replicaFailCount = ((NamedList) createCollResponse.getResponse().get("failure")).size();
} else {
replicaFailCount = 0;
} }
String replicaNotAlive = null;
String replicaState = null;
String nodeNotLive = null;
CloudConfig ccfg = cc.getConfig().getCloudConfig(); CloudConfig ccfg = cc.getConfig().getCloudConfig();
Integer numRetries = ccfg.getCreateCollectionWaitTimeTillActive(); // this config is actually # seconds, not # tries Integer seconds = ccfg.getCreateCollectionWaitTimeTillActive();
Boolean checkLeaderOnly = ccfg.isCreateCollectionCheckLeaderActive(); Boolean checkLeaderOnly = ccfg.isCreateCollectionCheckLeaderActive();
log.info("Wait for new collection to be active for at most " + numRetries + " seconds. Check all shard " log.info("Wait for new collection to be active for at most " + seconds + " seconds. Check all shard "
+ (checkLeaderOnly ? "leaders" : "replicas")); + (checkLeaderOnly ? "leaders" : "replicas"));
ZkStateReader zkStateReader = cc.getZkController().getZkStateReader();
for (int i = 0; i < numRetries; i++) {
ClusterState clusterState = zkStateReader.getClusterState();
final DocCollection docCollection = clusterState.getCollectionOrNull(collectionName); try {
cc.getZkController().getZkStateReader().waitForState(collectionName, seconds, TimeUnit.SECONDS, (n, c) -> {
if (docCollection != null && docCollection.getSlices() != null) { if (c == null) {
Collection<Slice> shards = docCollection.getSlices(); // the collection was not created, don't wait
replicaNotAlive = null; return true;
}
if (c.getSlices() != null) {
Collection<Slice> shards = c.getSlices();
int replicaNotAliveCnt = 0;
for (Slice shard : shards) { for (Slice shard : shards) {
Collection<Replica> replicas; Collection<Replica> replicas;
if (!checkLeaderOnly) replicas = shard.getReplicas(); if (!checkLeaderOnly) replicas = shard.getReplicas();
@ -1282,28 +1285,24 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
String state = replica.getStr(ZkStateReader.STATE_PROP); String state = replica.getStr(ZkStateReader.STATE_PROP);
log.debug("Checking replica status, collection={} replica={} state={}", collectionName, log.debug("Checking replica status, collection={} replica={} state={}", collectionName,
replica.getCoreUrl(), state); replica.getCoreUrl(), state);
if (!clusterState.liveNodesContain(replica.getNodeName()) if (!n.contains(replica.getNodeName())
|| !state.equals(Replica.State.ACTIVE.toString())) { || !state.equals(Replica.State.ACTIVE.toString())) {
replicaNotAlive = replica.getCoreUrl(); replicaNotAliveCnt++;
nodeNotLive = replica.getNodeName(); return false;
replicaState = state;
break;
} }
} }
if (replicaNotAlive != null) break;
} }
if (replicaNotAlive == null) return; if ((replicaNotAliveCnt == 0) || (replicaNotAliveCnt <= replicaFailCount)) return true;
} }
Thread.sleep(1000); // thus numRetries is roughly number of seconds return false;
} });
if (nodeNotLive != null && replicaState != null) { } catch (TimeoutException | InterruptedException e) {
log.error("Timed out waiting for new collection's replicas to become ACTIVE "
+ (replicaState.equals(Replica.State.ACTIVE.toString()) ? "node " + nodeNotLive + " is not live" String error = "Timeout waiting for active collection " + collectionName + " with timeout=" + seconds;
: "replica " + replicaNotAlive + " is in state of " + replicaState.toString()) + " with timeout=" + numRetries); throw new NotInClusterStateException(ErrorCode.SERVER_ERROR, error);
} else {
log.error("Timed out waiting for new collection's replicas to become ACTIVE with timeout=" + numRetries);
} }
} }
public static void verifyRuleParams(CoreContainer cc, Map<String, Object> m) { public static void verifyRuleParams(CoreContainer cc, Map<String, Object> m) {

View File

@ -371,7 +371,7 @@ public class CoreAdminHandler extends RequestHandlerBase implements PermissionNa
* Method to ensure shutting down of the ThreadPool Executor. * Method to ensure shutting down of the ThreadPool Executor.
*/ */
public void shutdown() { public void shutdown() {
if (parallelExecutor != null && !parallelExecutor.isShutdown()) if (parallelExecutor != null)
ExecutorUtil.shutdownAndAwaitTermination(parallelExecutor); ExecutorUtil.shutdownAndAwaitTermination(parallelExecutor);
} }

View File

@ -642,7 +642,17 @@ public class MetricsHistoryHandler extends RequestHandlerBase implements Permiss
public void close() { public void close() {
log.debug("Closing " + hashCode()); log.debug("Closing " + hashCode());
if (collectService != null) { if (collectService != null) {
boolean shutdown = false;
while (!shutdown) {
try {
// Wait a while for existing tasks to terminate
collectService.shutdownNow(); collectService.shutdownNow();
shutdown = collectService.awaitTermination(5, TimeUnit.SECONDS);
} catch (InterruptedException ie) {
// Preserve interrupt status
Thread.currentThread().interrupt();
}
}
} }
if (factory != null) { if (factory != null) {
factory.close(); factory.close();

View File

@ -18,13 +18,15 @@
package org.apache.solr.handler.admin; package org.apache.solr.handler.admin;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
import java.util.Objects; import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.solr.cloud.CloudDescriptor; import org.apache.solr.cloud.CloudDescriptor;
import org.apache.solr.cloud.ZkController.NotInClusterStateException;
import org.apache.solr.cloud.ZkShardTerms; import org.apache.solr.cloud.ZkShardTerms;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica; import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice; import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.cloud.ZkStateReader;
@ -47,10 +49,7 @@ class PrepRecoveryOp implements CoreAdminHandler.CoreAdminOp {
final SolrParams params = it.req.getParams(); final SolrParams params = it.req.getParams();
String cname = params.get(CoreAdminParams.CORE); String cname = params.get(CoreAdminParams.CORE, "");
if (cname == null) {
cname = "";
}
String nodeName = params.get("nodeName"); String nodeName = params.get("nodeName");
String coreNodeName = params.get("coreNodeName"); String coreNodeName = params.get("coreNodeName");
@ -59,55 +58,46 @@ class PrepRecoveryOp implements CoreAdminHandler.CoreAdminOp {
Boolean onlyIfLeader = params.getBool("onlyIfLeader"); Boolean onlyIfLeader = params.getBool("onlyIfLeader");
Boolean onlyIfLeaderActive = params.getBool("onlyIfLeaderActive"); Boolean onlyIfLeaderActive = params.getBool("onlyIfLeaderActive");
CoreContainer coreContainer = it.handler.coreContainer; CoreContainer coreContainer = it.handler.coreContainer;
// wait long enough for the leader conflict to work itself out plus a little extra // wait long enough for the leader conflict to work itself out plus a little extra
int conflictWaitMs = coreContainer.getZkController().getLeaderConflictResolveWait(); int conflictWaitMs = coreContainer.getZkController().getLeaderConflictResolveWait();
int maxTries = (int) Math.round(conflictWaitMs / 1000) + 3; log.info(
log.info("Going to wait for coreNodeName: {}, state: {}, checkLive: {}, onlyIfLeader: {}, onlyIfLeaderActive: {}, maxTime: {} s", "Going to wait for coreNodeName: {}, state: {}, checkLive: {}, onlyIfLeader: {}, onlyIfLeaderActive: {}",
coreNodeName, waitForState, checkLive, onlyIfLeader, onlyIfLeaderActive, maxTries); coreNodeName, waitForState, checkLive, onlyIfLeader, onlyIfLeaderActive);
Replica.State state = null; String collectionName;
boolean live = false; CloudDescriptor cloudDescriptor;
int retry = 0;
while (true) {
try (SolrCore core = coreContainer.getCore(cname)) { try (SolrCore core = coreContainer.getCore(cname)) {
if (core == null && retry == Math.min(30, maxTries)) { if (core == null) throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "core not found:" + cname);
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "core not found:" collectionName = core.getCoreDescriptor().getCloudDescriptor().getCollectionName();
+ cname); cloudDescriptor = core.getCoreDescriptor()
.getCloudDescriptor();
} }
if (core != null) { AtomicReference<String> errorMessage = new AtomicReference<>();
try {
coreContainer.getZkController().getZkStateReader().waitForState(collectionName, conflictWaitMs, TimeUnit.MILLISECONDS, (n, c) -> {
if (c == null)
return false;
try (SolrCore core = coreContainer.getCore(cname)) {
if (core == null) throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "core not found:" + cname);
if (onlyIfLeader != null && onlyIfLeader) { if (onlyIfLeader != null && onlyIfLeader) {
if (!core.getCoreDescriptor().getCloudDescriptor().isLeader()) { if (!core.getCoreDescriptor().getCloudDescriptor().isLeader()) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "We are not the leader"); throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "We are not the leader");
} }
} }
}
// wait until we are sure the recovering node is ready // wait until we are sure the recovering node is ready
// to accept updates // to accept updates
CloudDescriptor cloudDescriptor = core.getCoreDescriptor() Replica.State state = null;
.getCloudDescriptor(); boolean live = false;
String collectionName = cloudDescriptor.getCollectionName(); Slice slice = c.getSlice(cloudDescriptor.getShardId());
if (retry % 15 == 0) {
if (retry > 0 && log.isInfoEnabled())
log.info("After " + retry + " seconds, core " + cname + " (" +
cloudDescriptor.getShardId() + " of " +
cloudDescriptor.getCollectionName() + ") still does not have state: " +
waitForState + "; forcing ClusterState update from ZooKeeper");
// force a cluster state update
coreContainer.getZkController().getZkStateReader().forceUpdateCollection(collectionName);
}
ClusterState clusterState = coreContainer.getZkController().getClusterState();
DocCollection collection = clusterState.getCollection(collectionName);
Slice slice = collection.getSlice(cloudDescriptor.getShardId());
if (slice != null) { if (slice != null) {
final Replica replica = slice.getReplicasMap().get(coreNodeName); final Replica replica = slice.getReplicasMap().get(coreNodeName);
if (replica != null) { if (replica != null) {
state = replica.getState(); state = replica.getState();
live = clusterState.liveNodesContain(nodeName); live = n.contains(nodeName);
final Replica.State localState = cloudDescriptor.getLastPublished(); final Replica.State localState = cloudDescriptor.getLastPublished();
@ -116,76 +106,62 @@ class PrepRecoveryOp implements CoreAdminHandler.CoreAdminOp {
// this is a safeguard // this is a safeguard
boolean leaderDoesNotNeedRecovery = (onlyIfLeader != null && boolean leaderDoesNotNeedRecovery = (onlyIfLeader != null &&
onlyIfLeader && onlyIfLeader &&
core.getName().equals(replica.getStr("core")) && cname.equals(replica.getStr("core")) &&
waitForState == Replica.State.RECOVERING && waitForState == Replica.State.RECOVERING &&
localState == Replica.State.ACTIVE && localState == Replica.State.ACTIVE &&
state == Replica.State.ACTIVE); state == Replica.State.ACTIVE);
if (leaderDoesNotNeedRecovery) { if (leaderDoesNotNeedRecovery) {
log.warn("Leader " + core.getName() + " ignoring request to be in the recovering state because it is live and active."); log.warn(
"Leader " + cname + " ignoring request to be in the recovering state because it is live and active.");
} }
ZkShardTerms shardTerms = coreContainer.getZkController().getShardTerms(collectionName, slice.getName()); ZkShardTerms shardTerms = coreContainer.getZkController().getShardTerms(collectionName, slice.getName());
// if the replica is waiting for leader to see recovery state, the leader should refresh its terms // if the replica is waiting for leader to see recovery state, the leader should refresh its terms
if (waitForState == Replica.State.RECOVERING && shardTerms.registered(coreNodeName) && shardTerms.skipSendingUpdatesTo(coreNodeName)) { if (waitForState == Replica.State.RECOVERING && shardTerms.registered(coreNodeName)
&& shardTerms.skipSendingUpdatesTo(coreNodeName)) {
// The replica changed it term, then published itself as RECOVERING. // The replica changed it term, then published itself as RECOVERING.
// This core already see replica as RECOVERING // This core already see replica as RECOVERING
// so it is guarantees that a live-fetch will be enough for this core to see max term published // so it is guarantees that a live-fetch will be enough for this core to see max term published
shardTerms.refreshTerms(); shardTerms.refreshTerms();
} }
boolean onlyIfActiveCheckResult = onlyIfLeaderActive != null && onlyIfLeaderActive && localState != Replica.State.ACTIVE; boolean onlyIfActiveCheckResult = onlyIfLeaderActive != null && onlyIfLeaderActive
log.info("In WaitForState(" + waitForState + "): collection=" + collectionName + ", shard=" + slice.getName() + && localState != Replica.State.ACTIVE;
", thisCore=" + core.getName() + ", leaderDoesNotNeedRecovery=" + leaderDoesNotNeedRecovery + log.info(
", isLeader? " + core.getCoreDescriptor().getCloudDescriptor().isLeader() + "In WaitForState(" + waitForState + "): collection=" + collectionName + ", shard=" + slice.getName() +
", live=" + live + ", checkLive=" + checkLive + ", currentState=" + state.toString() + ", localState=" + localState + ", nodeName=" + nodeName + ", thisCore=" + cname + ", leaderDoesNotNeedRecovery=" + leaderDoesNotNeedRecovery +
", coreNodeName=" + coreNodeName + ", onlyIfActiveCheckResult=" + onlyIfActiveCheckResult + ", nodeProps: " + replica); ", isLeader? " + cloudDescriptor.isLeader() +
", live=" + live + ", checkLive=" + checkLive + ", currentState=" + state.toString()
+ ", localState=" + localState + ", nodeName=" + nodeName +
", coreNodeName=" + coreNodeName + ", onlyIfActiveCheckResult=" + onlyIfActiveCheckResult
+ ", nodeProps: " + replica);
if (!onlyIfActiveCheckResult && replica != null && (state == waitForState || leaderDoesNotNeedRecovery)) { if (!onlyIfActiveCheckResult && replica != null && (state == waitForState || leaderDoesNotNeedRecovery)) {
if (checkLive == null) { if (checkLive == null) {
break; return true;
} else if (checkLive && live) { } else if (checkLive && live) {
break; return true;
} else if (!checkLive && !live) { } else if (!checkLive && !live) {
break; return true;
} }
} }
} }
} }
}
if (retry++ == maxTries) {
String collection = null;
String leaderInfo = null;
String shardId = null;
try {
CloudDescriptor cloudDescriptor =
core.getCoreDescriptor().getCloudDescriptor();
collection = cloudDescriptor.getCollectionName();
shardId = cloudDescriptor.getShardId();
leaderInfo = coreContainer.getZkController().
getZkStateReader().getLeaderUrl(collection, shardId, 5000);
} catch (Exception exc) {
leaderInfo = "Not available due to: " + exc;
}
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"I was asked to wait on state " + waitForState + " for "
+ shardId + " in " + collection + " on " + nodeName
+ " but I still do not see the requested state. I see state: "
+ Objects.toString(state) + " live:" + live + " leader from ZK: " + leaderInfo);
}
if (coreContainer.isShutDown()) { if (coreContainer.isShutDown()) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Solr is shutting down"); "Solr is shutting down");
} }
}
Thread.sleep(1000); return false;
});
} catch (TimeoutException | InterruptedException e) {
String error = errorMessage.get();
if (error == null)
error = "Timeout waiting for collection state.";
throw new NotInClusterStateException(ErrorCode.SERVER_ERROR, error);
} }
log.info("Waited coreNodeName: " + coreNodeName + ", state: " + waitForState
+ ", checkLive: " + checkLive + ", onlyIfLeader: " + onlyIfLeader + " for: " + retry + " seconds.");
} }
} }

View File

@ -16,13 +16,16 @@
*/ */
package org.apache.solr.handler.component; package org.apache.solr.handler.component;
import java.lang.invoke.MethodHandles; import static org.apache.solr.common.params.CommonParams.DISTRIB;
import java.util.concurrent.Callable;
import java.util.concurrent.Future;
import java.util.concurrent.ExecutorService;
import java.util.List;
import java.util.ArrayList;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.solr.client.solrj.SolrRequest; import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.impl.HttpClientUtil; import org.apache.solr.client.solrj.impl.HttpClientUtil;
import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.apache.solr.client.solrj.impl.HttpSolrClient;
@ -34,16 +37,14 @@ import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.ExecutorUtil; import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.SolrjNamedThreadFactory; import org.apache.solr.common.util.SolrjNamedThreadFactory;
import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SolrIndexSearcher;
import org.apache.http.client.HttpClient;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import static org.apache.solr.common.params.CommonParams.DISTRIB;
public abstract class IterativeMergeStrategy implements MergeStrategy { public abstract class IterativeMergeStrategy implements MergeStrategy {
protected ExecutorService executorService; protected volatile ExecutorService executorService;
protected static HttpClient httpClient;
protected volatile CloseableHttpClient httpClient;
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@ -51,11 +52,13 @@ public abstract class IterativeMergeStrategy implements MergeStrategy {
rb._responseDocs = new SolrDocumentList(); // Null pointers will occur otherwise. rb._responseDocs = new SolrDocumentList(); // Null pointers will occur otherwise.
rb.onePassDistributedQuery = true; // Turn off the second pass distributed. rb.onePassDistributedQuery = true; // Turn off the second pass distributed.
executorService = ExecutorUtil.newMDCAwareCachedThreadPool(new SolrjNamedThreadFactory("IterativeMergeStrategy")); executorService = ExecutorUtil.newMDCAwareCachedThreadPool(new SolrjNamedThreadFactory("IterativeMergeStrategy"));
httpClient = getHttpClient();
try { try {
process(rb, sreq); process(rb, sreq);
} catch (Exception e) { } catch (Exception e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} finally { } finally {
HttpClientUtil.close(httpClient);
executorService.shutdownNow(); executorService.shutdownNow();
} }
} }
@ -76,7 +79,7 @@ public abstract class IterativeMergeStrategy implements MergeStrategy {
} }
public static class CallBack implements Callable<CallBack> { public class CallBack implements Callable<CallBack> {
private HttpSolrClient solrClient; private HttpSolrClient solrClient;
private QueryRequest req; private QueryRequest req;
private QueryResponse response; private QueryResponse response;
@ -85,7 +88,7 @@ public abstract class IterativeMergeStrategy implements MergeStrategy {
public CallBack(ShardResponse originalShardResponse, QueryRequest req) { public CallBack(ShardResponse originalShardResponse, QueryRequest req) {
this.solrClient = new Builder(originalShardResponse.getShardAddress()) this.solrClient = new Builder(originalShardResponse.getShardAddress())
.withHttpClient(getHttpClient()) .withHttpClient(httpClient)
.build(); .build();
this.req = req; this.req = req;
this.originalShardResponse = originalShardResponse; this.originalShardResponse = originalShardResponse;
@ -122,16 +125,16 @@ public abstract class IterativeMergeStrategy implements MergeStrategy {
protected abstract void process(ResponseBuilder rb, ShardRequest sreq) throws Exception; protected abstract void process(ResponseBuilder rb, ShardRequest sreq) throws Exception;
static synchronized HttpClient getHttpClient() { private CloseableHttpClient getHttpClient() {
if(httpClient == null) {
ModifiableSolrParams params = new ModifiableSolrParams(); ModifiableSolrParams params = new ModifiableSolrParams();
params.set(HttpClientUtil.PROP_MAX_CONNECTIONS, 128); params.set(HttpClientUtil.PROP_MAX_CONNECTIONS, 128);
params.set(HttpClientUtil.PROP_MAX_CONNECTIONS_PER_HOST, 32); params.set(HttpClientUtil.PROP_MAX_CONNECTIONS_PER_HOST, 32);
httpClient = HttpClientUtil.createClient(params); CloseableHttpClient httpClient = HttpClientUtil.createClient(params);
return httpClient;
} else {
return httpClient; return httpClient;
} }
} }
}

View File

@ -38,7 +38,6 @@ import org.apache.solr.common.util.DataInputInputStream;
import org.apache.solr.common.util.FastInputStream; import org.apache.solr.common.util.FastInputStream;
import org.apache.solr.common.util.JavaBinCodec; import org.apache.solr.common.util.JavaBinCodec;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.handler.RequestHandlerUtils;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.update.AddUpdateCommand; import org.apache.solr.update.AddUpdateCommand;
@ -89,13 +88,6 @@ public class JavabinLoader extends ContentStreamLoader {
@Override @Override
public void update(SolrInputDocument document, UpdateRequest updateRequest, Integer commitWithin, Boolean overwrite) { public void update(SolrInputDocument document, UpdateRequest updateRequest, Integer commitWithin, Boolean overwrite) {
if (document == null) { if (document == null) {
// Perhaps commit from the parameters
try {
RequestHandlerUtils.handleCommit(req, processor, updateRequest.getParams(), false);
RequestHandlerUtils.handleRollback(req, processor, updateRequest.getParams(), false);
} catch (IOException e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "ERROR handling commit/rollback");
}
return; return;
} }
if (addCmd == null) { if (addCmd == null) {

View File

@ -53,7 +53,7 @@ class SolrSchema extends AbstractSchema {
@Override @Override
protected Map<String, Table> getTableMap() { protected Map<String, Table> getTableMap() {
String zk = this.properties.getProperty("zk"); String zk = this.properties.getProperty("zk");
try(CloudSolrClient cloudSolrClient = new CloudSolrClient.Builder(Collections.singletonList(zk), Optional.empty()).build()) { try(CloudSolrClient cloudSolrClient = new CloudSolrClient.Builder(Collections.singletonList(zk), Optional.empty()).withSocketTimeout(30000).withConnectionTimeout(15000).build()) {
cloudSolrClient.connect(); cloudSolrClient.connect();
ZkStateReader zkStateReader = cloudSolrClient.getZkStateReader(); ZkStateReader zkStateReader = cloudSolrClient.getZkStateReader();
ClusterState clusterState = zkStateReader.getClusterState(); ClusterState clusterState = zkStateReader.getClusterState();
@ -77,7 +77,7 @@ class SolrSchema extends AbstractSchema {
private Map<String, LukeResponse.FieldInfo> getFieldInfo(String collection) { private Map<String, LukeResponse.FieldInfo> getFieldInfo(String collection) {
String zk = this.properties.getProperty("zk"); String zk = this.properties.getProperty("zk");
try(CloudSolrClient cloudSolrClient = new CloudSolrClient.Builder(Collections.singletonList(zk), Optional.empty()).build()) { try(CloudSolrClient cloudSolrClient = new CloudSolrClient.Builder(Collections.singletonList(zk), Optional.empty()).withSocketTimeout(30000).withConnectionTimeout(15000).build()) {
cloudSolrClient.connect(); cloudSolrClient.connect();
LukeRequest lukeRequest = new LukeRequest(); LukeRequest lukeRequest = new LukeRequest();
lukeRequest.setNumTerms(0); lukeRequest.setNumTerms(0);

View File

@ -34,8 +34,6 @@ import java.util.concurrent.Future;
import java.util.concurrent.FutureTask; import java.util.concurrent.FutureTask;
import java.util.concurrent.RunnableFuture; import java.util.concurrent.RunnableFuture;
import java.util.concurrent.Semaphore; import java.util.concurrent.Semaphore;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.TimeUnit;
import java.util.function.Predicate; import java.util.function.Predicate;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReader;
@ -66,7 +64,6 @@ import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.GroupParams; import org.apache.solr.common.params.GroupParams;
import org.apache.solr.common.params.RequiredSolrParams; import org.apache.solr.common.params.RequiredSolrParams;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.StrUtils; import org.apache.solr.common.util.StrUtils;
@ -93,7 +90,6 @@ import org.apache.solr.search.facet.FacetDebugInfo;
import org.apache.solr.search.facet.FacetRequest; import org.apache.solr.search.facet.FacetRequest;
import org.apache.solr.search.grouping.GroupingSpecification; import org.apache.solr.search.grouping.GroupingSpecification;
import org.apache.solr.util.BoundedTreeSet; import org.apache.solr.util.BoundedTreeSet;
import org.apache.solr.util.DefaultSolrThreadFactory;
import org.apache.solr.util.RTimer; import org.apache.solr.util.RTimer;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -170,6 +166,7 @@ public class SimpleFacets {
this.docsOrig = docs; this.docsOrig = docs;
this.global = params; this.global = params;
this.rb = rb; this.rb = rb;
this.facetExecutor = req.getCore().getCoreContainer().getUpdateShardHandler().getUpdateExecutor();
} }
public void setFacetDebugInfo(FacetDebugInfo fdebugParent) { public void setFacetDebugInfo(FacetDebugInfo fdebugParent) {
@ -773,13 +770,7 @@ public class SimpleFacets {
} }
}; };
static final Executor facetExecutor = new ExecutorUtil.MDCAwareThreadPoolExecutor( private final Executor facetExecutor;
0,
Integer.MAX_VALUE,
10, TimeUnit.SECONDS, // terminate idle threads after 10 sec
new SynchronousQueue<Runnable>() // directly hand off tasks
, new DefaultSolrThreadFactory("facetExecutor")
);
/** /**
* Returns a list of value constraints and the associated facet counts * Returns a list of value constraints and the associated facet counts

View File

@ -55,7 +55,7 @@ public class SolrRequestInfo {
SolrRequestInfo prev = threadLocal.get(); SolrRequestInfo prev = threadLocal.get();
if (prev != null) { if (prev != null) {
log.error("Previous SolrRequestInfo was not closed! req=" + prev.req.getOriginalParams().toString()); log.error("Previous SolrRequestInfo was not closed! req=" + prev.req.getOriginalParams().toString());
log.error("prev == info : {}", prev.req == info.req); log.error("prev == info : {}", prev.req == info.req, new RuntimeException());
} }
assert prev == null; assert prev == null;

View File

@ -60,7 +60,7 @@ public class PKIAuthenticationPlugin extends AuthenticationPlugin implements Htt
private final Map<String, PublicKey> keyCache = new ConcurrentHashMap<>(); private final Map<String, PublicKey> keyCache = new ConcurrentHashMap<>();
private final PublicKeyHandler publicKeyHandler; private final PublicKeyHandler publicKeyHandler;
private final CoreContainer cores; private final CoreContainer cores;
private final int MAX_VALIDITY = Integer.parseInt(System.getProperty("pkiauth.ttl", "10000")); private final int MAX_VALIDITY = Integer.parseInt(System.getProperty("pkiauth.ttl", "15000"));
private final String myNodeName; private final String myNodeName;
private final HttpHeaderClientInterceptor interceptor = new HttpHeaderClientInterceptor(); private final HttpHeaderClientInterceptor interceptor = new HttpHeaderClientInterceptor();
private boolean interceptorRegistered = false; private boolean interceptorRegistered = false;

View File

@ -885,9 +885,8 @@ public class HttpSolrCall {
boolean byCoreName = false; boolean byCoreName = false;
if (slices == null) { if (slices == null) {
activeSlices = new ArrayList<>();
// look by core name
byCoreName = true; byCoreName = true;
activeSlices = new ArrayList<>();
getSlicesForCollections(clusterState, activeSlices, true); getSlicesForCollections(clusterState, activeSlices, true);
if (activeSlices.isEmpty()) { if (activeSlices.isEmpty()) {
getSlicesForCollections(clusterState, activeSlices, false); getSlicesForCollections(clusterState, activeSlices, false);
@ -930,7 +929,7 @@ public class HttpSolrCall {
if (!activeReplicas || (liveNodes.contains(replica.getNodeName()) if (!activeReplicas || (liveNodes.contains(replica.getNodeName())
&& replica.getState() == Replica.State.ACTIVE)) { && replica.getState() == Replica.State.ACTIVE)) {
if (byCoreName && !collectionName.equals(replica.getStr(CORE_NAME_PROP))) { if (byCoreName && !origCorename.equals(replica.getStr(CORE_NAME_PROP))) {
// if it's by core name, make sure they match // if it's by core name, make sure they match
continue; continue;
} }

View File

@ -102,6 +102,7 @@ public class SolrDispatchFilter extends BaseSolrFilter {
private final String metricTag = Integer.toHexString(hashCode()); private final String metricTag = Integer.toHexString(hashCode());
private SolrMetricManager metricManager; private SolrMetricManager metricManager;
private String registryName; private String registryName;
private volatile boolean closeOnDestroy = true;
/** /**
* Enum to define action that needs to be processed. * Enum to define action that needs to be processed.
@ -294,11 +295,22 @@ public class SolrDispatchFilter extends BaseSolrFilter {
@Override @Override
public void destroy() { public void destroy() {
if (closeOnDestroy) {
close();
}
}
public void close() {
CoreContainer cc = cores;
cores = null;
try {
try { try {
FileCleaningTracker fileCleaningTracker = SolrRequestParsers.fileCleaningTracker; FileCleaningTracker fileCleaningTracker = SolrRequestParsers.fileCleaningTracker;
if (fileCleaningTracker != null) { if (fileCleaningTracker != null) {
fileCleaningTracker.exitWhenFinished(); fileCleaningTracker.exitWhenFinished();
} }
} catch (NullPointerException e) {
// okay
} catch (Exception e) { } catch (Exception e) {
log.warn("Exception closing FileCleaningTracker", e); log.warn("Exception closing FileCleaningTracker", e);
} finally { } finally {
@ -306,14 +318,20 @@ public class SolrDispatchFilter extends BaseSolrFilter {
} }
if (metricManager != null) { if (metricManager != null) {
metricManager.unregisterGauges(registryName, metricTag);
}
if (cores != null) {
try { try {
cores.shutdown(); metricManager.unregisterGauges(registryName, metricTag);
} catch (NullPointerException e) {
// okay
} catch (Exception e) {
log.warn("Exception closing FileCleaningTracker", e);
} finally { } finally {
cores = null; metricManager = null;
}
}
} finally {
if (cc != null) {
httpClient = null;
cc.shutdown();
} }
} }
} }
@ -594,4 +612,8 @@ public class SolrDispatchFilter extends BaseSolrFilter {
return response; return response;
} }
} }
public void closeOnDestroy(boolean closeOnDestroy) {
this.closeOnDestroy = closeOnDestroy;
}
} }

View File

@ -59,7 +59,7 @@ public final class CommitTracker implements Runnable {
private long tLogFileSizeUpperBound; private long tLogFileSizeUpperBound;
private final ScheduledExecutorService scheduler = private final ScheduledExecutorService scheduler =
Executors.newScheduledThreadPool(1, new DefaultSolrThreadFactory("commitScheduler")); Executors.newScheduledThreadPool(0, new DefaultSolrThreadFactory("commitScheduler"));
private ScheduledFuture pending; private ScheduledFuture pending;
// state // state

View File

@ -814,7 +814,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
} }
public static boolean commitOnClose = true; // TODO: make this a real config option or move it to TestInjection public static volatile boolean commitOnClose = true; // TODO: make this a real config option or move it to TestInjection
// IndexWriterCloser interface method - called from solrCoreState.decref(this) // IndexWriterCloser interface method - called from solrCoreState.decref(this)
@Override @Override
@ -823,16 +823,14 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
assert TestInjection.injectNonGracefullClose(core.getCoreContainer()); assert TestInjection.injectNonGracefullClose(core.getCoreContainer());
boolean clearRequestInfo = false; boolean clearRequestInfo = false;
solrCoreState.getCommitLock().lock();
try {
SolrQueryRequest req = new LocalSolrQueryRequest(core, new ModifiableSolrParams()); SolrQueryRequest req = new LocalSolrQueryRequest(core, new ModifiableSolrParams());
SolrQueryResponse rsp = new SolrQueryResponse(); SolrQueryResponse rsp = new SolrQueryResponse();
if (SolrRequestInfo.getRequestInfo() == null) { if (SolrRequestInfo.getRequestInfo() == null) {
clearRequestInfo = true; clearRequestInfo = true;
SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp)); // important for debugging SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp)); // important for debugging
} }
try {
if (!commitOnClose) { if (!commitOnClose) {
if (writer != null) { if (writer != null) {
writer.rollback(); writer.rollback();
@ -846,8 +844,12 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
} }
// do a commit before we quit? // do a commit before we quit?
boolean tryToCommit = writer != null && ulog != null && ulog.hasUncommittedChanges() && ulog.getState() == UpdateLog.State.ACTIVE; boolean tryToCommit = writer != null && ulog != null && ulog.hasUncommittedChanges()
&& ulog.getState() == UpdateLog.State.ACTIVE;
// be tactical with this lock! closing the updatelog can deadlock when it tries to commit
solrCoreState.getCommitLock().lock();
try {
try { try {
if (tryToCommit) { if (tryToCommit) {
log.info("Committing on IndexWriter close."); log.info("Committing on IndexWriter close.");
@ -878,6 +880,13 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
} }
} }
} finally {
solrCoreState.getCommitLock().unlock();
}
} finally {
if (clearRequestInfo) SolrRequestInfo.clearRequestInfo();
}
// we went through the normal process to commit, so we don't have to artificially // we went through the normal process to commit, so we don't have to artificially
// cap any ulog files. // cap any ulog files.
try { try {
@ -893,10 +902,6 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
writer.close(); writer.close();
} }
} finally {
solrCoreState.getCommitLock().unlock();
if (clearRequestInfo) SolrRequestInfo.clearRequestInfo();
}
} }
@Override @Override

View File

@ -26,6 +26,7 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.Sort; import org.apache.lucene.search.Sort;
import org.apache.solr.cloud.ActionThrottle; import org.apache.solr.cloud.ActionThrottle;
import org.apache.solr.cloud.RecoveryStrategy; import org.apache.solr.cloud.RecoveryStrategy;
import org.apache.solr.common.AlreadyClosedException;
import org.apache.solr.core.CoreContainer; import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.CoreDescriptor; import org.apache.solr.core.CoreDescriptor;
import org.apache.solr.core.DirectoryFactory; import org.apache.solr.core.DirectoryFactory;
@ -172,7 +173,12 @@ public abstract class SolrCoreState {
public abstract void setLastReplicateIndexSuccess(boolean success); public abstract void setLastReplicateIndexSuccess(boolean success);
public static class CoreIsClosedException extends IllegalStateException { public static class CoreIsClosedException extends AlreadyClosedException {
public CoreIsClosedException() {
super();
}
public CoreIsClosedException(String s) { public CoreIsClosedException(String s) {
super(s); super(s);
} }

View File

@ -183,7 +183,7 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer {
} }
long id = -1; long id = -1;
protected State state = State.ACTIVE; protected volatile State state = State.ACTIVE;
protected TransactionLog bufferTlog; protected TransactionLog bufferTlog;
protected TransactionLog tlog; protected TransactionLog tlog;
@ -1351,9 +1351,10 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer {
} }
public void close(boolean committed, boolean deleteOnClose) { public void close(boolean committed, boolean deleteOnClose) {
synchronized (this) {
recoveryExecutor.shutdown(); // no new tasks recoveryExecutor.shutdown(); // no new tasks
synchronized (this) {
// Don't delete the old tlogs, we want to be able to replay from them and retrieve old versions // Don't delete the old tlogs, we want to be able to replay from them and retrieve old versions
doClose(prevTlog, committed); doClose(prevTlog, committed);
@ -1373,13 +1374,14 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer {
bufferTlog.forceClose(); bufferTlog.forceClose();
} }
}
try { try {
ExecutorUtil.shutdownAndAwaitTermination(recoveryExecutor); ExecutorUtil.shutdownAndAwaitTermination(recoveryExecutor);
} catch (Exception e) { } catch (Exception e) {
SolrException.log(log, e); SolrException.log(log, e);
} }
} }
}
static class Update { static class Update {

View File

@ -66,10 +66,14 @@ public class UpdateShardHandler implements SolrMetricProducer, SolrInfoBean {
private final CloseableHttpClient updateOnlyClient; private final CloseableHttpClient updateOnlyClient;
private final CloseableHttpClient recoveryOnlyClient;
private final CloseableHttpClient defaultClient; private final CloseableHttpClient defaultClient;
private final InstrumentedPoolingHttpClientConnectionManager updateOnlyConnectionManager; private final InstrumentedPoolingHttpClientConnectionManager updateOnlyConnectionManager;
private final InstrumentedPoolingHttpClientConnectionManager recoveryOnlyConnectionManager;
private final InstrumentedPoolingHttpClientConnectionManager defaultConnectionManager; private final InstrumentedPoolingHttpClientConnectionManager defaultConnectionManager;
private final InstrumentedHttpRequestExecutor httpRequestExecutor; private final InstrumentedHttpRequestExecutor httpRequestExecutor;
@ -83,10 +87,13 @@ public class UpdateShardHandler implements SolrMetricProducer, SolrInfoBean {
public UpdateShardHandler(UpdateShardHandlerConfig cfg) { public UpdateShardHandler(UpdateShardHandlerConfig cfg) {
updateOnlyConnectionManager = new InstrumentedPoolingHttpClientConnectionManager(HttpClientUtil.getSchemaRegisteryProvider().getSchemaRegistry()); updateOnlyConnectionManager = new InstrumentedPoolingHttpClientConnectionManager(HttpClientUtil.getSchemaRegisteryProvider().getSchemaRegistry());
recoveryOnlyConnectionManager = new InstrumentedPoolingHttpClientConnectionManager(HttpClientUtil.getSchemaRegisteryProvider().getSchemaRegistry());
defaultConnectionManager = new InstrumentedPoolingHttpClientConnectionManager(HttpClientUtil.getSchemaRegisteryProvider().getSchemaRegistry()); defaultConnectionManager = new InstrumentedPoolingHttpClientConnectionManager(HttpClientUtil.getSchemaRegisteryProvider().getSchemaRegistry());
if (cfg != null ) { if (cfg != null ) {
updateOnlyConnectionManager.setMaxTotal(cfg.getMaxUpdateConnections()); updateOnlyConnectionManager.setMaxTotal(cfg.getMaxUpdateConnections());
updateOnlyConnectionManager.setDefaultMaxPerRoute(cfg.getMaxUpdateConnectionsPerHost()); updateOnlyConnectionManager.setDefaultMaxPerRoute(cfg.getMaxUpdateConnectionsPerHost());
recoveryOnlyConnectionManager.setMaxTotal(cfg.getMaxUpdateConnections());
recoveryOnlyConnectionManager.setDefaultMaxPerRoute(cfg.getMaxUpdateConnectionsPerHost());
defaultConnectionManager.setMaxTotal(cfg.getMaxUpdateConnections()); defaultConnectionManager.setMaxTotal(cfg.getMaxUpdateConnections());
defaultConnectionManager.setDefaultMaxPerRoute(cfg.getMaxUpdateConnectionsPerHost()); defaultConnectionManager.setDefaultMaxPerRoute(cfg.getMaxUpdateConnectionsPerHost());
} }
@ -110,6 +117,7 @@ public class UpdateShardHandler implements SolrMetricProducer, SolrInfoBean {
httpRequestExecutor = new InstrumentedHttpRequestExecutor(metricNameStrategy); httpRequestExecutor = new InstrumentedHttpRequestExecutor(metricNameStrategy);
updateOnlyClient = HttpClientUtil.createClient(clientParams, updateOnlyConnectionManager, false, httpRequestExecutor); updateOnlyClient = HttpClientUtil.createClient(clientParams, updateOnlyConnectionManager, false, httpRequestExecutor);
recoveryOnlyClient = HttpClientUtil.createClient(clientParams, recoveryOnlyConnectionManager, false, httpRequestExecutor);
defaultClient = HttpClientUtil.createClient(clientParams, defaultConnectionManager, false, httpRequestExecutor); defaultClient = HttpClientUtil.createClient(clientParams, defaultConnectionManager, false, httpRequestExecutor);
// following is done only for logging complete configuration. // following is done only for logging complete configuration.
@ -178,6 +186,11 @@ public class UpdateShardHandler implements SolrMetricProducer, SolrInfoBean {
return updateOnlyClient; return updateOnlyClient;
} }
// don't introduce a bug, this client is for recovery ops only!
public HttpClient getRecoveryOnlyHttpClient() {
return recoveryOnlyClient;
}
/** /**
* This method returns an executor that is meant for non search related tasks. * This method returns an executor that is meant for non search related tasks.
@ -192,6 +205,10 @@ public class UpdateShardHandler implements SolrMetricProducer, SolrInfoBean {
return defaultConnectionManager; return defaultConnectionManager;
} }
public PoolingHttpClientConnectionManager getRecoveryOnlyConnectionManager() {
return recoveryOnlyConnectionManager;
}
/** /**
* *
* @return executor for recovery operations * @return executor for recovery operations
@ -206,12 +223,14 @@ public class UpdateShardHandler implements SolrMetricProducer, SolrInfoBean {
ExecutorUtil.shutdownAndAwaitTermination(updateExecutor); ExecutorUtil.shutdownAndAwaitTermination(updateExecutor);
ExecutorUtil.shutdownAndAwaitTermination(recoveryExecutor); ExecutorUtil.shutdownAndAwaitTermination(recoveryExecutor);
} catch (Exception e) { } catch (Exception e) {
SolrException.log(log, e); throw new RuntimeException(e);
} finally { } finally {
HttpClientUtil.close(updateOnlyClient); HttpClientUtil.close(updateOnlyClient);
HttpClientUtil.close(recoveryOnlyClient);
HttpClientUtil.close(defaultClient); HttpClientUtil.close(defaultClient);
updateOnlyConnectionManager.close(); updateOnlyConnectionManager.close();
defaultConnectionManager.close(); defaultConnectionManager.close();
recoveryOnlyConnectionManager.close();
} }
} }

View File

@ -16,6 +16,9 @@
*/ */
package org.apache.solr.update.processor; package org.apache.solr.update.processor;
import static org.apache.solr.common.params.CommonParams.DISTRIB;
import static org.apache.solr.update.processor.DistributingUpdateProcessorFactory.DISTRIB_UPDATE_PARAM;
import java.io.IOException; import java.io.IOException;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
import java.util.ArrayList; import java.util.ArrayList;
@ -28,6 +31,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
import java.util.Set; import java.util.Set;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.ReentrantLock; import java.util.concurrent.locks.ReentrantLock;
@ -37,7 +43,6 @@ import org.apache.lucene.util.CharsRefBuilder;
import org.apache.solr.client.solrj.SolrRequest; import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.SolrRequest.METHOD; import org.apache.solr.client.solrj.SolrRequest.METHOD;
import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.cloud.DistributedQueue;
import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.GenericSolrRequest; import org.apache.solr.client.solrj.request.GenericSolrRequest;
import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.client.solrj.request.UpdateRequest;
@ -97,9 +102,6 @@ import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import static org.apache.solr.common.params.CommonParams.DISTRIB;
import static org.apache.solr.update.processor.DistributingUpdateProcessorFactory.DISTRIB_UPDATE_PARAM;
// NOT mt-safe... create a new processor for each add thread // NOT mt-safe... create a new processor for each add thread
// TODO: we really should not wait for distrib after local? unless a certain replication factor is asked for // TODO: we really should not wait for distrib after local? unless a certain replication factor is asked for
public class DistributedUpdateProcessor extends UpdateRequestProcessor { public class DistributedUpdateProcessor extends UpdateRequestProcessor {
@ -116,12 +118,12 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
/** /**
* Request forwarded to a leader of a different shard will be retried up to this amount of times by default * Request forwarded to a leader of a different shard will be retried up to this amount of times by default
*/ */
static final int MAX_RETRIES_ON_FORWARD_DEAULT = 25; static final int MAX_RETRIES_ON_FORWARD_DEAULT = Integer.getInteger("solr.retries.on.forward", 25);
/** /**
* Requests from leader to it's followers will be retried this amount of times by default * Requests from leader to it's followers will be retried this amount of times by default
*/ */
static final int MAX_RETRIES_TO_FOLLOWERS_DEFAULT = 3; static final int MAX_RETRIES_TO_FOLLOWERS_DEFAULT = Integer.getInteger("solr.retries.to.followers", 3);
/** /**
* Values this processor supports for the <code>DISTRIB_UPDATE_PARAM</code>. * Values this processor supports for the <code>DISTRIB_UPDATE_PARAM</code>.
@ -434,6 +436,46 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
return false; return false;
} }
private List<Node> getReplicaNodesForLeader(String shardId, Replica leaderReplica) {
ClusterState clusterState = zkController.getZkStateReader().getClusterState();
String leaderCoreNodeName = leaderReplica.getName();
List<Replica> replicas = clusterState.getCollection(collection)
.getSlice(shardId)
.getReplicas(EnumSet.of(Replica.Type.NRT, Replica.Type.TLOG));
replicas.removeIf((replica) -> replica.getName().equals(leaderCoreNodeName));
if (replicas.isEmpty()) {
return null;
}
// check for test param that lets us miss replicas
String[] skipList = req.getParams().getParams(TEST_DISTRIB_SKIP_SERVERS);
Set<String> skipListSet = null;
if (skipList != null) {
skipListSet = new HashSet<>(skipList.length);
skipListSet.addAll(Arrays.asList(skipList));
log.info("test.distrib.skip.servers was found and contains:" + skipListSet);
}
List<Node> nodes = new ArrayList<>(replicas.size());
skippedCoreNodeNames = new HashSet<>();
ZkShardTerms zkShardTerms = zkController.getShardTerms(collection, shardId);
for (Replica replica : replicas) {
String coreNodeName = replica.getName();
if (skipList != null && skipListSet.contains(replica.getCoreUrl())) {
log.info("check url:" + replica.getCoreUrl() + " against:" + skipListSet + " result:true");
} else if (zkShardTerms.registered(coreNodeName) && zkShardTerms.skipSendingUpdatesTo(coreNodeName)) {
log.debug("skip url:{} cause its term is less than leader", replica.getCoreUrl());
skippedCoreNodeNames.add(replica.getName());
} else if (!clusterState.getLiveNodes().contains(replica.getNodeName())
|| replica.getState() == Replica.State.DOWN) {
skippedCoreNodeNames.add(replica.getName());
} else {
nodes.add(new StdNode(new ZkCoreNodeProps(replica), collection, shardId));
}
}
return nodes;
}
/** For {@link org.apache.solr.common.params.CollectionParams.CollectionAction#SPLITSHARD} */ /** For {@link org.apache.solr.common.params.CollectionParams.CollectionAction#SPLITSHARD} */
private List<Node> getSubShardLeaders(DocCollection coll, String shardId, String docId, SolrInputDocument doc) { private List<Node> getSubShardLeaders(DocCollection coll, String shardId, String docId, SolrInputDocument doc) {
Collection<Slice> allSlices = coll.getSlices(); Collection<Slice> allSlices = coll.getSlices();
@ -521,8 +563,7 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
ZkStateReader.SHARD_ID_PROP, myShardId, ZkStateReader.SHARD_ID_PROP, myShardId,
"routeKey", routeKey + "!"); "routeKey", routeKey + "!");
SolrZkClient zkClient = zkController.getZkClient(); SolrZkClient zkClient = zkController.getZkClient();
DistributedQueue queue = Overseer.getStateUpdateQueue(zkClient); zkController.getOverseer().offerStateUpdate(Utils.toJSON(map));
queue.offer(Utils.toJSON(map));
} catch (KeeperException e) { } catch (KeeperException e) {
log.warn("Exception while removing routing rule for route key: " + routeKey, e); log.warn("Exception while removing routing rule for route key: " + routeKey, e);
} catch (Exception e) { } catch (Exception e) {
@ -1865,27 +1906,33 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
updateCommand = cmd; updateCommand = cmd;
List<Node> nodes = null; List<Node> nodes = null;
boolean singleLeader = false; Replica leaderReplica = null;
if (zkEnabled) { if (zkEnabled) {
zkCheck(); zkCheck();
try {
leaderReplica = zkController.getZkStateReader().getLeaderRetry(collection, cloudDesc.getShardId());
} catch (InterruptedException e) {
Thread.interrupted();
throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "Exception finding leader for shard " + cloudDesc.getShardId(), e);
}
isLeader = leaderReplica.getName().equals(cloudDesc.getCoreNodeName());
nodes = getCollectionUrls(collection, EnumSet.of(Replica.Type.TLOG,Replica.Type.NRT)); nodes = getCollectionUrls(collection, EnumSet.of(Replica.Type.TLOG,Replica.Type.NRT), true);
if (nodes == null) { if (nodes == null) {
// This could happen if there are only pull replicas // This could happen if there are only pull replicas
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"Unable to distribute commit operation. No replicas available of types " + Replica.Type.TLOG + " or " + Replica.Type.NRT); "Unable to distribute commit operation. No replicas available of types " + Replica.Type.TLOG + " or " + Replica.Type.NRT);
} }
if (isLeader && nodes.size() == 1 && replicaType != Replica.Type.PULL) {
singleLeader = true; nodes.removeIf((node) -> node.getNodeProps().getNodeName().equals(zkController.getNodeName())
} && node.getNodeProps().getCoreName().equals(req.getCore().getName()));
} }
if (!zkEnabled || req.getParams().getBool(COMMIT_END_POINT, false) || singleLeader) { CompletionService<Exception> completionService = new ExecutorCompletionService<>(req.getCore().getCoreContainer().getUpdateShardHandler().getUpdateExecutor());
Set<Future<Exception>> pending = new HashSet<>();
if (!zkEnabled || (!isLeader && req.getParams().get(COMMIT_END_POINT, "").equals("replicas"))) {
if (replicaType == Replica.Type.TLOG) { if (replicaType == Replica.Type.TLOG) {
try {
Replica leaderReplica = zkController.getZkStateReader().getLeaderRetry(
collection, cloudDesc.getShardId());
isLeader = leaderReplica.getName().equals(cloudDesc.getCoreNodeName());
if (isLeader) { if (isLeader) {
long commitVersion = vinfo.getNewClock(); long commitVersion = vinfo.getNewClock();
cmd.setVersion(commitVersion); cmd.setVersion(commitVersion);
@ -1894,9 +1941,7 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
assert TestInjection.waitForInSyncWithLeader(req.getCore(), assert TestInjection.waitForInSyncWithLeader(req.getCore(),
zkController, collection, cloudDesc.getShardId()) : "Core " + req.getCore() + " not in sync with leader"; zkController, collection, cloudDesc.getShardId()) : "Core " + req.getCore() + " not in sync with leader";
} }
} catch (InterruptedException e) {
throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "Exception finding leader for shard " + cloudDesc.getShardId(), e);
}
} else if (replicaType == Replica.Type.PULL) { } else if (replicaType == Replica.Type.PULL) {
log.warn("Commit not supported on replicas of type " + Replica.Type.PULL); log.warn("Commit not supported on replicas of type " + Replica.Type.PULL);
} else { } else {
@ -1905,21 +1950,51 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
long commitVersion = vinfo.getNewClock(); long commitVersion = vinfo.getNewClock();
cmd.setVersion(commitVersion); cmd.setVersion(commitVersion);
} }
doLocalCommit(cmd); doLocalCommit(cmd);
} }
} else { } else {
ModifiableSolrParams params = new ModifiableSolrParams(filterParams(req.getParams())); ModifiableSolrParams params = new ModifiableSolrParams(filterParams(req.getParams()));
if (!req.getParams().getBool(COMMIT_END_POINT, false)) {
params.set(COMMIT_END_POINT, true); List<Node> useNodes = null;
params.set(DISTRIB_UPDATE_PARAM, DistribPhase.FROMLEADER.toString()); if (req.getParams().get(COMMIT_END_POINT) == null) {
useNodes = nodes;
params.set(DISTRIB_UPDATE_PARAM, DistribPhase.TOLEADER.toString());
params.set(COMMIT_END_POINT, "leaders");
if (useNodes != null) {
params.set(DISTRIB_FROM, ZkCoreNodeProps.getCoreUrl( params.set(DISTRIB_FROM, ZkCoreNodeProps.getCoreUrl(
zkController.getBaseUrl(), req.getCore().getName())); zkController.getBaseUrl(), req.getCore().getName()));
if (nodes != null) { cmdDistrib.distribCommit(cmd, useNodes, params);
cmdDistrib.distribCommit(cmd, nodes, params); cmdDistrib.blockAndDoRetries();
}
}
if (isLeader) {
params.set(DISTRIB_UPDATE_PARAM, DistribPhase.FROMLEADER.toString());
params.set(COMMIT_END_POINT, "replicas");
useNodes = getReplicaNodesForLeader(cloudDesc.getShardId(), leaderReplica);
if (useNodes != null) {
params.set(DISTRIB_FROM, ZkCoreNodeProps.getCoreUrl(
zkController.getBaseUrl(), req.getCore().getName()));
cmdDistrib.distribCommit(cmd, useNodes, params);
}
// NRT replicas will always commit
if (vinfo != null) {
long commitVersion = vinfo.getNewClock();
cmd.setVersion(commitVersion);
}
doLocalCommit(cmd);
if (useNodes != null) {
cmdDistrib.blockAndDoRetries(); cmdDistrib.blockAndDoRetries();
} }
} }
} }
} }
private void doLocalCommit(CommitUpdateCommand cmd) throws IOException { private void doLocalCommit(CommitUpdateCommand cmd) throws IOException {
@ -1951,7 +2026,7 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
if (next != null && nodes == null) next.finish(); if (next != null && nodes == null) next.finish();
} }
private List<Node> getCollectionUrls(String collection, EnumSet<Replica.Type> types) { private List<Node> getCollectionUrls(String collection, EnumSet<Replica.Type> types, boolean onlyLeaders) {
ClusterState clusterState = zkController.getClusterState(); ClusterState clusterState = zkController.getClusterState();
final DocCollection docCollection = clusterState.getCollectionOrNull(collection); final DocCollection docCollection = clusterState.getCollectionOrNull(collection);
if (collection == null || docCollection.getSlicesMap() == null) { if (collection == null || docCollection.getSlicesMap() == null) {
@ -1962,7 +2037,14 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
final List<Node> urls = new ArrayList<>(slices.size()); final List<Node> urls = new ArrayList<>(slices.size());
for (Map.Entry<String,Slice> sliceEntry : slices.entrySet()) { for (Map.Entry<String,Slice> sliceEntry : slices.entrySet()) {
Slice replicas = slices.get(sliceEntry.getKey()); Slice replicas = slices.get(sliceEntry.getKey());
if (onlyLeaders) {
Replica replica = docCollection.getLeader(replicas.getName());
if (replica != null) {
ZkCoreNodeProps nodeProps = new ZkCoreNodeProps(replica);
urls.add(new StdNode(nodeProps, collection, replicas.getName()));
}
continue;
}
Map<String,Replica> shardMap = replicas.getReplicasMap(); Map<String,Replica> shardMap = replicas.getReplicasMap();
for (Entry<String,Replica> entry : shardMap.entrySet()) { for (Entry<String,Replica> entry : shardMap.entrySet()) {

View File

@ -2381,7 +2381,7 @@ public class SolrCLI {
protected void deleteCollection(CommandLine cli) throws Exception { protected void deleteCollection(CommandLine cli) throws Exception {
String zkHost = getZkHost(cli); String zkHost = getZkHost(cli);
try (CloudSolrClient cloudSolrClient = new CloudSolrClient.Builder(Collections.singletonList(zkHost), Optional.empty()).build()) { try (CloudSolrClient cloudSolrClient = new CloudSolrClient.Builder(Collections.singletonList(zkHost), Optional.empty()).withSocketTimeout(30000).withConnectionTimeout(15000).build()) {
echoIfVerbose("Connecting to ZooKeeper at " + zkHost, cli); echoIfVerbose("Connecting to ZooKeeper at " + zkHost, cli);
cloudSolrClient.connect(); cloudSolrClient.connect();
deleteCollection(cloudSolrClient, cli); deleteCollection(cloudSolrClient, cli);

View File

@ -16,6 +16,9 @@
*/ */
package org.apache.solr.util; package org.apache.solr.util;
import static org.apache.solr.handler.ReplicationHandler.CMD_DETAILS;
import static org.apache.solr.handler.ReplicationHandler.COMMAND;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
import java.lang.reflect.Method; import java.lang.reflect.Method;
import java.util.Collections; import java.util.Collections;
@ -24,6 +27,7 @@ import java.util.Random;
import java.util.Set; import java.util.Set;
import java.util.Timer; import java.util.Timer;
import java.util.TimerTask; import java.util.TimerTask;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch; import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
@ -50,9 +54,6 @@ import org.apache.solr.update.SolrIndexWriter;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import static org.apache.solr.handler.ReplicationHandler.CMD_DETAILS;
import static org.apache.solr.handler.ReplicationHandler.COMMAND;
/** /**
* Allows random faults to be injected in running code during test runs. * Allows random faults to be injected in running code during test runs.
@ -116,43 +117,50 @@ public class TestInjection {
} }
} }
public static String nonGracefullClose = null; public volatile static String nonGracefullClose = null;
public static String failReplicaRequests = null; public volatile static String failReplicaRequests = null;
public static String failUpdateRequests = null; public volatile static String failUpdateRequests = null;
public static String nonExistentCoreExceptionAfterUnload = null; public volatile static String nonExistentCoreExceptionAfterUnload = null;
public static String updateLogReplayRandomPause = null; public volatile static String updateLogReplayRandomPause = null;
public static String updateRandomPause = null; public volatile static String updateRandomPause = null;
public static String prepRecoveryOpPauseForever = null; public volatile static String prepRecoveryOpPauseForever = null;
public static String randomDelayInCoreCreation = null; public volatile static String randomDelayInCoreCreation = null;
public static int randomDelayMaxInCoreCreationInSec = 10; public volatile static int randomDelayMaxInCoreCreationInSec = 10;
public static String splitFailureBeforeReplicaCreation = null; public volatile static String splitFailureBeforeReplicaCreation = null;
public static String splitFailureAfterReplicaCreation = null; public volatile static String splitFailureAfterReplicaCreation = null;
public static CountDownLatch splitLatch = null; public volatile static CountDownLatch splitLatch = null;
public static String waitForReplicasInSync = "true:60"; public volatile static String waitForReplicasInSync = "true:60";
public static String failIndexFingerprintRequests = null; public volatile static String failIndexFingerprintRequests = null;
public static String wrongIndexFingerprint = null; public volatile static String wrongIndexFingerprint = null;
private static Set<Timer> timers = Collections.synchronizedSet(new HashSet<Timer>()); private volatile static Set<Timer> timers = Collections.synchronizedSet(new HashSet<Timer>());
private static AtomicInteger countPrepRecoveryOpPauseForever = new AtomicInteger(0); private volatile static AtomicInteger countPrepRecoveryOpPauseForever = new AtomicInteger(0);
public static Integer delayBeforeSlaveCommitRefresh=null; public volatile static Integer delayBeforeSlaveCommitRefresh=null;
public static boolean uifOutOfMemoryError = false; public volatile static boolean uifOutOfMemoryError = false;
private volatile static CountDownLatch notifyPauseForeverDone = new CountDownLatch(1);
public static void notifyPauseForeverDone() {
notifyPauseForeverDone.countDown();
notifyPauseForeverDone = new CountDownLatch(1);
}
public static void reset() { public static void reset() {
nonGracefullClose = null; nonGracefullClose = null;
@ -172,7 +180,8 @@ public class TestInjection {
wrongIndexFingerprint = null; wrongIndexFingerprint = null;
delayBeforeSlaveCommitRefresh = null; delayBeforeSlaveCommitRefresh = null;
uifOutOfMemoryError = false; uifOutOfMemoryError = false;
notifyPauseForeverDone();
newSearcherHooks.clear();
for (Timer timer : timers) { for (Timer timer : timers) {
timer.cancel(); timer.cancel();
} }
@ -371,19 +380,20 @@ public class TestInjection {
} }
public static boolean injectPrepRecoveryOpPauseForever() { public static boolean injectPrepRecoveryOpPauseForever() {
if (prepRecoveryOpPauseForever != null) { String val = prepRecoveryOpPauseForever;
if (val != null) {
Random rand = random(); Random rand = random();
if (null == rand) return true; if (null == rand) return true;
Pair<Boolean,Integer> pair = parseValue(val);
Pair<Boolean,Integer> pair = parseValue(prepRecoveryOpPauseForever);
boolean enabled = pair.first(); boolean enabled = pair.first();
int chanceIn100 = pair.second(); int chanceIn100 = pair.second();
// Prevent for continuous pause forever // Prevent for continuous pause forever
if (enabled && rand.nextInt(100) >= (100 - chanceIn100) && countPrepRecoveryOpPauseForever.get() < 1) { if (enabled && rand.nextInt(100) >= (100 - chanceIn100) && countPrepRecoveryOpPauseForever.get() < 1) {
countPrepRecoveryOpPauseForever.incrementAndGet(); countPrepRecoveryOpPauseForever.incrementAndGet();
log.info("inject pause forever for prep recovery op"); log.info("inject pause forever for prep recovery op");
try { try {
Thread.sleep(Integer.MAX_VALUE); notifyPauseForeverDone.await();
} catch (InterruptedException e) { } catch (InterruptedException e) {
Thread.currentThread().interrupt(); Thread.currentThread().interrupt();
} }
@ -481,9 +491,12 @@ public class TestInjection {
return false; return false;
} }
private static Pair<Boolean,Integer> parseValue(String raw) { private static Pair<Boolean,Integer> parseValue(final String raw) {
if (raw == null) return new Pair<>(false, 0);
Matcher m = ENABLED_PERCENT.matcher(raw); Matcher m = ENABLED_PERCENT.matcher(raw);
if (!m.matches()) throw new RuntimeException("No match, probably bad syntax: " + raw); if (!m.matches()) {
throw new RuntimeException("No match, probably bad syntax: " + raw);
}
String val = m.group(1); String val = m.group(1);
String percent = "100"; String percent = "100";
if (m.groupCount() == 2) { if (m.groupCount() == 2) {
@ -511,4 +524,24 @@ public class TestInjection {
return true; return true;
} }
static Set<Hook> newSearcherHooks = ConcurrentHashMap.newKeySet();
public interface Hook {
public void newSearcher(String collectionName);
public void waitForSearcher(String collection, int cnt, int timeoutms, boolean failOnTimeout) throws InterruptedException;
}
public static boolean newSearcherHook(Hook hook) {
newSearcherHooks.add(hook);
return true;
}
public static boolean injectSearcherHooks(String collectionName) {
for (Hook hook : newSearcherHooks) {
hook.newSearcher(collectionName);
}
return true;
}
} }

View File

@ -61,8 +61,13 @@ public class TimeOut {
public void waitFor(String messageOnTimeOut, Supplier<Boolean> supplier) public void waitFor(String messageOnTimeOut, Supplier<Boolean> supplier)
throws InterruptedException, TimeoutException { throws InterruptedException, TimeoutException {
while (!supplier.get() && !hasTimedOut()) { while (!supplier.get() && !hasTimedOut()) {
Thread.sleep(500); Thread.sleep(250);
} }
if (hasTimedOut()) throw new TimeoutException(messageOnTimeOut); if (hasTimedOut()) throw new TimeoutException(messageOnTimeOut);
} }
@Override
public String toString() {
return "TimeOut [timeoutAt=" + timeoutAt + ", startTime=" + startTime + ", timeSource=" + timeSource + "]";
}
} }

View File

@ -35,6 +35,7 @@
<int name="autoReplicaFailoverWaitAfterExpiration">${autoReplicaFailoverWaitAfterExpiration:10000}</int> <int name="autoReplicaFailoverWaitAfterExpiration">${autoReplicaFailoverWaitAfterExpiration:10000}</int>
<int name="autoReplicaFailoverWorkLoopDelay">${autoReplicaFailoverWorkLoopDelay:10000}</int> <int name="autoReplicaFailoverWorkLoopDelay">${autoReplicaFailoverWorkLoopDelay:10000}</int>
<int name="autoReplicaFailoverBadNodeExpiration">${autoReplicaFailoverBadNodeExpiration:60000}</int> <int name="autoReplicaFailoverBadNodeExpiration">${autoReplicaFailoverBadNodeExpiration:60000}</int>
<int name="createCollectionWaitTimeTillActive">${createCollectionWaitTimeTillActive:30}</int>
</solrcloud> </solrcloud>
<metrics> <metrics>

View File

@ -27,7 +27,7 @@
<shardHandlerFactory name="shardHandlerFactory" class="HttpShardHandlerFactory"> <shardHandlerFactory name="shardHandlerFactory" class="HttpShardHandlerFactory">
<str name="urlScheme">${urlScheme:}</str> <str name="urlScheme">${urlScheme:}</str>
<int name="socketTimeout">${socketTimeout:90000}</int> <int name="socketTimeout">${socketTimeout:15000}</int>
<int name="connTimeout">${connTimeout:15000}</int> <int name="connTimeout">${connTimeout:15000}</int>
</shardHandlerFactory> </shardHandlerFactory>
@ -40,12 +40,12 @@
<str name="host">127.0.0.1</str> <str name="host">127.0.0.1</str>
<int name="hostPort">${hostPort:8983}</int> <int name="hostPort">${hostPort:8983}</int>
<str name="hostContext">${hostContext:solr}</str> <str name="hostContext">${hostContext:solr}</str>
<int name="zkClientTimeout">${solr.zkclienttimeout:30000}</int> <int name="zkClientTimeout">${solr.zkclienttimeout:60000}</int> <!-- This should be high by default - dc's are expensive -->
<bool name="genericCoreNodeNames">${genericCoreNodeNames:true}</bool> <bool name="genericCoreNodeNames">${genericCoreNodeNames:true}</bool>
<int name="leaderVoteWait">${leaderVoteWait:10000}</int> <int name="leaderVoteWait">${leaderVoteWait:15000}</int> <!-- We are running tests - the default should be low, not like production -->
<int name="leaderConflictResolveWait">${leaderConflictResolveWait:180000}</int> <int name="leaderConflictResolveWait">${leaderConflictResolveWait:45000}</int>
<int name="distribUpdateConnTimeout">${distribUpdateConnTimeout:45000}</int> <int name="distribUpdateConnTimeout">${distribUpdateConnTimeout:5000}</int>
<int name="distribUpdateSoTimeout">${distribUpdateSoTimeout:340000}</int> <int name="distribUpdateSoTimeout">${distribUpdateSoTimeout:15000}</int> <!-- We are running tests - the default should be low, not like production -->
<int name="autoReplicaFailoverWaitAfterExpiration">${autoReplicaFailoverWaitAfterExpiration:10000}</int> <int name="autoReplicaFailoverWaitAfterExpiration">${autoReplicaFailoverWaitAfterExpiration:10000}</int>
<int name="autoReplicaFailoverWorkLoopDelay">${autoReplicaFailoverWorkLoopDelay:10000}</int> <int name="autoReplicaFailoverWorkLoopDelay">${autoReplicaFailoverWorkLoopDelay:10000}</int>
<int name="autoReplicaFailoverBadNodeExpiration">${autoReplicaFailoverBadNodeExpiration:60000}</int> <int name="autoReplicaFailoverBadNodeExpiration">${autoReplicaFailoverBadNodeExpiration:60000}</int>

View File

@ -22,9 +22,14 @@ import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import java.util.EnumSet; import java.util.EnumSet;
import java.util.HashSet;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.Future;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
import org.apache.lucene.util.LuceneTestCase.Slow; import org.apache.lucene.util.LuceneTestCase.Slow;
@ -38,16 +43,15 @@ import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.FieldStatsInfo; import org.apache.solr.client.solrj.response.FieldStatsInfo;
import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.RangeFacet; import org.apache.solr.client.solrj.response.RangeFacet;
import org.apache.solr.cloud.ChaosMonkey;
import org.apache.solr.common.EnumFieldValue; import org.apache.solr.common.EnumFieldValue;
import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.FacetParams.FacetRangeMethod;
import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.ShardParams; import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.StatsParams; import org.apache.solr.common.params.StatsParams;
import org.apache.solr.common.params.FacetParams.FacetRangeMethod;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.handler.component.ShardResponse; import org.apache.solr.handler.component.ShardResponse;
import org.apache.solr.handler.component.StatsComponentTest.StatSetCombinations; import org.apache.solr.handler.component.StatsComponentTest.StatSetCombinations;
@ -100,6 +104,11 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
// we validate the connection before use on the restarted // we validate the connection before use on the restarted
// server so that we don't use a bad one // server so that we don't use a bad one
System.setProperty("validateAfterInactivity", "200"); System.setProperty("validateAfterInactivity", "200");
System.setProperty("solr.httpclient.retries", "0");
System.setProperty("distribUpdateSoTimeout", "5000");
} }
public TestDistributedSearch() { public TestDistributedSearch() {
@ -109,6 +118,9 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
@Test @Test
public void test() throws Exception { public void test() throws Exception {
assertEquals(clients.size(), jettys.size());
QueryResponse rsp = null; QueryResponse rsp = null;
int backupStress = stress; // make a copy so we can restore int backupStress = stress; // make a copy so we can restore
@ -952,23 +964,30 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
assertEquals("should have an entry for each shard ["+sinfo+"] "+shards, cnt, sinfo.size()); assertEquals("should have an entry for each shard ["+sinfo+"] "+shards, cnt, sinfo.size());
// test shards.tolerant=true // test shards.tolerant=true
for(int numDownServers = 0; numDownServers < jettys.size()-1; numDownServers++)
{ List<JettySolrRunner> upJettys = Collections.synchronizedList(new ArrayList<>(jettys));
List<JettySolrRunner> upJettys = new ArrayList<>(jettys); List<SolrClient> upClients = Collections.synchronizedList(new ArrayList<>(clients));
List<SolrClient> upClients = new ArrayList<>(clients); List<JettySolrRunner> downJettys = Collections.synchronizedList(new ArrayList<>());
List<JettySolrRunner> downJettys = new ArrayList<>(); List<String> upShards = Collections.synchronizedList(new ArrayList<>(Arrays.asList(shardsArr)));
List<String> upShards = new ArrayList<>(Arrays.asList(shardsArr));
for(int i=0; i<numDownServers; i++) int cap = Math.max(upJettys.size() - 1, 1);
{
int numDownServers = random().nextInt(cap);
for (int i = 0; i < numDownServers; i++) {
if (upJettys.size() == 1) {
continue;
}
// shut down some of the jettys // shut down some of the jettys
int indexToRemove = r.nextInt(upJettys.size()); int indexToRemove = r.nextInt(upJettys.size() - 1);
JettySolrRunner downJetty = upJettys.remove(indexToRemove); JettySolrRunner downJetty = upJettys.remove(indexToRemove);
upClients.remove(indexToRemove); upClients.remove(indexToRemove);
upShards.remove(indexToRemove); upShards.remove(indexToRemove);
ChaosMonkey.stop(downJetty); downJetty.stop();
downJettys.add(downJetty); downJettys.add(downJetty);
} }
Thread.sleep(100);
queryPartialResults(upShards, upClients, queryPartialResults(upShards, upClients,
"q", "*:*", "q", "*:*",
"facet", "true", "facet", "true",
@ -995,7 +1014,7 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
"group.query", t1 + ":kings OR " + t1 + ":eggs", "group.query", t1 + ":kings OR " + t1 + ":eggs",
"group.limit", 10, "group.limit", 10,
"sort", i1 + " asc, id asc", "sort", i1 + " asc, id asc",
CommonParams.TIME_ALLOWED, 1, CommonParams.TIME_ALLOWED, 10000,
ShardParams.SHARDS_INFO, "true", ShardParams.SHARDS_INFO, "true",
ShardParams.SHARDS_TOLERANT, "true"); ShardParams.SHARDS_TOLERANT, "true");
@ -1017,10 +1036,10 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
// restart the jettys // restart the jettys
for (JettySolrRunner downJetty : downJettys) { for (JettySolrRunner downJetty : downJettys) {
ChaosMonkey.start(downJetty); downJetty.start();
}
} }
// This index has the same number for every field // This index has the same number for every field
// TODO: This test currently fails because debug info is obtained only // TODO: This test currently fails because debug info is obtained only
@ -1125,17 +1144,22 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
params.remove("distrib"); params.remove("distrib");
setDistributedParams(params); setDistributedParams(params);
if (upClients.size() == 0) {
return;
}
QueryResponse rsp = queryRandomUpServer(params, upClients); QueryResponse rsp = queryRandomUpServer(params, upClients);
comparePartialResponses(rsp, controlRsp, upShards); comparePartialResponses(rsp, controlRsp, upShards);
if (stress > 0) { if (stress > 0) {
log.info("starting stress..."); log.info("starting stress...");
Thread[] threads = new Thread[nThreads]; Set<Future<Object>> pending = new HashSet<>();;
ExecutorCompletionService<Object> cs = new ExecutorCompletionService<>(executor);
Callable[] threads = new Callable[nThreads];
for (int i = 0; i < threads.length; i++) { for (int i = 0; i < threads.length; i++) {
threads[i] = new Thread() { threads[i] = new Callable() {
@Override @Override
public void run() { public Object call() {
for (int j = 0; j < stress; j++) { for (int j = 0; j < stress; j++) {
int which = r.nextInt(upClients.size()); int which = r.nextInt(upClients.size());
SolrClient client = upClients.get(which); SolrClient client = upClients.get(which);
@ -1148,21 +1172,32 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
} }
return null;
} }
}; };
threads[i].start(); pending.add(cs.submit(threads[i]));
} }
for (Thread thread : threads) { while (pending.size() > 0) {
thread.join(); Future<Object> future = cs.take();
pending.remove(future);
future.get();
} }
} }
} }
protected QueryResponse queryRandomUpServer(ModifiableSolrParams params, List<SolrClient> upClients) throws SolrServerException, IOException { protected QueryResponse queryRandomUpServer(ModifiableSolrParams params, List<SolrClient> upClients)
throws SolrServerException, IOException {
// query a random "up" server // query a random "up" server
int which = r.nextInt(upClients.size()); SolrClient client;
SolrClient client = upClients.get(which); if (upClients.size() == 1) {
client = upClients.get(0);
} else {
int which = r.nextInt(upClients.size() - 1);
client = upClients.get(which);
}
QueryResponse rsp = client.query(params); QueryResponse rsp = client.query(params);
return rsp; return rsp;
} }
@ -1195,7 +1230,7 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
assertTrue("Expected timeAllowedError or to find shardAddress in the up shard info: " + info.toString(), info.get("shardAddress") != null); assertTrue("Expected timeAllowedError or to find shardAddress in the up shard info: " + info.toString(), info.get("shardAddress") != null);
} }
} else { } else {
assertEquals("Expected to find the " + SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY + " header set if a shard is down", assertEquals("Expected to find the " + SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY + " header set if a shard is down. Response: " + rsp,
Boolean.TRUE, rsp.getHeader().get(SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY)); Boolean.TRUE, rsp.getHeader().get(SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY));
assertTrue("Expected to find error in the down shard info: " + info.toString(), info.get("error") != null); assertTrue("Expected to find error in the down shard info: " + info.toString(), info.get("error") != null);
} }

View File

@ -16,14 +16,16 @@
*/ */
package org.apache.solr; package org.apache.solr;
import java.io.IOException;
import org.apache.lucene.search.TimeLimitingCollector;
import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.TestUtil;
import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputDocument;
import org.junit.AfterClass;
import org.junit.Test; import org.junit.Test;
import java.io.IOException;
/** /**
* Tests that highlighting doesn't break on grouped documents * Tests that highlighting doesn't break on grouped documents
* with duplicate unique key fields stored on multiple shards. * with duplicate unique key fields stored on multiple shards.
@ -34,6 +36,12 @@ public class TestHighlightDedupGrouping extends BaseDistributedSearchTestCase {
private static final String group_ti1 = "group_ti1"; private static final String group_ti1 = "group_ti1";
private static final String shard_i1 = "shard_i1"; private static final String shard_i1 = "shard_i1";
@AfterClass
public static void afterClass() throws Exception {
TimeLimitingCollector.getGlobalTimerThread().stopTimer();
TimeLimitingCollector.getGlobalTimerThread().join();
}
@Test @Test
@ShardsFixed(num = 2) @ShardsFixed(num = 2)
public void test() throws Exception { public void test() throws Exception {

View File

@ -57,7 +57,7 @@ public class TestTolerantSearch extends SolrJettyTestBase {
@BeforeClass @BeforeClass
public static void createThings() throws Exception { public static void createThings() throws Exception {
solrHome = createSolrHome(); solrHome = createSolrHome();
createJetty(solrHome.getAbsolutePath()); createAndStartJetty(solrHome.getAbsolutePath());
String url = jetty.getBaseUrl().toString(); String url = jetty.getBaseUrl().toString();
collection1 = getHttpSolrClient(url + "/collection1"); collection1 = getHttpSolrClient(url + "/collection1");
collection2 = getHttpSolrClient(url + "/collection2"); collection2 = getHttpSolrClient(url + "/collection2");

View File

@ -16,6 +16,9 @@
*/ */
package org.apache.solr.cloud; package org.apache.solr.cloud;
import static org.apache.solr.client.solrj.response.RequestStatusState.COMPLETED;
import static org.apache.solr.client.solrj.response.RequestStatusState.FAILED;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
import java.util.Collection; import java.util.Collection;
import java.util.EnumSet; import java.util.EnumSet;
@ -27,26 +30,21 @@ import org.apache.solr.client.solrj.response.RequestStatusState;
import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica; import org.apache.solr.common.cloud.Replica;
import org.apache.solr.util.LogLevel;
import org.junit.Before; import org.junit.Before;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import static org.apache.solr.client.solrj.response.RequestStatusState.COMPLETED;
import static org.apache.solr.client.solrj.response.RequestStatusState.FAILED;
/** /**
* *
*/ */
@LogLevel("org.apache.solr.cloud=DEBUG;org.apache.solr.cloud.Overseer=DEBUG;org.apache.solr.cloud.overseer=DEBUG;")
public class AddReplicaTest extends SolrCloudTestCase { public class AddReplicaTest extends SolrCloudTestCase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@BeforeClass @BeforeClass
public static void setupCluster() throws Exception { public static void setupCluster() throws Exception {
configureCluster(4) configureCluster(3)
.addConfig("conf1", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf")) .addConfig("conf1", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf"))
.configure(); .configure();
} }
@ -59,13 +57,14 @@ public class AddReplicaTest extends SolrCloudTestCase {
@Test @Test
public void testAddMultipleReplicas() throws Exception { public void testAddMultipleReplicas() throws Exception {
cluster.waitForAllNodes(5);
String collection = "testAddMultipleReplicas"; String collection = "testAddMultipleReplicas";
CloudSolrClient cloudClient = cluster.getSolrClient(); CloudSolrClient cloudClient = cluster.getSolrClient();
CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collection, "conf1", 1, 1); CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collection, "conf1", 1, 1);
create.setMaxShardsPerNode(2); create.setMaxShardsPerNode(2);
cloudClient.request(create); cloudClient.request(create);
cluster.waitForActiveCollection(collection, 1, 1);
CollectionAdminRequest.AddReplica addReplica = CollectionAdminRequest.addReplicaToShard(collection, "shard1") CollectionAdminRequest.AddReplica addReplica = CollectionAdminRequest.addReplicaToShard(collection, "shard1")
.setNrtReplicas(1) .setNrtReplicas(1)
@ -73,6 +72,9 @@ public class AddReplicaTest extends SolrCloudTestCase {
.setPullReplicas(1); .setPullReplicas(1);
RequestStatusState status = addReplica.processAndWait(collection + "_xyz1", cloudClient, 120); RequestStatusState status = addReplica.processAndWait(collection + "_xyz1", cloudClient, 120);
assertEquals(COMPLETED, status); assertEquals(COMPLETED, status);
cluster.waitForActiveCollection(collection, 1, 4);
DocCollection docCollection = cloudClient.getZkStateReader().getClusterState().getCollectionOrNull(collection); DocCollection docCollection = cloudClient.getZkStateReader().getClusterState().getCollectionOrNull(collection);
assertNotNull(docCollection); assertNotNull(docCollection);
assertEquals(4, docCollection.getReplicas().size()); assertEquals(4, docCollection.getReplicas().size());
@ -110,6 +112,7 @@ public class AddReplicaTest extends SolrCloudTestCase {
.setCreateNodeSet(String.join(",", createNodeSet)); .setCreateNodeSet(String.join(",", createNodeSet));
status = addReplica.processAndWait(collection + "_xyz1", cloudClient, 120); status = addReplica.processAndWait(collection + "_xyz1", cloudClient, 120);
assertEquals(COMPLETED, status); assertEquals(COMPLETED, status);
waitForState("Timedout wait for collection to be created", collection, clusterShape(1, 9));
docCollection = cloudClient.getZkStateReader().getClusterState().getCollectionOrNull(collection); docCollection = cloudClient.getZkStateReader().getClusterState().getCollectionOrNull(collection);
assertNotNull(docCollection); assertNotNull(docCollection);
// sanity check that everything is as before // sanity check that everything is as before
@ -120,9 +123,8 @@ public class AddReplicaTest extends SolrCloudTestCase {
} }
@Test @Test
//commented 2-Aug-2018 @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 09-Apr-2018
public void test() throws Exception { public void test() throws Exception {
cluster.waitForAllNodes(5);
String collection = "addreplicatest_coll"; String collection = "addreplicatest_coll";
CloudSolrClient cloudClient = cluster.getSolrClient(); CloudSolrClient cloudClient = cluster.getSolrClient();
@ -131,6 +133,8 @@ public class AddReplicaTest extends SolrCloudTestCase {
create.setMaxShardsPerNode(2); create.setMaxShardsPerNode(2);
cloudClient.request(create); cloudClient.request(create);
cluster.waitForActiveCollection(collection, 2, 2);
ClusterState clusterState = cloudClient.getZkStateReader().getClusterState(); ClusterState clusterState = cloudClient.getZkStateReader().getClusterState();
DocCollection coll = clusterState.getCollection(collection); DocCollection coll = clusterState.getCollection(collection);
String sliceName = coll.getSlices().iterator().next().getName(); String sliceName = coll.getSlices().iterator().next().getName();
@ -140,6 +144,7 @@ public class AddReplicaTest extends SolrCloudTestCase {
CollectionAdminRequest.RequestStatus requestStatus = CollectionAdminRequest.requestStatus("000"); CollectionAdminRequest.RequestStatus requestStatus = CollectionAdminRequest.requestStatus("000");
CollectionAdminRequest.RequestStatusResponse rsp = requestStatus.process(cloudClient); CollectionAdminRequest.RequestStatusResponse rsp = requestStatus.process(cloudClient);
assertNotSame(rsp.getRequestStatus(), COMPLETED); assertNotSame(rsp.getRequestStatus(), COMPLETED);
// wait for async request success // wait for async request success
boolean success = false; boolean success = false;
for (int i = 0; i < 200; i++) { for (int i = 0; i < 200; i++) {
@ -152,11 +157,10 @@ public class AddReplicaTest extends SolrCloudTestCase {
Thread.sleep(500); Thread.sleep(500);
} }
assertTrue(success); assertTrue(success);
Collection<Replica> replicas2 = cloudClient.getZkStateReader().getClusterState().getCollection(collection).getSlice(sliceName).getReplicas(); Collection<Replica> replicas2 = cloudClient.getZkStateReader().getClusterState().getCollection(collection).getSlice(sliceName).getReplicas();
replicas2.removeAll(replicas); replicas2.removeAll(replicas);
assertEquals(1, replicas2.size()); assertEquals(1, replicas2.size());
Replica r = replicas2.iterator().next();
assertNotSame(r.toString(), r.getState(), Replica.State.ACTIVE);
// use waitForFinalState // use waitForFinalState
addReplica.setWaitForFinalState(true); addReplica.setWaitForFinalState(true);

View File

@ -90,7 +90,11 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
public void testProperties() throws Exception { public void testProperties() throws Exception {
CollectionAdminRequest.createCollection("collection1meta", "conf", 2, 1).process(cluster.getSolrClient()); CollectionAdminRequest.createCollection("collection1meta", "conf", 2, 1).process(cluster.getSolrClient());
CollectionAdminRequest.createCollection("collection2meta", "conf", 1, 1).process(cluster.getSolrClient()); CollectionAdminRequest.createCollection("collection2meta", "conf", 1, 1).process(cluster.getSolrClient());
waitForState("Expected collection1 to be created with 2 shards and 1 replica", "collection1meta", clusterShape(2, 1));
cluster.waitForActiveCollection("collection1meta", 2, 2);
cluster.waitForActiveCollection("collection2meta", 1, 1);
waitForState("Expected collection1 to be created with 2 shards and 1 replica", "collection1meta", clusterShape(2, 2));
waitForState("Expected collection2 to be created with 1 shard and 1 replica", "collection2meta", clusterShape(1, 1)); waitForState("Expected collection2 to be created with 1 shard and 1 replica", "collection2meta", clusterShape(1, 1));
ZkStateReader zkStateReader = cluster.getSolrClient().getZkStateReader(); ZkStateReader zkStateReader = cluster.getSolrClient().getZkStateReader();
zkStateReader.createClusterStateWatchersAndUpdate(); zkStateReader.createClusterStateWatchersAndUpdate();
@ -204,7 +208,7 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
@Test @Test
public void testModifyPropertiesV2() throws Exception { public void testModifyPropertiesV2() throws Exception {
final String aliasName = getTestName(); final String aliasName = getSaferTestName();
ZkStateReader zkStateReader = createColectionsAndAlias(aliasName); ZkStateReader zkStateReader = createColectionsAndAlias(aliasName);
final String baseUrl = cluster.getRandomJetty(random()).getBaseUrl().toString(); final String baseUrl = cluster.getRandomJetty(random()).getBaseUrl().toString();
//TODO fix Solr test infra so that this /____v2/ becomes /api/ //TODO fix Solr test infra so that this /____v2/ becomes /api/
@ -226,7 +230,7 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
@Test @Test
public void testModifyPropertiesV1() throws Exception { public void testModifyPropertiesV1() throws Exception {
// note we don't use TZ in this test, thus it's UTC // note we don't use TZ in this test, thus it's UTC
final String aliasName = getTestName(); final String aliasName = getSaferTestName();
ZkStateReader zkStateReader = createColectionsAndAlias(aliasName); ZkStateReader zkStateReader = createColectionsAndAlias(aliasName);
final String baseUrl = cluster.getRandomJetty(random()).getBaseUrl().toString(); final String baseUrl = cluster.getRandomJetty(random()).getBaseUrl().toString();
HttpGet get = new HttpGet(baseUrl + "/admin/collections?action=ALIASPROP" + HttpGet get = new HttpGet(baseUrl + "/admin/collections?action=ALIASPROP" +
@ -241,7 +245,7 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
@Test @Test
public void testModifyPropertiesCAR() throws Exception { public void testModifyPropertiesCAR() throws Exception {
// note we don't use TZ in this test, thus it's UTC // note we don't use TZ in this test, thus it's UTC
final String aliasName = getTestName(); final String aliasName = getSaferTestName();
ZkStateReader zkStateReader = createColectionsAndAlias(aliasName); ZkStateReader zkStateReader = createColectionsAndAlias(aliasName);
CollectionAdminRequest.SetAliasProperty setAliasProperty = CollectionAdminRequest.setAliasProperty(aliasName); CollectionAdminRequest.SetAliasProperty setAliasProperty = CollectionAdminRequest.setAliasProperty(aliasName);
setAliasProperty.addProperty("foo","baz"); setAliasProperty.addProperty("foo","baz");
@ -278,7 +282,11 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
private ZkStateReader createColectionsAndAlias(String aliasName) throws SolrServerException, IOException, KeeperException, InterruptedException { private ZkStateReader createColectionsAndAlias(String aliasName) throws SolrServerException, IOException, KeeperException, InterruptedException {
CollectionAdminRequest.createCollection("collection1meta", "conf", 2, 1).process(cluster.getSolrClient()); CollectionAdminRequest.createCollection("collection1meta", "conf", 2, 1).process(cluster.getSolrClient());
CollectionAdminRequest.createCollection("collection2meta", "conf", 1, 1).process(cluster.getSolrClient()); CollectionAdminRequest.createCollection("collection2meta", "conf", 1, 1).process(cluster.getSolrClient());
waitForState("Expected collection1 to be created with 2 shards and 1 replica", "collection1meta", clusterShape(2, 1));
cluster.waitForActiveCollection("collection1meta", 2, 2);
cluster.waitForActiveCollection("collection2meta", 1, 1);
waitForState("Expected collection1 to be created with 2 shards and 1 replica", "collection1meta", clusterShape(2, 2));
waitForState("Expected collection2 to be created with 1 shard and 1 replica", "collection2meta", clusterShape(1, 1)); waitForState("Expected collection2 to be created with 1 shard and 1 replica", "collection2meta", clusterShape(1, 1));
ZkStateReader zkStateReader = cluster.getSolrClient().getZkStateReader(); ZkStateReader zkStateReader = cluster.getSolrClient().getZkStateReader();
zkStateReader.createClusterStateWatchersAndUpdate(); zkStateReader.createClusterStateWatchersAndUpdate();
@ -326,7 +334,11 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
public void testDeleteAliasWithExistingCollectionName() throws Exception { public void testDeleteAliasWithExistingCollectionName() throws Exception {
CollectionAdminRequest.createCollection("collection_old", "conf", 2, 1).process(cluster.getSolrClient()); CollectionAdminRequest.createCollection("collection_old", "conf", 2, 1).process(cluster.getSolrClient());
CollectionAdminRequest.createCollection("collection_new", "conf", 1, 1).process(cluster.getSolrClient()); CollectionAdminRequest.createCollection("collection_new", "conf", 1, 1).process(cluster.getSolrClient());
waitForState("Expected collection_old to be created with 2 shards and 1 replica", "collection_old", clusterShape(2, 1));
cluster.waitForActiveCollection("collection_old", 2, 2);
cluster.waitForActiveCollection("collection_new", 1, 1);
waitForState("Expected collection_old to be created with 2 shards and 1 replica", "collection_old", clusterShape(2, 2));
waitForState("Expected collection_new to be created with 1 shard and 1 replica", "collection_new", clusterShape(1, 1)); waitForState("Expected collection_new to be created with 1 shard and 1 replica", "collection_new", clusterShape(1, 1));
new UpdateRequest() new UpdateRequest()
@ -399,7 +411,11 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
public void testDeleteOneOfTwoCollectionsAliased() throws Exception { public void testDeleteOneOfTwoCollectionsAliased() throws Exception {
CollectionAdminRequest.createCollection("collection_one", "conf", 2, 1).process(cluster.getSolrClient()); CollectionAdminRequest.createCollection("collection_one", "conf", 2, 1).process(cluster.getSolrClient());
CollectionAdminRequest.createCollection("collection_two", "conf", 1, 1).process(cluster.getSolrClient()); CollectionAdminRequest.createCollection("collection_two", "conf", 1, 1).process(cluster.getSolrClient());
waitForState("Expected collection_one to be created with 2 shards and 1 replica", "collection_one", clusterShape(2, 1));
cluster.waitForActiveCollection("collection_one", 2, 2);
cluster.waitForActiveCollection("collection_two", 1, 1);
waitForState("Expected collection_one to be created with 2 shards and 1 replica", "collection_one", clusterShape(2, 2));
waitForState("Expected collection_two to be created with 1 shard and 1 replica", "collection_two", clusterShape(1, 1)); waitForState("Expected collection_two to be created with 1 shard and 1 replica", "collection_two", clusterShape(1, 1));
new UpdateRequest() new UpdateRequest()
@ -439,8 +455,9 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
// was deleted (and, assuming that it only points to collection_old). // was deleted (and, assuming that it only points to collection_old).
try { try {
cluster.getSolrClient().query("collection_one", new SolrQuery("*:*")); cluster.getSolrClient().query("collection_one", new SolrQuery("*:*"));
} catch (SolrServerException se) { fail("should have failed");
assertTrue(se.getMessage().contains("No live SolrServers")); } catch (SolrServerException | SolrException se) {
} }
// Clean up // Clean up
@ -464,7 +481,11 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
public void test() throws Exception { public void test() throws Exception {
CollectionAdminRequest.createCollection("collection1", "conf", 2, 1).process(cluster.getSolrClient()); CollectionAdminRequest.createCollection("collection1", "conf", 2, 1).process(cluster.getSolrClient());
CollectionAdminRequest.createCollection("collection2", "conf", 1, 1).process(cluster.getSolrClient()); CollectionAdminRequest.createCollection("collection2", "conf", 1, 1).process(cluster.getSolrClient());
waitForState("Expected collection1 to be created with 2 shards and 1 replica", "collection1", clusterShape(2, 1));
cluster.waitForActiveCollection("collection1", 2, 2);
cluster.waitForActiveCollection("collection2", 1, 1);
waitForState("Expected collection1 to be created with 2 shards and 1 replica", "collection1", clusterShape(2, 2));
waitForState("Expected collection2 to be created with 1 shard and 1 replica", "collection2", clusterShape(1, 1)); waitForState("Expected collection2 to be created with 1 shard and 1 replica", "collection2", clusterShape(1, 1));
new UpdateRequest() new UpdateRequest()
@ -495,6 +516,8 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
// test alias pointing to two collections. collection2 first because it's not on every node // test alias pointing to two collections. collection2 first because it's not on every node
CollectionAdminRequest.createAlias("testalias2", "collection2,collection1").process(cluster.getSolrClient()); CollectionAdminRequest.createAlias("testalias2", "collection2,collection1").process(cluster.getSolrClient());
Thread.sleep(100);
searchSeveralWays("testalias2", new SolrQuery("*:*"), 5); searchSeveralWays("testalias2", new SolrQuery("*:*"), 5);
/////////////// ///////////////
@ -618,7 +641,9 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
@Test @Test
public void testErrorChecks() throws Exception { public void testErrorChecks() throws Exception {
CollectionAdminRequest.createCollection("testErrorChecks-collection", "conf", 2, 1).process(cluster.getSolrClient()); CollectionAdminRequest.createCollection("testErrorChecks-collection", "conf", 2, 1).process(cluster.getSolrClient());
waitForState("Expected testErrorChecks-collection to be created with 2 shards and 1 replica", "testErrorChecks-collection", clusterShape(2, 1));
cluster.waitForActiveCollection("testErrorChecks-collection", 2, 2);
waitForState("Expected testErrorChecks-collection to be created with 2 shards and 1 replica", "testErrorChecks-collection", clusterShape(2, 2));
ignoreException("."); ignoreException(".");

View File

@ -56,8 +56,6 @@ public class AssignBackwardCompatibilityTest extends SolrCloudTestCase {
} }
@Test @Test
//05-Jul-2018 @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 21-May-2018
@BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2-Aug-2018
public void test() throws IOException, SolrServerException, KeeperException, InterruptedException { public void test() throws IOException, SolrServerException, KeeperException, InterruptedException {
Set<String> coreNames = new HashSet<>(); Set<String> coreNames = new HashSet<>();
Set<String> coreNodeNames = new HashSet<>(); Set<String> coreNodeNames = new HashSet<>();
@ -81,6 +79,7 @@ public class AssignBackwardCompatibilityTest extends SolrCloudTestCase {
DocCollection dc = getCollectionState(COLLECTION); DocCollection dc = getCollectionState(COLLECTION);
Replica replica = getRandomReplica(dc.getSlice("shard1"), (r) -> r.getState() == Replica.State.ACTIVE); Replica replica = getRandomReplica(dc.getSlice("shard1"), (r) -> r.getState() == Replica.State.ACTIVE);
CollectionAdminRequest.deleteReplica(COLLECTION, "shard1", replica.getName()).process(cluster.getSolrClient()); CollectionAdminRequest.deleteReplica(COLLECTION, "shard1", replica.getName()).process(cluster.getSolrClient());
coreNames.remove(replica.getCoreName());
numLiveReplicas--; numLiveReplicas--;
} else { } else {
CollectionAdminResponse response = CollectionAdminRequest.addReplicaToShard(COLLECTION, "shard1") CollectionAdminResponse response = CollectionAdminRequest.addReplicaToShard(COLLECTION, "shard1")

View File

@ -40,7 +40,7 @@ public class AsyncCallRequestStatusResponseTest extends SolrCloudTestCase {
String asyncId = String asyncId =
CollectionAdminRequest.createCollection("asynccall", "conf", 2, 1).processAsync(cluster.getSolrClient()); CollectionAdminRequest.createCollection("asynccall", "conf", 2, 1).processAsync(cluster.getSolrClient());
waitForState("Expected collection 'asynccall' to have 2 shards and 1 replica", "asynccall", clusterShape(2, 1)); waitForState("Expected collection 'asynccall' to have 2 shards and 1 replica", "asynccall", clusterShape(2, 2));
int tries = 0; int tries = 0;
while (true) { while (true) {

View File

@ -67,7 +67,7 @@ public class BasicDistributedZk2Test extends AbstractFullDistribZkTestBase {
@Override @Override
protected boolean useTlogReplicas() { protected boolean useTlogReplicas() {
return onlyLeaderIndexes; return false; // TODO: tlog replicas makes commits take way to long due to what is likely a bug and it's TestInjection use
} }
@Test @Test
@ -351,7 +351,7 @@ public class BasicDistributedZk2Test extends AbstractFullDistribZkTestBase {
// query("q","matchesnothing","fl","*,score", "debugQuery", "true"); // query("q","matchesnothing","fl","*,score", "debugQuery", "true");
// this should trigger a recovery phase on deadShard // this should trigger a recovery phase on deadShard
ChaosMonkey.start(deadShard.jetty); deadShard.jetty.start();
// make sure we have published we are recovering // make sure we have published we are recovering
Thread.sleep(1500); Thread.sleep(1500);
@ -381,7 +381,7 @@ public class BasicDistributedZk2Test extends AbstractFullDistribZkTestBase {
Thread.sleep(1500); Thread.sleep(1500);
ChaosMonkey.start(deadShard.jetty); deadShard.jetty.start();
// make sure we have published we are recovering // make sure we have published we are recovering
Thread.sleep(1500); Thread.sleep(1500);

View File

@ -28,12 +28,16 @@ import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.concurrent.Callable; import java.util.concurrent.Callable;
import java.util.concurrent.CompletionService; import java.util.concurrent.CompletionService;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorCompletionService; import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.Future; import java.util.concurrent.Future;
import java.util.concurrent.SynchronousQueue; import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IOUtils;
@ -74,7 +78,9 @@ import org.apache.solr.common.params.UpdateParams;
import org.apache.solr.common.util.ExecutorUtil; import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.util.DefaultSolrThreadFactory; import org.apache.solr.util.DefaultSolrThreadFactory;
import org.apache.solr.util.RTimer; import org.apache.solr.util.TestInjection;
import org.apache.solr.util.TestInjection.Hook;
import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -86,7 +92,6 @@ import org.slf4j.LoggerFactory;
*/ */
@Slow @Slow
@SuppressSSL(bugUrl = "https://issues.apache.org/jira/browse/SOLR-5776") @SuppressSSL(bugUrl = "https://issues.apache.org/jira/browse/SOLR-5776")
// DO NOT ENABLE @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2018-06-18
public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase { public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@ -94,6 +99,7 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
private static final String DEFAULT_COLLECTION = "collection1"; private static final String DEFAULT_COLLECTION = "collection1";
private final boolean onlyLeaderIndexes = random().nextBoolean(); private final boolean onlyLeaderIndexes = random().nextBoolean();
String t1="a_t"; String t1="a_t";
String i1="a_i1"; String i1="a_i1";
String tlong = "other_tl1"; String tlong = "other_tl1";
@ -108,13 +114,37 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
private AtomicInteger nodeCounter = new AtomicInteger(); private AtomicInteger nodeCounter = new AtomicInteger();
ThreadPoolExecutor executor = new ExecutorUtil.MDCAwareThreadPoolExecutor(0,
Integer.MAX_VALUE, 5, TimeUnit.SECONDS, new SynchronousQueue<Runnable>(),
new DefaultSolrThreadFactory("testExecutor"));
CompletionService<Object> completionService; CompletionService<Object> completionService;
Set<Future<Object>> pending; Set<Future<Object>> pending;
private static Hook newSearcherHook = new Hook() {
volatile CountDownLatch latch;
AtomicReference<String> collection = new AtomicReference<>();
@Override
public void newSearcher(String collectionName) {
String c = collection.get();
if (c != null && c.equals(collectionName)) {
log.info("Hook detected newSearcher");
try {
latch.countDown();
} catch (NullPointerException e) {
}
}
}
public void waitForSearcher(String collection, int cnt, int timeoutms, boolean failOnTimeout) throws InterruptedException {
latch = new CountDownLatch(cnt);
this.collection.set(collection);
boolean timeout = !latch.await(timeoutms, TimeUnit.MILLISECONDS);
if (timeout && failOnTimeout) {
fail("timed out waiting for new searcher event " + latch.getCount());
}
}
};
public BasicDistributedZkTest() { public BasicDistributedZkTest() {
// we need DVs on point fields to compute stats & facets // we need DVs on point fields to compute stats & facets
if (Boolean.getBoolean(NUMERIC_POINTS_SYSPROP)) System.setProperty(NUMERIC_DOCVALUES_SYSPROP,"true"); if (Boolean.getBoolean(NUMERIC_POINTS_SYSPROP)) System.setProperty(NUMERIC_DOCVALUES_SYSPROP,"true");
@ -125,9 +155,14 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
} }
@BeforeClass
public static void beforeBDZKTClass() {
TestInjection.newSearcherHook(newSearcherHook);
}
@Override @Override
protected boolean useTlogReplicas() { protected boolean useTlogReplicas() {
return onlyLeaderIndexes; return false; // TODO: tlog replicas makes commits take way to long due to what is likely a bug and it's TestInjection use
} }
@Override @Override
@ -149,8 +184,6 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
@Test @Test
@ShardsFixed(num = 4) @ShardsFixed(num = 4)
//DO NOT ENABLE @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 12-Jun-2018
@BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 14-Oct-2018
public void test() throws Exception { public void test() throws Exception {
// setLoggingLevel(null); // setLoggingLevel(null);
@ -345,7 +378,12 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
params.set("commitWithin", 10); params.set("commitWithin", 10);
add(cloudClient, params , getDoc("id", 300), getDoc("id", 301)); add(cloudClient, params , getDoc("id", 300), getDoc("id", 301));
waitForDocCount(before + 2, 30000, "add commitWithin did not work"); newSearcherHook.waitForSearcher(DEFAULT_COLLECTION, 2, 20000, false);
ClusterState clusterState = getCommonCloudSolrClient().getZkStateReader().getClusterState();
DocCollection dColl = clusterState.getCollection(DEFAULT_COLLECTION);
assertSliceCounts("should have found 2 docs, 300 and 301", before + 2, dColl);
// try deleteById commitWithin // try deleteById commitWithin
UpdateRequest deleteByIdReq = new UpdateRequest(); UpdateRequest deleteByIdReq = new UpdateRequest();
@ -353,7 +391,9 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
deleteByIdReq.setCommitWithin(10); deleteByIdReq.setCommitWithin(10);
deleteByIdReq.process(cloudClient); deleteByIdReq.process(cloudClient);
waitForDocCount(before + 1, 30000, "deleteById commitWithin did not work"); newSearcherHook.waitForSearcher(DEFAULT_COLLECTION, 2, 20000, false);
assertSliceCounts("deleteById commitWithin did not work", before + 1, dColl);
// try deleteByQuery commitWithin // try deleteByQuery commitWithin
UpdateRequest deleteByQueryReq = new UpdateRequest(); UpdateRequest deleteByQueryReq = new UpdateRequest();
@ -361,7 +401,10 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
deleteByQueryReq.setCommitWithin(10); deleteByQueryReq.setCommitWithin(10);
deleteByQueryReq.process(cloudClient); deleteByQueryReq.process(cloudClient);
waitForDocCount(before, 30000, "deleteByQuery commitWithin did not work"); newSearcherHook.waitForSearcher(DEFAULT_COLLECTION, 2, 20000, false);
assertSliceCounts("deleteByQuery commitWithin did not work", before, dColl);
// TODO: This test currently fails because debug info is obtained only // TODO: This test currently fails because debug info is obtained only
// on shards with matches. // on shards with matches.
@ -384,24 +427,41 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
testStopAndStartCoresInOneInstance(); testStopAndStartCoresInOneInstance();
} }
// Insure that total docs found is the expected number. private void assertSliceCounts(String msg, long expected, DocCollection dColl) throws Exception {
long found = checkSlicesSameCounts(dColl);
if (found != expected) {
// we get one do over in a bad race
Thread.sleep(1000);
found = checkSlicesSameCounts(dColl);
}
assertEquals(msg, expected, checkSlicesSameCounts(dColl));
}
// Ensure that total docs found is the expected number.
private void waitForDocCount(long expectedNumFound, long waitMillis, String failureMessage) private void waitForDocCount(long expectedNumFound, long waitMillis, String failureMessage)
throws Exception { throws Exception {
RTimer timer = new RTimer(); AtomicLong total = new AtomicLong(-1);
long timeout = (long)timer.getTime() + waitMillis; try {
getCommonCloudSolrClient().getZkStateReader().waitForState(DEFAULT_COLLECTION, waitMillis, TimeUnit.MILLISECONDS, (n, c) -> {
ClusterState clusterState = getCommonCloudSolrClient().getZkStateReader().getClusterState(); long docTotal;
DocCollection dColl = clusterState.getCollection(DEFAULT_COLLECTION); try {
long docTotal = -1; // Could use this for 0 hits too! docTotal = checkSlicesSameCounts(c);
} catch (SolrServerException | IOException e) {
while (docTotal != expectedNumFound && timeout > (long) timer.getTime()) { throw new RuntimeException(e);
docTotal = checkSlicesSameCounts(dColl);
if (docTotal != expectedNumFound) {
Thread.sleep(100);
} }
total.set(docTotal);
if (docTotal == expectedNumFound) {
return true;
}
return false;
});
} catch (TimeoutException | InterruptedException e) {
} }
// We could fail here if we broke out of the above because we exceeded the time allowed. // We could fail here if we broke out of the above because we exceeded the time allowed.
assertEquals(failureMessage, expectedNumFound, docTotal); assertEquals(failureMessage, expectedNumFound, total.get());
// This should be redundant, but it caught a test error after all. // This should be redundant, but it caught a test error after all.
for (SolrClient client : clients) { for (SolrClient client : clients) {
@ -557,11 +617,10 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
} }
} }
ChaosMonkey.stop(cloudJettys.get(0).jetty); cloudJettys.get(0).jetty.stop();
printLayout(); printLayout();
Thread.sleep(5000); cloudJettys.get(0).jetty.start();
ChaosMonkey.start(cloudJettys.get(0).jetty);
cloudClient.getZkStateReader().forceUpdateCollection("multiunload2"); cloudClient.getZkStateReader().forceUpdateCollection("multiunload2");
try { try {
cloudClient.getZkStateReader().getLeaderRetry("multiunload2", "shard1", 30000); cloudClient.getZkStateReader().getLeaderRetry("multiunload2", "shard1", 30000);
@ -803,6 +862,8 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
for (String coreName : resp.getCollectionCoresStatus().keySet()) { for (String coreName : resp.getCollectionCoresStatus().keySet()) {
collectionClients.add(createNewSolrClient(coreName, jettys.get(0).getBaseUrl().toString())); collectionClients.add(createNewSolrClient(coreName, jettys.get(0).getBaseUrl().toString()));
} }
} }
SolrClient client1 = collectionClients.get(0); SolrClient client1 = collectionClients.get(0);
@ -864,15 +925,36 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
String leader = props.getCoreUrl(); String leader = props.getCoreUrl();
unloadClient.request(unloadCmd); testExecutor.execute(new Runnable() {
int tries = 50; @Override
while (leader.equals(zkStateReader.getLeaderUrl(oneInstanceCollection2, "shard1", 10000))) { public void run() {
Thread.sleep(100); try {
if (tries-- == 0) { unloadClient.request(unloadCmd);
fail("Leader never changed"); } catch (SolrServerException e) {
throw new RuntimeException(e);
} catch (IOException e) {
throw new RuntimeException(e);
} }
} }
});
try {
getCommonCloudSolrClient().getZkStateReader().waitForState(oneInstanceCollection2, 20000, TimeUnit.MILLISECONDS, (n, c) -> {
try {
if (leader.equals(zkStateReader.getLeaderUrl(oneInstanceCollection2, "shard1", 10000))) {
return false;
}
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
return true;
});
} catch (TimeoutException | InterruptedException e) {
fail("Leader never changed");
}
} }
IOUtils.close(collectionClients); IOUtils.close(collectionClients);
@ -1036,10 +1118,10 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
long collection2Docs = otherCollectionClients.get("collection2").get(0) long collection2Docs = otherCollectionClients.get("collection2").get(0)
.query(new SolrQuery("*:*")).getResults().getNumFound(); .query(new SolrQuery("*:*")).getResults().getNumFound();
System.out.println("found2: "+ collection2Docs);
long collection3Docs = otherCollectionClients.get("collection3").get(0) long collection3Docs = otherCollectionClients.get("collection3").get(0)
.query(new SolrQuery("*:*")).getResults().getNumFound(); .query(new SolrQuery("*:*")).getResults().getNumFound();
System.out.println("found3: "+ collection3Docs);
SolrQuery query = new SolrQuery("*:*"); SolrQuery query = new SolrQuery("*:*");
query.set("collection", "collection2,collection3"); query.set("collection", "collection2,collection3");

View File

@ -115,7 +115,7 @@ public class BasicZkTest extends AbstractZkTestCase {
// try a reconnect from disconnect // try a reconnect from disconnect
zkServer = new ZkTestServer(zkDir, zkPort); zkServer = new ZkTestServer(zkDir, zkPort);
zkServer.run(); zkServer.run(false);
Thread.sleep(300); Thread.sleep(300);

View File

@ -23,7 +23,6 @@ import java.util.Set;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.Slow; import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.SolrTestCaseJ4.SuppressObjectReleaseTracker;
import org.apache.solr.SolrTestCaseJ4.SuppressSSL; import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.CloudSolrClient; import org.apache.solr.client.solrj.impl.CloudSolrClient;
@ -35,8 +34,6 @@ import org.junit.Test;
@Slow @Slow
@SuppressSSL(bugUrl = "https://issues.apache.org/jira/browse/SOLR-5776") @SuppressSSL(bugUrl = "https://issues.apache.org/jira/browse/SOLR-5776")
//@ThreadLeakLingering(linger = 60000)
@SuppressObjectReleaseTracker(bugUrl="Testing purposes")
public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase { public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase {
private static final int FAIL_TOLERANCE = 100; private static final int FAIL_TOLERANCE = 100;
@ -48,6 +45,9 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
public static void beforeSuperClass() { public static void beforeSuperClass() {
schemaString = "schema15.xml"; // we need a string id schemaString = "schema15.xml"; // we need a string id
System.setProperty("solr.autoCommit.maxTime", "15000"); System.setProperty("solr.autoCommit.maxTime", "15000");
System.clearProperty("solr.httpclient.retries");
System.clearProperty("solr.retries.on.forward");
System.clearProperty("solr.retries.to.followers");
setErrorHook(); setErrorHook();
} }
@ -57,11 +57,23 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
clearErrorHook(); clearErrorHook();
} }
@Override
protected void destroyServers() throws Exception {
super.destroyServers();
}
protected static final String[] fieldNames = new String[]{"f_i", "f_f", "f_d", "f_l", "f_dt"}; protected static final String[] fieldNames = new String[]{"f_i", "f_f", "f_d", "f_l", "f_dt"};
protected static final RandVal[] randVals = new RandVal[]{rint, rfloat, rdouble, rlong, rdate}; protected static final RandVal[] randVals = new RandVal[]{rint, rfloat, rdouble, rlong, rdate};
private int clientSoTimeout = 60000; private int clientSoTimeout = 60000;
private volatile FullThrottleStoppableIndexingThread ftIndexThread;
private final boolean runFullThrottle;
public String[] getFieldNames() { public String[] getFieldNames() {
return fieldNames; return fieldNames;
} }
@ -78,6 +90,16 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
useFactory("solr.StandardDirectoryFactory"); useFactory("solr.StandardDirectoryFactory");
} }
@Override
public void distribTearDown() throws Exception {
try {
ftIndexThread.safeStop();
} catch (NullPointerException e) {
// okay
}
super.distribTearDown();
}
public ChaosMonkeyNothingIsSafeTest() { public ChaosMonkeyNothingIsSafeTest() {
super(); super();
sliceCount = Integer.parseInt(System.getProperty("solr.tests.cloud.cm.slicecount", "-1")); sliceCount = Integer.parseInt(System.getProperty("solr.tests.cloud.cm.slicecount", "-1"));
@ -94,11 +116,15 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
fixShardCount(numShards); fixShardCount(numShards);
// TODO: we only do this sometimes so that we can sometimes compare against control,
// it's currently hard to know what requests failed when using ConcurrentSolrUpdateServer
runFullThrottle = random().nextBoolean();
} }
@Override @Override
protected boolean useTlogReplicas() { protected boolean useTlogReplicas() {
return onlyLeaderIndexes; return false; // TODO: tlog replicas makes commits take way to long due to what is likely a bug and it's TestInjection use
} }
@Override @Override
@ -119,9 +145,9 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
// None of the operations used here are particularly costly, so this should work. // None of the operations used here are particularly costly, so this should work.
// Using this low timeout will also help us catch index stalling. // Using this low timeout will also help us catch index stalling.
clientSoTimeout = 5000; clientSoTimeout = 5000;
cloudClient = createCloudClient(DEFAULT_COLLECTION);
boolean testSuccessful = false; boolean testSuccessful = false;
try { try (CloudSolrClient ourCloudClient = createCloudClient(DEFAULT_COLLECTION)) {
handle.clear(); handle.clear();
handle.put("timestamp", SKIPVAL); handle.put("timestamp", SKIPVAL);
ZkStateReader zkStateReader = cloudClient.getZkStateReader(); ZkStateReader zkStateReader = cloudClient.getZkStateReader();
@ -155,13 +181,9 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
searchThread.start(); searchThread.start();
} }
// TODO: we only do this sometimes so that we can sometimes compare against control,
// it's currently hard to know what requests failed when using ConcurrentSolrUpdateServer
boolean runFullThrottle = random().nextBoolean();
if (runFullThrottle) { if (runFullThrottle) {
FullThrottleStoppableIndexingThread ftIndexThread = ftIndexThread =
new FullThrottleStoppableIndexingThread(controlClient, cloudClient, clients, "ft1", true, this.clientSoTimeout); new FullThrottleStoppableIndexingThread(cloudClient.getHttpClient(),controlClient, cloudClient, clients, "ft1", true, this.clientSoTimeout);
threads.add(ftIndexThread);
ftIndexThread.start(); ftIndexThread.start();
} }
@ -189,6 +211,11 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
// ideally this should go into chaosMonkey // ideally this should go into chaosMonkey
restartZk(1000 * (5 + random().nextInt(4))); restartZk(1000 * (5 + random().nextInt(4)));
if (runFullThrottle) {
ftIndexThread.safeStop();
}
for (StoppableThread indexThread : threads) { for (StoppableThread indexThread : threads) {
indexThread.safeStop(); indexThread.safeStop();
} }
@ -219,7 +246,6 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
zkStateReader.updateLiveNodes(); zkStateReader.updateLiveNodes();
assertTrue(zkStateReader.getClusterState().getLiveNodes().size() > 0); assertTrue(zkStateReader.getClusterState().getLiveNodes().size() > 0);
// we expect full throttle fails, but cloud client should not easily fail // we expect full throttle fails, but cloud client should not easily fail
for (StoppableThread indexThread : threads) { for (StoppableThread indexThread : threads) {
if (indexThread instanceof StoppableIndexingThread && !(indexThread instanceof FullThrottleStoppableIndexingThread)) { if (indexThread instanceof StoppableIndexingThread && !(indexThread instanceof FullThrottleStoppableIndexingThread)) {
@ -230,6 +256,10 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
} }
waitForThingsToLevelOut(20);
commit();
Set<String> addFails = getAddFails(indexTreads); Set<String> addFails = getAddFails(indexTreads);
Set<String> deleteFails = getDeleteFails(indexTreads); Set<String> deleteFails = getDeleteFails(indexTreads);
// full throttle thread can // full throttle thread can
@ -253,7 +283,7 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
// sometimes we restart zookeeper as well // sometimes we restart zookeeper as well
if (random().nextBoolean()) { if (random().nextBoolean()) {
restartZk(1000 * (5 + random().nextInt(4))); // restartZk(1000 * (5 + random().nextInt(4)));
} }
try (CloudSolrClient client = createCloudClient("collection1", 30000)) { try (CloudSolrClient client = createCloudClient("collection1", 30000)) {

View File

@ -25,7 +25,6 @@ import java.util.Set;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import org.apache.lucene.util.LuceneTestCase.Slow; import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.SolrTestCaseJ4.SuppressObjectReleaseTracker;
import org.apache.solr.SolrTestCaseJ4.SuppressSSL; import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.CloudSolrClient; import org.apache.solr.client.solrj.impl.CloudSolrClient;
@ -43,12 +42,8 @@ import org.junit.Test;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
@Slow @Slow
@SuppressSSL(bugUrl = "https://issues.apache.org/jira/browse/SOLR-5776") @SuppressSSL(bugUrl = "https://issues.apache.org/jira/browse/SOLR-5776")
@ThreadLeakLingering(linger = 60000)
@SuppressObjectReleaseTracker(bugUrl="Testing purposes")
public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDistribZkTestBase { public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDistribZkTestBase {
private static final int FAIL_TOLERANCE = 100; private static final int FAIL_TOLERANCE = 100;
@ -71,6 +66,9 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
if (usually()) { if (usually()) {
System.setProperty("solr.autoCommit.maxTime", "15000"); System.setProperty("solr.autoCommit.maxTime", "15000");
} }
System.clearProperty("solr.httpclient.retries");
System.clearProperty("solr.retries.on.forward");
System.clearProperty("solr.retries.to.followers");
TestInjection.waitForReplicasInSync = null; TestInjection.waitForReplicasInSync = null;
setErrorHook(); setErrorHook();
} }
@ -85,7 +83,11 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
protected static final String[] fieldNames = new String[]{"f_i", "f_f", "f_d", "f_l", "f_dt"}; protected static final String[] fieldNames = new String[]{"f_i", "f_f", "f_d", "f_l", "f_dt"};
protected static final RandVal[] randVals = new RandVal[]{rint, rfloat, rdouble, rlong, rdate}; protected static final RandVal[] randVals = new RandVal[]{rint, rfloat, rdouble, rlong, rdate};
private int clientSoTimeout = 60000; private int clientSoTimeout;
private volatile FullThrottleStoppableIndexingThread ftIndexThread;
private final boolean runFullThrottle;
public String[] getFieldNames() { public String[] getFieldNames() {
return fieldNames; return fieldNames;
@ -103,6 +105,16 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
useFactory("solr.StandardDirectoryFactory"); useFactory("solr.StandardDirectoryFactory");
} }
@Override
public void distribTearDown() throws Exception {
try {
ftIndexThread.safeStop();
} catch (NullPointerException e) {
// okay
}
super.distribTearDown();
}
public ChaosMonkeyNothingIsSafeWithPullReplicasTest() { public ChaosMonkeyNothingIsSafeWithPullReplicasTest() {
super(); super();
numPullReplicas = random().nextInt(TEST_NIGHTLY ? 2 : 1) + 1; numPullReplicas = random().nextInt(TEST_NIGHTLY ? 2 : 1) + 1;
@ -116,12 +128,12 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
fixShardCount(numNodes); fixShardCount(numNodes);
log.info("Starting ChaosMonkey test with {} shards and {} nodes", sliceCount, numNodes); log.info("Starting ChaosMonkey test with {} shards and {} nodes", sliceCount, numNodes);
runFullThrottle = random().nextBoolean();
} }
@Override @Override
protected boolean useTlogReplicas() { protected boolean useTlogReplicas() {
return useTlogReplicas; return false; // TODO: tlog replicas makes commits take way to long due to what is likely a bug and it's TestInjection use
} }
@Override @Override
@ -140,8 +152,8 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
public void test() throws Exception { public void test() throws Exception {
// None of the operations used here are particularly costly, so this should work. // None of the operations used here are particularly costly, so this should work.
// Using this low timeout will also help us catch index stalling. // Using this low timeout will also help us catch index stalling.
clientSoTimeout = 5000; clientSoTimeout = 8000;
cloudClient = createCloudClient(DEFAULT_COLLECTION);
DocCollection docCollection = cloudClient.getZkStateReader().getClusterState().getCollection(DEFAULT_COLLECTION); DocCollection docCollection = cloudClient.getZkStateReader().getClusterState().getCollection(DEFAULT_COLLECTION);
assertEquals(this.sliceCount, docCollection.getSlices().size()); assertEquals(this.sliceCount, docCollection.getSlices().size());
Slice s = docCollection.getSlice("shard1"); Slice s = docCollection.getSlice("shard1");
@ -163,8 +175,6 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
waitForRecoveriesToFinish(false); waitForRecoveriesToFinish(false);
// we cannot do delete by query
// as it's not supported for recovery
del("*:*"); del("*:*");
List<StoppableThread> threads = new ArrayList<>(); List<StoppableThread> threads = new ArrayList<>();
@ -172,7 +182,7 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
int threadCount = TEST_NIGHTLY ? 3 : 1; int threadCount = TEST_NIGHTLY ? 3 : 1;
int i = 0; int i = 0;
for (i = 0; i < threadCount; i++) { for (i = 0; i < threadCount; i++) {
StoppableIndexingThread indexThread = new StoppableIndexingThread(controlClient, cloudClient, Integer.toString(i), true); StoppableIndexingThread indexThread = new StoppableIndexingThread(controlClient, cloudClient, Integer.toString(i), true, 35, 1, true);
threads.add(indexThread); threads.add(indexThread);
indexTreads.add(indexThread); indexTreads.add(indexThread);
indexThread.start(); indexThread.start();
@ -192,13 +202,9 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
commitThread.start(); commitThread.start();
} }
// TODO: we only do this sometimes so that we can sometimes compare against control,
// it's currently hard to know what requests failed when using ConcurrentSolrUpdateServer
boolean runFullThrottle = random().nextBoolean();
if (runFullThrottle) { if (runFullThrottle) {
FullThrottleStoppableIndexingThread ftIndexThread = ftIndexThread =
new FullThrottleStoppableIndexingThread(controlClient, cloudClient, clients, "ft1", true, this.clientSoTimeout); new FullThrottleStoppableIndexingThread(cloudClient.getHttpClient(), controlClient, cloudClient, clients, "ft1", true, this.clientSoTimeout);
threads.add(ftIndexThread);
ftIndexThread.start(); ftIndexThread.start();
} }
@ -213,7 +219,7 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
runTimes = new int[] {5000, 6000, 10000, 15000, 25000, 30000, runTimes = new int[] {5000, 6000, 10000, 15000, 25000, 30000,
30000, 45000, 90000, 120000}; 30000, 45000, 90000, 120000};
} else { } else {
runTimes = new int[] {5000, 7000, 15000}; runTimes = new int[] {5000, 7000, 10000};
} }
runLength = runTimes[random().nextInt(runTimes.length - 1)]; runLength = runTimes[random().nextInt(runTimes.length - 1)];
} }
@ -225,6 +231,10 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
// ideally this should go into chaosMonkey // ideally this should go into chaosMonkey
restartZk(1000 * (5 + random().nextInt(4))); restartZk(1000 * (5 + random().nextInt(4)));
if (runFullThrottle) {
ftIndexThread.safeStop();
}
for (StoppableThread indexThread : threads) { for (StoppableThread indexThread : threads) {
indexThread.safeStop(); indexThread.safeStop();
} }

View File

@ -38,6 +38,9 @@ public class ChaosMonkeySafeLeaderTest extends AbstractFullDistribZkTestBase {
public static void beforeSuperClass() { public static void beforeSuperClass() {
schemaString = "schema15.xml"; // we need a string id schemaString = "schema15.xml"; // we need a string id
System.setProperty("solr.autoCommit.maxTime", "15000"); System.setProperty("solr.autoCommit.maxTime", "15000");
System.clearProperty("solr.httpclient.retries");
System.clearProperty("solr.retries.on.forward");
System.clearProperty("solr.retries.to.followers");
setErrorHook(); setErrorHook();
} }
@ -81,7 +84,6 @@ public class ChaosMonkeySafeLeaderTest extends AbstractFullDistribZkTestBase {
} }
@Test @Test
// 29-June-2018 @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
public void test() throws Exception { public void test() throws Exception {
handle.clear(); handle.clear();
@ -170,7 +172,7 @@ public class ChaosMonkeySafeLeaderTest extends AbstractFullDistribZkTestBase {
if (random().nextBoolean()) { if (random().nextBoolean()) {
zkServer.shutdown(); zkServer.shutdown();
zkServer = new ZkTestServer(zkServer.getZkDir(), zkServer.getPort()); zkServer = new ZkTestServer(zkServer.getZkDir(), zkServer.getPort());
zkServer.run(); zkServer.run(false);
} }
try (CloudSolrClient client = createCloudClient("collection1")) { try (CloudSolrClient client = createCloudClient("collection1")) {

View File

@ -23,7 +23,6 @@ import java.util.List;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import org.apache.lucene.util.LuceneTestCase.Slow; import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.SolrTestCaseJ4.SuppressObjectReleaseTracker;
import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrClient; import org.apache.solr.client.solrj.impl.CloudSolrClient;
@ -42,7 +41,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@Slow @Slow
@SuppressObjectReleaseTracker(bugUrl="Testing purposes")
public class ChaosMonkeySafeLeaderWithPullReplicasTest extends AbstractFullDistribZkTestBase { public class ChaosMonkeySafeLeaderWithPullReplicasTest extends AbstractFullDistribZkTestBase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@ -60,7 +58,7 @@ public class ChaosMonkeySafeLeaderWithPullReplicasTest extends AbstractFullDistr
@Override @Override
protected boolean useTlogReplicas() { protected boolean useTlogReplicas() {
return useTlogReplicas; return false; // TODO: tlog replicas makes commits take way to long due to what is likely a bug and it's TestInjection use
} }
@BeforeClass @BeforeClass
@ -69,6 +67,9 @@ public class ChaosMonkeySafeLeaderWithPullReplicasTest extends AbstractFullDistr
if (usually()) { if (usually()) {
System.setProperty("solr.autoCommit.maxTime", "15000"); System.setProperty("solr.autoCommit.maxTime", "15000");
} }
System.clearProperty("solr.httpclient.retries");
System.clearProperty("solr.retries.on.forward");
System.clearProperty("solr.retries.to.followers");
TestInjection.waitForReplicasInSync = null; TestInjection.waitForReplicasInSync = null;
setErrorHook(); setErrorHook();
} }
@ -99,8 +100,8 @@ public class ChaosMonkeySafeLeaderWithPullReplicasTest extends AbstractFullDistr
public ChaosMonkeySafeLeaderWithPullReplicasTest() { public ChaosMonkeySafeLeaderWithPullReplicasTest() {
super(); super();
numPullReplicas = random().nextInt(TEST_NIGHTLY ? 3 : 2) + 1;; numPullReplicas = random().nextInt(TEST_NIGHTLY ? 3 : 2) + 1;
numRealtimeOrTlogReplicas = random().nextInt(TEST_NIGHTLY ? 3 : 2) + 1;; numRealtimeOrTlogReplicas = random().nextInt(TEST_NIGHTLY ? 3 : 2) + 1;
sliceCount = Integer.parseInt(System.getProperty("solr.tests.cloud.cm.slicecount", "-1")); sliceCount = Integer.parseInt(System.getProperty("solr.tests.cloud.cm.slicecount", "-1"));
if (sliceCount == -1) { if (sliceCount == -1) {
sliceCount = random().nextInt(TEST_NIGHTLY ? 3 : 2) + 1; sliceCount = random().nextInt(TEST_NIGHTLY ? 3 : 2) + 1;
@ -219,7 +220,7 @@ public class ChaosMonkeySafeLeaderWithPullReplicasTest extends AbstractFullDistr
if (random().nextBoolean()) { if (random().nextBoolean()) {
zkServer.shutdown(); zkServer.shutdown();
zkServer = new ZkTestServer(zkServer.getZkDir(), zkServer.getPort()); zkServer = new ZkTestServer(zkServer.getZkDir(), zkServer.getPort());
zkServer.run(); zkServer.run(false);
} }
try (CloudSolrClient client = createCloudClient("collection1")) { try (CloudSolrClient client = createCloudClient("collection1")) {

View File

@ -36,10 +36,12 @@ import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.SolrZkClient; import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.core.CloudConfig; import org.apache.solr.core.CloudConfig;
import org.apache.solr.handler.component.HttpShardHandler;
import org.apache.solr.handler.component.HttpShardHandlerFactory; import org.apache.solr.handler.component.HttpShardHandlerFactory;
import org.apache.solr.update.UpdateShardHandler; import org.apache.solr.update.UpdateShardHandler;
import org.apache.solr.update.UpdateShardHandlerConfig; import org.apache.solr.update.UpdateShardHandlerConfig;
import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException;
import org.junit.BeforeClass;
import org.junit.Ignore; import org.junit.Ignore;
import org.junit.Test; import org.junit.Test;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -57,6 +59,13 @@ public class ChaosMonkeyShardSplitTest extends ShardSplitTest {
static final int TIMEOUT = 10000; static final int TIMEOUT = 10000;
private AtomicInteger killCounter = new AtomicInteger(); private AtomicInteger killCounter = new AtomicInteger();
@BeforeClass
public static void beforeSuperClass() {
System.clearProperty("solr.httpclient.retries");
System.clearProperty("solr.retries.on.forward");
System.clearProperty("solr.retries.to.followers");
}
@Test @Test
public void test() throws Exception { public void test() throws Exception {
waitForThingsToLevelOut(15); waitForThingsToLevelOut(15);
@ -100,7 +109,7 @@ public class ChaosMonkeyShardSplitTest extends ShardSplitTest {
// kill the leader // kill the leader
CloudJettyRunner leaderJetty = shardToLeaderJetty.get("shard1"); CloudJettyRunner leaderJetty = shardToLeaderJetty.get("shard1");
chaosMonkey.killJetty(leaderJetty); leaderJetty.jetty.stop();
Thread.sleep(2000); Thread.sleep(2000);
@ -122,7 +131,7 @@ public class ChaosMonkeyShardSplitTest extends ShardSplitTest {
} }
// bring back dead node // bring back dead node
ChaosMonkey.start(deadJetty.jetty); // he is not the leader anymore deadJetty.jetty.start(); // he is not the leader anymore
waitTillRecovered(); waitTillRecovered();
@ -251,7 +260,7 @@ public class ChaosMonkeyShardSplitTest extends ShardSplitTest {
LeaderElector overseerElector = new LeaderElector(zkClient); LeaderElector overseerElector = new LeaderElector(zkClient);
UpdateShardHandler updateShardHandler = new UpdateShardHandler(UpdateShardHandlerConfig.DEFAULT); UpdateShardHandler updateShardHandler = new UpdateShardHandler(UpdateShardHandlerConfig.DEFAULT);
// TODO: close Overseer // TODO: close Overseer
Overseer overseer = new Overseer(new HttpShardHandlerFactory().getShardHandler(), updateShardHandler, "/admin/cores", Overseer overseer = new Overseer((HttpShardHandler) new HttpShardHandlerFactory().getShardHandler(), updateShardHandler, "/admin/cores",
reader, null, new CloudConfig.CloudConfigBuilder("127.0.0.1", 8983, "solr").build()); reader, null, new CloudConfig.CloudConfigBuilder("127.0.0.1", 8983, "solr").build());
overseer.close(); overseer.close();
ElectionContext ec = new OverseerElectionContext(zkClient, overseer, ElectionContext ec = new OverseerElectionContext(zkClient, overseer,

View File

@ -96,13 +96,13 @@ public class CleanupOldIndexTest extends SolrCloudTestCase {
assertTrue(oldIndexDir2.isDirectory()); assertTrue(oldIndexDir2.isDirectory());
// bring shard replica down // bring shard replica down
ChaosMonkey.stop(jetty); jetty.stop();
// wait a moment - lets allow some docs to be indexed so replication time is non 0 // wait a moment - lets allow some docs to be indexed so replication time is non 0
Thread.sleep(waitTimes[random().nextInt(waitTimes.length - 1)]); Thread.sleep(waitTimes[random().nextInt(waitTimes.length - 1)]);
// bring shard replica up // bring shard replica up
ChaosMonkey.start(jetty); jetty.start();
// make sure replication can start // make sure replication can start
Thread.sleep(3000); Thread.sleep(3000);

View File

@ -136,12 +136,12 @@ public class CloudTestUtils {
boolean requireLeaders) { boolean requireLeaders) {
return (liveNodes, collectionState) -> { return (liveNodes, collectionState) -> {
if (collectionState == null) { if (collectionState == null) {
log.trace("-- null collection"); log.info("-- null collection");
return false; return false;
} }
Collection<Slice> slices = withInactive ? collectionState.getSlices() : collectionState.getActiveSlices(); Collection<Slice> slices = withInactive ? collectionState.getSlices() : collectionState.getActiveSlices();
if (slices.size() != expectedShards) { if (slices.size() != expectedShards) {
log.trace("-- wrong number of slices, expected={}, found={}: {}", expectedShards, collectionState.getSlices().size(), collectionState.getSlices()); log.info("-- wrong number of slices, expected={}, found={}: {}", expectedShards, collectionState.getSlices().size(), collectionState.getSlices());
return false; return false;
} }
Set<String> leaderless = new HashSet<>(); Set<String> leaderless = new HashSet<>();
@ -160,14 +160,14 @@ public class CloudTestUtils {
activeReplicas++; activeReplicas++;
} }
if (activeReplicas != expectedReplicas) { if (activeReplicas != expectedReplicas) {
log.trace("-- wrong number of active replicas in slice {}, expected={}, found={}", slice.getName(), expectedReplicas, activeReplicas); log.info("-- wrong number of active replicas in slice {}, expected={}, found={}", slice.getName(), expectedReplicas, activeReplicas);
return false; return false;
} }
} }
if (leaderless.isEmpty()) { if (leaderless.isEmpty()) {
return true; return true;
} else { } else {
log.trace("-- shards without leaders: {}", leaderless); log.info("-- shards without leaders: {}", leaderless);
return false; return false;
} }
}; };

View File

@ -22,6 +22,7 @@ import java.util.Map;
import java.util.Set; import java.util.Set;
import org.apache.lucene.util.LuceneTestCase.Slow; import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.request.CollectionAdminRequest; import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.DocCollection;
@ -44,7 +45,6 @@ public class ClusterStateUpdateTest extends SolrCloudTestCase {
configureCluster(3) configureCluster(3)
.addConfig("conf", configset("cloud-minimal")) .addConfig("conf", configset("cloud-minimal"))
.configure(); .configure();
} }
@BeforeClass @BeforeClass
@ -112,7 +112,7 @@ public class ClusterStateUpdateTest extends SolrCloudTestCase {
assertEquals(3, liveNodes.size()); assertEquals(3, liveNodes.size());
// shut down node 2 // shut down node 2
cluster.stopJettySolrRunner(2); JettySolrRunner j = cluster.stopJettySolrRunner(2);
// slight pause (15s timeout) for watch to trigger // slight pause (15s timeout) for watch to trigger
for(int i = 0; i < (5 * 15); i++) { for(int i = 0; i < (5 * 15); i++) {
@ -122,6 +122,8 @@ public class ClusterStateUpdateTest extends SolrCloudTestCase {
Thread.sleep(200); Thread.sleep(200);
} }
cluster.waitForJettyToStop(j);
assertEquals(2, zkController2.getClusterState().getLiveNodes().size()); assertEquals(2, zkController2.getClusterState().getLiveNodes().size());
cluster.getJettySolrRunner(1).stop(); cluster.getJettySolrRunner(1).stop();

Some files were not shown because too many files have changed in this diff Show More