mirror of https://github.com/apache/lucene.git
SOLR-12801: Make massive improvements to the tests.
SOLR-12804: Remove static modifier from Overseer queue access.
SOLR-12896: Introduce more checks for shutdown and closed to improve clean close and shutdown. (Partial)
SOLR-12897: Introduce AlreadyClosedException to clean up silly close / shutdown logging. (Partial)
SOLR-12898: Replace cluster state polling with ZkStateReader#waitFor. (Partial)
SOLR-12923: The new AutoScaling tests are way too flaky and need special attention. (Partial)
SOLR-12932: ant test (without badapples=false) should pass easily for developers. (Partial)
SOLR-12933: Fix SolrCloud distributed commit.
This commit is contained in:
parent 81c092d826
commit 75b1831967
@@ -90,5 +90,9 @@ grant {
   permission javax.security.auth.kerberos.ServicePermission "HTTP/127.0.0.1@EXAMPLE.COM", "initiate";
   permission javax.security.auth.kerberos.ServicePermission "HTTP/127.0.0.1@EXAMPLE.COM", "accept";
   permission javax.security.auth.kerberos.DelegationPermission "\"HTTP/127.0.0.1@EXAMPLE.COM\" \"krbtgt/EXAMPLE.COM@EXAMPLE.COM\"";
 
+  // java 8 accessibility requires this perm - should not after 8 I believe (rrd4j is the root reason we hit an accessibility code path)
+  permission java.awt.AWTPermission "listenToAllAWTEvents";
+  permission java.awt.AWTPermission "accessEventQueue";
+
 };
@@ -131,15 +131,14 @@ New Features
 ----------------------
 
 (No Changes)
 
-Other Changes
-----------------------
-
-* SOLR-12972: deprecate unused SolrIndexConfig.luceneVersion (Christine Poerschke)
-
 Bug Fixes
 ----------------------
 
 * SOLR-12546: CVSResponseWriter omits useDocValuesAsStored=true field when fl=*
   (Munendra S N via Mikhail Khludnev)
 
+* SOLR-12933: Fix SolrCloud distributed commit. (Mark Miller)
+
 Improvements
 ----------------------
@@ -149,6 +148,25 @@ Improvements
 * SOLR-12992: When using binary format, ExportWriter to directly copy BytesRef instead of
   creating new String (noble)
 
+* SOLR-12898: Replace cluster state polling with ZkStateReader#waitFor. (Mark Miller)
+
+* SOLR-12897: Introduce AlreadyClosedException to clean up silly close / shutdown logging. (Mark Miller)
+
+* SOLR-12896: Introduce more checks for shutdown and closed to improve clean close and shutdown. (Mark Miller)
+
+* SOLR-12804: Remove static modifier from Overseer queue access. (Mark Miller)
+
+Other Changes
+----------------------
+
+* SOLR-12972: deprecate unused SolrIndexConfig.luceneVersion (Christine Poerschke)
+
+* SOLR-12801: Make massive improvements to the tests. (Mark Miller)
+
+* SOLR-12923: The new AutoScaling tests are way too flaky and need special attention. (Mark Miller)
+
+* SOLR-12932: ant test (without badapples=false) should pass easily for developers. (Mark Miller)
+
 ================== 7.6.0 ==================
 
 Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
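The SOLR-12898 entry above refers to replacing sleep-and-poll loops on cluster state with blocking waits. A minimal sketch of that pattern, using a hypothetical collection name and shard count (illustration only, not code from this commit):

    import java.util.concurrent.TimeUnit;
    import org.apache.solr.common.cloud.ZkStateReader;

    class WaitForStateSketch {
      // Block until the (hypothetical) collection "myCollection" reports two
      // active shards, instead of polling getClusterState() in a sleep loop.
      static void waitForTwoActiveShards(ZkStateReader reader) throws Exception {
        reader.waitForState("myCollection", 30, TimeUnit.SECONDS,
            (liveNodes, state) -> state != null && state.getActiveSlices().size() == 2);
      }
    }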
@@ -20,6 +20,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashSet;
+import java.util.concurrent.TimeoutException;
 
 import org.apache.solr.analytics.util.AnalyticsResponseHeadings;
 import org.apache.solr.analytics.util.MedianCalculator;
@@ -29,11 +30,11 @@ import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.client.solrj.request.QueryRequest;
 import org.apache.solr.client.solrj.request.UpdateRequest;
 import org.apache.solr.client.solrj.response.QueryResponse;
 import org.apache.solr.cloud.AbstractDistribZkTestBase;
 import org.apache.solr.cloud.SolrCloudTestCase;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.util.NamedList;
-import org.junit.BeforeClass;
+import org.junit.After;
+import org.junit.Before;
 
 public class LegacyAbstractAnalyticsCloudTest extends SolrCloudTestCase {
 
@@ -41,19 +42,23 @@ public class LegacyAbstractAnalyticsCloudTest extends SolrCloudTestCase {
   protected static final int TIMEOUT = DEFAULT_TIMEOUT;
   protected static final String id = "id";
 
-  @BeforeClass
-  public static void setupCollection() throws Exception {
+  @Before
+  public void setupCollection() throws Exception {
     configureCluster(4)
         .addConfig("conf", configset("cloud-analytics"))
         .configure();
 
     CollectionAdminRequest.createCollection(COLLECTIONORALIAS, "conf", 2, 1).process(cluster.getSolrClient());
     AbstractDistribZkTestBase.waitForRecoveriesToFinish(COLLECTIONORALIAS, cluster.getSolrClient().getZkStateReader(),
         false, true, TIMEOUT);
-    cleanIndex();
+    cluster.waitForActiveCollection(COLLECTIONORALIAS, 2, 2);
   }
 
+  @After
+  public void teardownCollection() throws Exception {
+    cluster.deleteAllCollections();
+    shutdownCluster();
+  }
+
-  public static void cleanIndex() throws Exception {
+  public void cleanIndex() throws Exception {
     new UpdateRequest()
         .deleteByQuery("*:*")
         .commit(cluster.getSolrClient(), COLLECTIONORALIAS);
@@ -81,7 +86,7 @@ public class LegacyAbstractAnalyticsCloudTest extends SolrCloudTestCase {
     }
   }
 
-  protected NamedList<Object> queryLegacyCloudAnalytics(String[] testParams) throws SolrServerException, IOException, InterruptedException {
+  protected NamedList<Object> queryLegacyCloudAnalytics(String[] testParams) throws SolrServerException, IOException, InterruptedException, TimeoutException {
     ModifiableSolrParams params = new ModifiableSolrParams();
     params.set("q", "*:*");
     params.set("indent", "true");
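The hunks above move the analytics cloud tests from one static cluster per class to a fresh cluster per test method. A hedged sketch of that lifecycle, with hypothetical collection and configset names:

    import org.apache.solr.client.solrj.request.CollectionAdminRequest;
    import org.apache.solr.cloud.SolrCloudTestCase;
    import org.junit.After;
    import org.junit.Before;

    public class PerTestClusterSketch extends SolrCloudTestCase {

      @Before
      public void setupCluster() throws Exception {
        // build a new MiniSolrCloudCluster for every test method
        configureCluster(2).addConfig("conf", configset("cloud-minimal")).configure();
        CollectionAdminRequest.createCollection("c1", "conf", 1, 1)
            .process(cluster.getSolrClient());
        cluster.waitForActiveCollection("c1", 1, 1);
      }

      @After
      public void teardownCluster() throws Exception {
        // no state leaks into the next test
        cluster.deleteAllCollections();
        shutdownCluster();
      }
    }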
@@ -21,7 +21,7 @@ import java.util.List;
 
 import org.apache.solr.client.solrj.request.UpdateRequest;
 import org.apache.solr.common.util.NamedList;
-import org.junit.BeforeClass;
+import org.junit.Before;
 import org.junit.Test;
 
 public class LegacyNoFacetCloudTest extends LegacyAbstractAnalyticsCloudTest {
@@ -57,16 +57,20 @@ public class LegacyNoFacetCloudTest extends LegacyAbstractAnalyticsCloudTest {
   static ArrayList<String> stringTestStart;
   static long stringMissing = 0;
 
-  @BeforeClass
-  public static void populate() throws Exception {
-    cleanIndex();
-
+  @Before
+  public void populate() throws Exception {
     intTestStart = new ArrayList<>();
     longTestStart = new ArrayList<>();
     floatTestStart = new ArrayList<>();
     doubleTestStart = new ArrayList<>();
     dateTestStart = new ArrayList<>();
     stringTestStart = new ArrayList<>();
+    intMissing = 0;
+    longMissing = 0;
+    doubleMissing = 0;
+    floatMissing = 0;
+    dateMissing = 0;
+    stringMissing = 0;
 
     UpdateRequest req = new UpdateRequest();
     for (int j = 0; j < NUM_LOOPS; ++j) {
@@ -24,7 +24,7 @@ import java.util.List;
 
 import org.apache.solr.client.solrj.request.UpdateRequest;
 import org.apache.solr.common.util.NamedList;
 import org.junit.Assert;
-import org.junit.BeforeClass;
+import org.junit.Before;
 import org.junit.Test;
 
@@ -85,9 +85,8 @@ public class LegacyFieldFacetCloudTest extends LegacyAbstractAnalyticsFacetCloud
   private static ArrayList<ArrayList<Integer>> multiDateTestStart;
   private static ArrayList<Long> multiDateTestMissing;
 
-  @BeforeClass
-  public static void beforeClass() throws Exception {
-    cleanIndex();
+  @Before
+  public void beforeTest() throws Exception {
 
     //INT
     intDateTestStart = new ArrayList<>();
@@ -24,7 +24,7 @@ import java.util.List;
 
 import org.apache.solr.client.solrj.request.UpdateRequest;
 import org.apache.solr.common.util.NamedList;
-import org.junit.BeforeClass;
+import org.junit.Before;
 import org.junit.Test;
 
 public class LegacyFieldFacetExtrasCloudTest extends LegacyAbstractAnalyticsFacetCloudTest {
@@ -42,9 +42,8 @@ public class LegacyFieldFacetExtrasCloudTest extends LegacyAbstractAnalyticsFace
   static ArrayList<ArrayList<Integer>> intDoubleTestStart;
   static ArrayList<ArrayList<Integer>> intStringTestStart;
 
-  @BeforeClass
-  public static void beforeClass() throws Exception {
-    cleanIndex();
+  @Before
+  public void beforeTest() throws Exception {
 
     //INT
     intLongTestStart = new ArrayList<>();
@@ -22,7 +22,7 @@ import java.util.List;
 
 import org.apache.solr.client.solrj.request.UpdateRequest;
 import org.apache.solr.common.util.NamedList;
-import org.junit.BeforeClass;
+import org.junit.Before;
 import org.junit.Test;
 
 public class LegacyQueryFacetCloudTest extends LegacyAbstractAnalyticsFacetCloudTest {
@@ -39,9 +39,8 @@ public class LegacyQueryFacetCloudTest extends LegacyAbstractAnalyticsFacetCloud
   private static ArrayList<ArrayList<Long>> longTestStart = new ArrayList<>();
   private static ArrayList<ArrayList<Float>> floatTestStart = new ArrayList<>();
 
-  @BeforeClass
-  public static void beforeClass() throws Exception {
-    cleanIndex();
+  @Before
+  public void beforeTest() throws Exception {
 
     //INT
     int1TestStart.add(new ArrayList<Integer>());
@@ -21,7 +21,7 @@ import java.util.List;
 
 import org.apache.solr.client.solrj.request.UpdateRequest;
 import org.apache.solr.common.util.NamedList;
-import org.junit.BeforeClass;
+import org.junit.Before;
 import org.junit.Test;
 
 
@@ -44,9 +44,8 @@ public class LegacyRangeFacetCloudTest extends LegacyAbstractAnalyticsFacetCloud
   static ArrayList<ArrayList<Float>> floatDoubleTestStart;
   static ArrayList<ArrayList<Float>> floatDateTestStart;
 
-  @BeforeClass
-  public static void beforeClass() throws Exception {
-    cleanIndex();
+  @Before
+  public void beforeTest() throws Exception {
 
     //INT
     intLongTestStart = new ArrayList<>();
@@ -52,7 +52,7 @@ public class TestContentStreamDataSource extends AbstractDataImportHandlerTestCa
     super.setUp();
     instance = new SolrInstance("inst", null);
     instance.setUp();
-    jetty = createJetty(instance);
+    jetty = createAndStartJetty(instance);
   }
 
   @Override
@@ -173,7 +173,7 @@ public class TestContentStreamDataSource extends AbstractDataImportHandlerTestCa
 
   }
 
-  private JettySolrRunner createJetty(SolrInstance instance) throws Exception {
+  private JettySolrRunner createAndStartJetty(SolrInstance instance) throws Exception {
     Properties nodeProperties = new Properties();
     nodeProperties.setProperty("solr.data.dir", instance.getDataDir());
     JettySolrRunner jetty = new JettySolrRunner(instance.getHomeDir(), nodeProperties, buildJettyConfig("/solr"));
@@ -127,7 +127,7 @@ public class TestSolrEntityProcessorEndToEnd extends AbstractDataImportHandlerTe
     // data source solr instance
     instance = new SolrInstance();
     instance.setUp();
-    jetty = createJetty(instance);
+    jetty = createAndStartJetty(instance);
   }
 
   @Override
@@ -362,7 +362,7 @@ public class TestSolrEntityProcessorEndToEnd extends AbstractDataImportHandlerTe
     }
   }
 
-  private JettySolrRunner createJetty(SolrInstance instance) throws Exception {
+  private JettySolrRunner createAndStartJetty(SolrInstance instance) throws Exception {
     Properties nodeProperties = new Properties();
     nodeProperties.setProperty("solr.data.dir", instance.getDataDir());
     JettySolrRunner jetty = new JettySolrRunner(instance.getHomeDir(), nodeProperties, buildJettyConfig("/solr"));
@@ -26,7 +26,6 @@ import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 
-import org.apache.solr.cloud.AbstractZkTestCase;
 import org.apache.solr.cloud.ZkTestServer;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.util.SuppressForbidden;
@@ -62,7 +61,7 @@ public class TestZKPropertiesWriter extends AbstractDataImportHandlerTestCase {
     System.setProperty("zkHost", zkServer.getZkAddress());
     System.setProperty("jetty.port", "0000");
 
-    AbstractZkTestCase.buildZooKeeper(zkServer.getZkHost(), zkServer.getZkAddress(), getFile("dih/solr"),
+    zkServer.buildZooKeeper(getFile("dih/solr"),
         "dataimport-solrconfig.xml", "dataimport-schema.xml");
 
     //initCore("solrconfig.xml", "schema.xml", getFile("dih/solr").getAbsolutePath());
@@ -18,14 +18,13 @@ package org.apache.solr.ltr;
 
 import java.util.Iterator;
 import java.util.Map;
-import java.util.concurrent.Executor;
+import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Semaphore;
-import java.util.concurrent.SynchronousQueue;
-import java.util.concurrent.TimeUnit;
 
 import org.apache.solr.common.util.ExecutorUtil;
 import org.apache.solr.common.util.NamedList;
-import org.apache.solr.util.DefaultSolrThreadFactory;
+import org.apache.solr.core.CloseHook;
+import org.apache.solr.core.SolrCore;
 import org.apache.solr.util.SolrPluginUtils;
 import org.apache.solr.util.plugin.NamedListInitializedPlugin;
@@ -58,7 +57,7 @@ import org.apache.solr.util.plugin.NamedListInitializedPlugin;
  * <code>totalPoolThreads</code> imposes a contention between the queries if
 * <code>(totalPoolThreads < numThreadsPerRequest * total parallel queries)</code>.
 */
-final public class LTRThreadModule implements NamedListInitializedPlugin {
+final public class LTRThreadModule extends CloseHook implements NamedListInitializedPlugin {
 
   public static LTRThreadModule getInstance(NamedList args) {
 
@@ -103,13 +102,10 @@ final public class LTRThreadModule implements NamedListInitializedPlugin {
   // settings
   private int totalPoolThreads = 1;
   private int numThreadsPerRequest = 1;
-  private int maxPoolSize = Integer.MAX_VALUE;
-  private long keepAliveTimeSeconds = 10;
-  private String threadNamePrefix = "ltrExecutor";
 
   // implementation
   private Semaphore ltrSemaphore;
-  private Executor createWeightScoreExecutor;
+  private volatile ExecutorService createWeightScoreExecutor;
 
   public LTRThreadModule() {
   }
@@ -132,13 +128,6 @@ final public class LTRThreadModule implements NamedListInitializedPlugin {
     } else {
       ltrSemaphore = null;
     }
-    createWeightScoreExecutor = new ExecutorUtil.MDCAwareThreadPoolExecutor(
-        0,
-        maxPoolSize,
-        keepAliveTimeSeconds, TimeUnit.SECONDS, // terminate idle threads after 10 sec
-        new SynchronousQueue<Runnable>(), // directly hand off tasks
-        new DefaultSolrThreadFactory(threadNamePrefix)
-    );
   }
 
   private void validate() {
@@ -161,18 +150,6 @@ final public class LTRThreadModule implements NamedListInitializedPlugin {
     this.numThreadsPerRequest = numThreadsPerRequest;
   }
 
-  public void setMaxPoolSize(int maxPoolSize) {
-    this.maxPoolSize = maxPoolSize;
-  }
-
-  public void setKeepAliveTimeSeconds(long keepAliveTimeSeconds) {
-    this.keepAliveTimeSeconds = keepAliveTimeSeconds;
-  }
-
-  public void setThreadNamePrefix(String threadNamePrefix) {
-    this.threadNamePrefix = threadNamePrefix;
-  }
-
   public Semaphore createQuerySemaphore() {
     return (numThreadsPerRequest > 1 ? new Semaphore(numThreadsPerRequest) : null);
   }
@@ -189,4 +166,18 @@ final public class LTRThreadModule implements NamedListInitializedPlugin {
     createWeightScoreExecutor.execute(command);
   }
 
+  @Override
+  public void preClose(SolrCore core) {
+    ExecutorUtil.shutdownAndAwaitTermination(createWeightScoreExecutor);
+  }
+
+  @Override
+  public void postClose(SolrCore core) {
+
+  }
+
+  public void setExecutor(ExecutorService sharedExecutor) {
+    this.createWeightScoreExecutor = sharedExecutor;
+  }
+
 }
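With the pool-creation code removed above, LTRThreadModule now runs tasks on an executor handed to it via setExecutor and, as a CloseHook, shuts that executor down when the owning core closes. A sketch of how a CloseHook is attached to a core (this registration code is illustrative, not from this commit):

    import org.apache.solr.core.CloseHook;
    import org.apache.solr.core.SolrCore;

    class CloseHookSketch {
      // preClose() runs before the core tears down its resources,
      // postClose() after; LTRThreadModule uses preClose() to call
      // ExecutorUtil.shutdownAndAwaitTermination on the shared executor.
      static void attach(SolrCore core, CloseHook hook) {
        core.addCloseHook(hook);
      }
    }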
@@ -204,7 +204,10 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
             "searcher is null");
       }
       leafContexts = searcher.getTopReaderContext().leaves();
-
+      if (threadManager != null) {
+        threadManager.setExecutor(context.getRequest().getCore().getCoreContainer().getUpdateShardHandler().getUpdateExecutor());
+      }
+
       // Setup LTRScoringQuery
       scoringQuery = SolrQueryRequestContextUtils.getScoringQuery(req);
       docsWereNotReranked = (scoringQuery == null);
@@ -162,7 +162,9 @@ public class LTRQParserPlugin extends QParserPlugin implements ResourceLoaderAwa
       final String fvStoreName = SolrQueryRequestContextUtils.getFvStoreName(req);
       // Check if features are requested and if the model feature store and feature-transform feature store are the same
       final boolean featuresRequestedFromSameStore = (modelFeatureStoreName.equals(fvStoreName) || fvStoreName == null) ? extractFeatures:false;
-
+      if (threadManager != null) {
+        threadManager.setExecutor(req.getCore().getCoreContainer().getUpdateShardHandler().getUpdateExecutor());
+      }
       final LTRScoringQuery scoringQuery = new LTRScoringQuery(ltrScoringModel,
           extractEFIParams(localParams),
           featuresRequestedFromSameStore, threadManager);
@@ -25,7 +25,6 @@ import org.apache.solr.client.solrj.embedded.JettySolrRunner;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.client.solrj.response.CollectionAdminResponse;
 import org.apache.solr.client.solrj.response.QueryResponse;
-import org.apache.solr.cloud.AbstractDistribZkTestBase;
 import org.apache.solr.cloud.MiniSolrCloudCluster;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.cloud.ZkStateReader;
@@ -232,7 +231,7 @@ public class TestLTROnSolrCloud extends TestRerankBase {
       fail("Could not create collection. Response" + response.toString());
     }
-    ZkStateReader zkStateReader = solrCluster.getSolrClient().getZkStateReader();
-    AbstractDistribZkTestBase.waitForRecoveriesToFinish(name, zkStateReader, false, true, 100);
+    solrCluster.waitForActiveCollection(name, numShards, numShards * numReplicas);
   }
 
 
@@ -38,8 +38,10 @@ public class JettyConfig {
   public final Map<Class<? extends Filter>, String> extraFilters;
 
   public final SSLConfig sslConfig;
+
+  public final int portRetryTime;
 
-  private JettyConfig(int port, String context, boolean stopAtShutdown, Long waitForLoadingCoresToFinishMs, Map<ServletHolder, String> extraServlets,
+  private JettyConfig(int port, int portRetryTime, String context, boolean stopAtShutdown, Long waitForLoadingCoresToFinishMs, Map<ServletHolder, String> extraServlets,
                       Map<Class<? extends Filter>, String> extraFilters, SSLConfig sslConfig) {
     this.port = port;
     this.context = context;
@@ -48,6 +50,7 @@ public class JettyConfig {
     this.extraServlets = extraServlets;
     this.extraFilters = extraFilters;
     this.sslConfig = sslConfig;
+    this.portRetryTime = portRetryTime;
   }
 
   public static Builder builder() {
@@ -74,6 +77,7 @@ public class JettyConfig {
     Map<ServletHolder, String> extraServlets = new TreeMap<>();
     Map<Class<? extends Filter>, String> extraFilters = new LinkedHashMap<>();
     SSLConfig sslConfig = null;
+    int portRetryTime = 60;
 
     public Builder setPort(int port) {
       this.port = port;
@@ -121,9 +125,15 @@ public class JettyConfig {
       this.sslConfig = sslConfig;
       return this;
     }
+
+    public Builder withPortRetryTime(int portRetryTime) {
+      this.portRetryTime = portRetryTime;
+      return this;
+    }
+
 
     public JettyConfig build() {
-      return new JettyConfig(port, context, stopAtShutdown, waitForLoadingCoresToFinishMs, extraServlets, extraFilters, sslConfig);
+      return new JettyConfig(port, portRetryTime, context, stopAtShutdown, waitForLoadingCoresToFinishMs, extraServlets, extraFilters, sslConfig);
     }
 
 }
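For reference, a small sketch of building a config with the new knob; the 30-second retry window and the "/solr" context are arbitrary values here (the builder default above is 60):

    import org.apache.solr.client.solrj.embedded.JettyConfig;

    class JettyConfigSketch {
      static JettyConfig build() {
        return JettyConfig.builder()
            .setContext("/solr")
            .withPortRetryTime(30) // retry failed port binds for up to 30s
            .build();
      }
    }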
@@ -16,18 +16,9 @@
  */
 package org.apache.solr.client.solrj.embedded;
 
-import javax.servlet.DispatcherType;
-import javax.servlet.Filter;
-import javax.servlet.FilterChain;
-import javax.servlet.FilterConfig;
-import javax.servlet.ServletException;
-import javax.servlet.ServletRequest;
-import javax.servlet.ServletResponse;
-import javax.servlet.http.HttpServlet;
-import javax.servlet.http.HttpServletRequest;
-import javax.servlet.http.HttpServletResponse;
 import java.io.IOException;
 import java.lang.invoke.MethodHandles;
+import java.net.BindException;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.ArrayList;
@@ -41,10 +32,24 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
 
+import javax.servlet.DispatcherType;
+import javax.servlet.Filter;
+import javax.servlet.FilterChain;
+import javax.servlet.FilterConfig;
+import javax.servlet.ServletException;
+import javax.servlet.ServletRequest;
+import javax.servlet.ServletResponse;
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
 import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.cloud.SocketProxy;
 import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.common.util.TimeSource;
 import org.apache.solr.core.CoreContainer;
 import org.apache.solr.servlet.SolrDispatchFilter;
+import org.apache.solr.util.TimeOut;
 import org.eclipse.jetty.server.Connector;
 import org.eclipse.jetty.server.HttpConfiguration;
 import org.eclipse.jetty.server.HttpConnectionFactory;
@@ -61,6 +66,7 @@ import org.eclipse.jetty.servlet.Source;
 import org.eclipse.jetty.util.component.LifeCycle;
 import org.eclipse.jetty.util.ssl.SslContextFactory;
 import org.eclipse.jetty.util.thread.QueuedThreadPool;
+import org.eclipse.jetty.util.thread.ReservedThreadExecutor;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.slf4j.MDC;
@@ -80,8 +86,8 @@ public class JettySolrRunner {
 
   Server server;
 
-  FilterHolder dispatchFilter;
-  FilterHolder debugFilter;
+  volatile FilterHolder dispatchFilter;
+  volatile FilterHolder debugFilter;
 
   private boolean waitOnSolr = false;
   private int jettyPort = -1;
@@ -98,6 +104,16 @@ public class JettySolrRunner {
 
   private int proxyPort = -1;
 
+  private final boolean enableProxy;
+
+  private SocketProxy proxy;
+
+  private String protocol;
+
+  private String host;
+
+  private volatile boolean started = false;
+
   public static class DebugFilter implements Filter {
     private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
@@ -189,7 +205,7 @@ public class JettySolrRunner {
   public JettySolrRunner(String solrHome, JettyConfig config) {
     this(solrHome, new Properties(), config);
   }
 
   /**
    * Construct a JettySolrRunner
    *
@@ -200,10 +216,33 @@ public class JettySolrRunner {
    * @param config the configuration
    */
   public JettySolrRunner(String solrHome, Properties nodeProperties, JettyConfig config) {
+    this(solrHome, nodeProperties, config, false);
+  }
+
+  /**
+   * Construct a JettySolrRunner
+   *
+   * After construction, you must start the jetty with {@link #start()}
+   *
+   * @param solrHome the solrHome to use
+   * @param nodeProperties the container properties
+   * @param config the configuration
+   * @param enableProxy enables proxy feature to disable connections
+   */
+  public JettySolrRunner(String solrHome, Properties nodeProperties, JettyConfig config, boolean enableProxy) {
+    this.enableProxy = enableProxy;
     this.solrHome = solrHome;
     this.config = config;
     this.nodeProperties = nodeProperties;
 
+    if (enableProxy) {
+      try {
+        proxy = new SocketProxy(0, config.sslConfig != null && config.sslConfig.isSSLMode());
+      } catch (Exception e) {
+        throw new RuntimeException(e);
+      }
+      setProxyPort(proxy.getListenPort());
+    }
+
     this.init(this.config.port);
   }
 
@@ -213,7 +252,7 @@ public class JettySolrRunner {
     QueuedThreadPool qtp = new QueuedThreadPool();
     qtp.setMaxThreads(THREAD_POOL_MAX_THREADS);
     qtp.setIdleTimeout(THREAD_POOL_MAX_IDLE_TIME_MS);
-    qtp.setStopTimeout((int) TimeUnit.MINUTES.toMillis(1));
+    qtp.setReservedThreads(0);
     server = new Server(qtp);
     server.manage(qtp);
     server.setStopAtShutdown(config.stopAtShutdown);
@@ -246,7 +285,7 @@ public class JettySolrRunner {
       connector.setPort(port);
       connector.setHost("127.0.0.1");
       connector.setIdleTimeout(THREAD_POOL_MAX_IDLE_TIME_MS);
-
+      connector.setStopTimeout(0);
       server.setConnectors(new Connector[] {connector});
       server.setSessionIdManager(new DefaultSessionIdManager(server, new Random()));
     } else {
@@ -271,10 +310,7 @@ public class JettySolrRunner {
 
       @Override
       public void lifeCycleStarting(LifeCycle arg0) {
-        synchronized (JettySolrRunner.this) {
-          waitOnSolr = true;
-          JettySolrRunner.this.notify();
-        }
+
       }
 
       @Override
@@ -306,6 +342,11 @@ public class JettySolrRunner {
         dispatchFilter.setHeldClass(SolrDispatchFilter.class);
         dispatchFilter.setInitParameter("excludePatterns", excludePatterns);
         root.addFilter(dispatchFilter, "*", EnumSet.of(DispatcherType.REQUEST));
+
+        synchronized (JettySolrRunner.this) {
+          waitOnSolr = true;
+          JettySolrRunner.this.notify();
+        }
       }
 
       @Override
@@ -344,15 +385,19 @@ public class JettySolrRunner {
   }
 
   public String getNodeName() {
+    if (getCoreContainer() == null) {
+      return null;
+    }
     return getCoreContainer().getZkController().getNodeName();
   }
 
   public boolean isRunning() {
-    return server.isRunning();
+    return server.isRunning() && dispatchFilter != null && dispatchFilter.isRunning();
   }
 
   public boolean isStopped() {
-    return server.isStopped();
+    return (server.isStopped() && dispatchFilter == null) || (server.isStopped() && dispatchFilter.isStopped()
+        && ((QueuedThreadPool) server.getThreadPool()).isStopped());
   }
 
   // ------------------------------------------------------------------------------------------------
@@ -382,31 +427,53 @@ public class JettySolrRunner {
     // Do not let Jetty/Solr pollute the MDC for this thread
     Map<String, String> prevContext = MDC.getCopyOfContextMap();
     MDC.clear();
+
+    log.info("Start Jetty (original configured port={})", this.config.port);
+
     try {
+      int port = reusePort && jettyPort != -1 ? jettyPort : this.config.port;
+
       // if started before, make a new server
       if (startedBefore) {
         waitOnSolr = false;
-        int port = reusePort ? jettyPort : this.config.port;
         init(port);
       } else {
         startedBefore = true;
       }
 
       if (!server.isRunning()) {
-        server.start();
+        if (config.portRetryTime > 0) {
+          retryOnPortBindFailure(config.portRetryTime, port);
+        } else {
+          server.start();
+        }
       }
       synchronized (JettySolrRunner.this) {
         int cnt = 0;
-        while (!waitOnSolr) {
+        while (!waitOnSolr || !dispatchFilter.isRunning() || getCoreContainer() == null) {
           this.wait(100);
-          if (cnt++ == 5) {
+          if (cnt++ == 15) {
            throw new RuntimeException("Jetty/Solr unresponsive");
          }
        }
      }
 
-      if (config.waitForLoadingCoresToFinishMs != null && config.waitForLoadingCoresToFinishMs > 0L) waitForLoadingCoresToFinish(config.waitForLoadingCoresToFinishMs);
+      if (config.waitForLoadingCoresToFinishMs != null && config.waitForLoadingCoresToFinishMs > 0L) {
+        waitForLoadingCoresToFinish(config.waitForLoadingCoresToFinishMs);
+      }
+
+      setProtocolAndHost();
+
+      if (enableProxy) {
+        if (started) {
+          proxy.reopen();
+        } else {
+          proxy.open(getBaseUrl().toURI());
+        }
+      }
+
     } finally {
+      started = true;
       if (prevContext != null) {
         MDC.setContextMap(prevContext);
       } else {
@@ -415,6 +482,43 @@ public class JettySolrRunner {
     }
   }
 
+  private void setProtocolAndHost() {
+    String protocol = null;
+
+    Connector[] conns = server.getConnectors();
+    if (0 == conns.length) {
+      throw new IllegalStateException("Jetty Server has no Connectors");
+    }
+    ServerConnector c = (ServerConnector) conns[0];
+
+    protocol = c.getDefaultProtocol().startsWith("SSL") ? "https" : "http";
+
+    this.protocol = protocol;
+    this.host = c.getHost();
+  }
+
+  private void retryOnPortBindFailure(int portRetryTime, int port) throws Exception, InterruptedException {
+    TimeOut timeout = new TimeOut(portRetryTime, TimeUnit.SECONDS, TimeSource.NANO_TIME);
+    int tryCnt = 1;
+    while (true) {
+      try {
+        log.info("Trying to start Jetty on port {} try number {} ...", port, tryCnt++);
+        server.start();
+        break;
+      } catch (BindException e) {
+        log.info("Port is in use, will try again until timeout of " + timeout);
+        server.stop();
+        Thread.sleep(3000);
+        if (!timeout.hasTimedOut()) {
+          continue;
+        }
+
+        throw e;
+      }
+    }
+  }
+
   /**
    * Stop the Jetty server
    *
@@ -422,11 +526,33 @@ public class JettySolrRunner {
    */
   public void stop() throws Exception {
     // Do not let Jetty/Solr pollute the MDC for this thread
-    Map<String, String> prevContext = MDC.getCopyOfContextMap();
+    Map<String,String> prevContext = MDC.getCopyOfContextMap();
     MDC.clear();
     try {
       Filter filter = dispatchFilter.getFilter();
 
+      // we want to shutdown outside of jetty cutting us off
+      SolrDispatchFilter sdf = getSolrDispatchFilter();
+      Thread shutdownThead = null;
+      if (sdf != null) {
+        shutdownThead = new Thread() {
+
+          public void run() {
+            try {
+              sdf.close();
+            } catch (Throwable t) {
+              log.error("Error shutting down Solr", t);
+            }
+          }
+
+        };
+        sdf.closeOnDestroy(false);
+        shutdownThead.start();
+      }
+
+      QueuedThreadPool qtp = (QueuedThreadPool) server.getThreadPool();
+      ReservedThreadExecutor rte = qtp.getBean(ReservedThreadExecutor.class);
+
       server.stop();
 
       if (server.getState().equals(Server.FAILED)) {
@@ -438,9 +564,48 @@ public class JettySolrRunner {
         }
       }
 
-      server.join();
+      // stop timeout is 0, so we will interrupt right away
+      while(!qtp.isStopped()) {
+        qtp.stop();
+        if (qtp.isStopped()) {
+          Thread.sleep(50);
+        }
+      }
+
+      // we tried to kill everything, now we wait for executor to stop
+      qtp.setStopTimeout(Integer.MAX_VALUE);
+      qtp.stop();
+      qtp.join();
+
+      if (rte != null) {
+        // we try and wait for the reserved thread executor, but it doesn't always seem to work
+        // so we actually set 0 reserved threads at creation
+
+        rte.stop();
+
+        TimeOut timeout = new TimeOut(30, TimeUnit.SECONDS, TimeSource.NANO_TIME);
+        timeout.waitFor("Timeout waiting for reserved executor to stop.", ()
+            -> rte.isStopped());
+      }
+
+      if (shutdownThead != null) {
+        shutdownThead.join();
+      }
+
+      do {
+        try {
+          server.join();
+        } catch (InterruptedException e) {
+          // ignore
+        }
+      } while (!server.isStopped());
 
     } finally {
-      if (prevContext != null) {
+      if (enableProxy) {
+        proxy.close();
+      }
+
+      if (prevContext != null) {
         MDC.setContextMap(prevContext);
       } else {
         MDC.clear();
@@ -461,15 +626,30 @@ public class JettySolrRunner {
     return ((ServerConnector) conns[0]).getLocalPort();
   }
 
+  /**
+   * Returns the Local Port of the jetty Server.
+   *
+   * @exception RuntimeException if there is no Connector
+   */
+  public int getLocalPort() {
+    return getLocalPort(false);
+  }
+
   /**
    * Returns the Local Port of the jetty Server.
    *
+   * @param internalPort pass true to get the true jetty port rather than the proxy port if configured
+   *
    * @exception RuntimeException if there is no Connector
    */
-  public int getLocalPort() {
+  public int getLocalPort(boolean internalPort) {
     if (jettyPort == -1) {
       throw new IllegalStateException("You cannot get the port until this instance has started");
     }
+    if (internalPort ) {
+      return jettyPort;
+    }
     return (proxyPort != -1) ? proxyPort : jettyPort;
   }
 
@@ -481,29 +661,27 @@ public class JettySolrRunner {
   public void setProxyPort(int proxyPort) {
     this.proxyPort = proxyPort;
   }
 
   /**
    * Returns a base URL consisting of the protocol, host, and port for a
    * Connector in use by the Jetty Server contained in this runner.
    */
   public URL getBaseUrl() {
-    String protocol = null;
     try {
-      Connector[] conns = server.getConnectors();
-      if (0 == conns.length) {
-        throw new IllegalStateException("Jetty Server has no Connectors");
-      }
-      ServerConnector c = (ServerConnector) conns[0];
-      if (c.getLocalPort() < 0) {
-        throw new IllegalStateException("Jetty Connector is not open: " +
-            c.getLocalPort());
-      }
-      protocol = c.getDefaultProtocol().startsWith("SSL") ? "https" : "http";
-      return new URL(protocol, c.getHost(), c.getLocalPort(), config.context);
+      return new URL(protocol, host, jettyPort, config.context);
     } catch (MalformedURLException e) {
-      throw new IllegalStateException
-          ("Java could not make sense of protocol: " + protocol, e);
+      throw new RuntimeException(e);
     }
   }
+
+  /**
+   * Returns a base URL consisting of the protocol, host, and port for a
+   * Connector in use by the Jetty Server contained in this runner.
+   */
+  public URL getProxyBaseUrl() {
+    try {
+      return new URL(protocol, host, getLocalPort(), config.context);
+    } catch (MalformedURLException e) {
+      throw new RuntimeException(e);
+    }
+  }
 
@@ -568,7 +746,11 @@ public class JettySolrRunner {
       CoreContainer cores = solrFilter.getCores();
       if (cores != null) {
         cores.waitForLoadingCoresToFinish(timeoutMs);
+      } else {
+        throw new IllegalStateException("The CoreContainer is not set!");
       }
+    } else {
+      throw new IllegalStateException("The dispatchFilter is not set!");
     }
   }
 
@@ -583,4 +765,8 @@ public class JettySolrRunner {
       this.delayValue = delay;
     }
   }
+
+  public SocketProxy getProxy() {
+    return proxy;
+  }
 }
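Putting the JettySolrRunner changes together: with enableProxy set, a SocketProxy sits in front of Jetty so a test can sever and restore connections. A hedged usage sketch (the solr home path is hypothetical):

    import java.util.Properties;
    import org.apache.solr.client.solrj.cloud.SocketProxy;
    import org.apache.solr.client.solrj.embedded.JettyConfig;
    import org.apache.solr.client.solrj.embedded.JettySolrRunner;

    class ProxySketch {
      static void run() throws Exception {
        JettySolrRunner jetty = new JettySolrRunner("/path/to/solr/home",
            new Properties(), JettyConfig.builder().build(), true);
        jetty.start();
        SocketProxy proxy = jetty.getProxy();
        proxy.close();  // simulate a partition: drop connections through the proxy
        proxy.reopen(); // restore connectivity
        jetty.stop();
      }
    }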
@@ -73,6 +73,7 @@ public abstract class ElectionContext implements Closeable {
 
   public ElectionContext(final String coreNodeName,
       final String electionPath, final String leaderPath, final ZkNodeProps leaderProps, final SolrZkClient zkClient) {
+    assert zkClient != null;
     this.id = coreNodeName;
     this.electionPath = electionPath;
     this.leaderPath = leaderPath;
@@ -116,6 +117,7 @@ class ShardLeaderElectionContextBase extends ElectionContext {
   protected String collection;
   protected LeaderElector leaderElector;
   protected ZkStateReader zkStateReader;
+  protected ZkController zkController;
   private Integer leaderZkNodeParentVersion;
 
   // Prevents a race between cancelling and becoming leader.
@@ -123,15 +125,29 @@ class ShardLeaderElectionContextBase extends ElectionContext {
 
   public ShardLeaderElectionContextBase(LeaderElector leaderElector,
       final String shardId, final String collection, final String coreNodeName,
-      ZkNodeProps props, ZkStateReader zkStateReader) {
+      ZkNodeProps props, ZkController zkController) {
     super(coreNodeName, ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection
         + "/leader_elect/" + shardId, ZkStateReader.getShardLeadersPath(
-        collection, shardId), props, zkStateReader.getZkClient());
+        collection, shardId), props, zkController.getZkClient());
     this.leaderElector = leaderElector;
+    this.zkStateReader = zkController.getZkStateReader();
     this.zkClient = zkStateReader.getZkClient();
-    this.zkStateReader = zkStateReader;
+    this.zkController = zkController;
     this.shardId = shardId;
     this.collection = collection;
+
+    String parent = new Path(leaderPath).getParent().toString();
+    ZkCmdExecutor zcmd = new ZkCmdExecutor(30000);
+    // only if /collections/{collection} exists already do we succeed in creating this path
+    log.info("make sure parent is created {}", parent);
+    try {
+      zcmd.ensureExists(parent, (byte[])null, CreateMode.PERSISTENT, zkClient, 2);
+    } catch (KeeperException e) {
+      throw new RuntimeException(e);
+    } catch (InterruptedException e) {
+      Thread.currentThread().interrupt();
+      throw new RuntimeException(e);
+    }
   }
 
   @Override
@@ -171,21 +187,12 @@ class ShardLeaderElectionContextBase extends ElectionContext {
   void runLeaderProcess(boolean weAreReplacement, int pauseBeforeStartMs)
       throws KeeperException, InterruptedException, IOException {
     // register as leader - if an ephemeral is already there, wait to see if it goes away
-
-    if (!zkClient.exists(ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection, true)) {
-      log.info("Will not register as leader because collection appears to be gone.");
-      return;
-    }
-
-    String parent = new Path(leaderPath).getParent().toString();
-    ZkCmdExecutor zcmd = new ZkCmdExecutor(30000);
-    // only if /collections/{collection} exists already do we succeed in creating this path
-    zcmd.ensureExists(parent, (byte[])null, CreateMode.PERSISTENT, zkClient, 2);
 
+    String parent = new Path(leaderPath).getParent().toString();
     try {
       RetryUtil.retryOnThrowable(NodeExistsException.class, 60000, 5000, () -> {
         synchronized (lock) {
-          log.debug("Creating leader registration node {} after winning as {}", leaderPath, leaderSeqPath);
+          log.info("Creating leader registration node {} after winning as {}", leaderPath, leaderSeqPath);
           List<Op> ops = new ArrayList<>(2);
 
           // We use a multi operation to get the parent nodes version, which will
@@ -210,6 +217,9 @@ class ShardLeaderElectionContextBase extends ElectionContext {
           assert leaderZkNodeParentVersion != null;
         }
       });
+    } catch (NoNodeException e) {
+      log.info("Will not register as leader because it seems the election is no longer taking place.");
+      return;
     } catch (Throwable t) {
       if (t instanceof OutOfMemoryError) {
         throw (OutOfMemoryError) t;
@@ -235,7 +245,9 @@ class ShardLeaderElectionContextBase extends ElectionContext {
           ZkStateReader.BASE_URL_PROP, leaderProps.get(ZkStateReader.BASE_URL_PROP),
           ZkStateReader.CORE_NAME_PROP, leaderProps.get(ZkStateReader.CORE_NAME_PROP),
           ZkStateReader.STATE_PROP, Replica.State.ACTIVE.toString());
-      Overseer.getStateUpdateQueue(zkClient).offer(Utils.toJSON(m));
+      assert zkController != null;
+      assert zkController.getOverseer() != null;
+      zkController.getOverseer().offerStateUpdate(Utils.toJSON(m));
     }
   }
 
@@ -254,7 +266,6 @@ class ShardLeaderElectionContextBase extends ElectionContext {
 final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
-  private final ZkController zkController;
   private final CoreContainer cc;
   private final SyncStrategy syncStrategy;
 
@@ -264,8 +275,7 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
       final String shardId, final String collection,
       final String coreNodeName, ZkNodeProps props, ZkController zkController, CoreContainer cc) {
     super(leaderElector, shardId, collection, coreNodeName, props,
-        zkController.getZkStateReader());
-    this.zkController = zkController;
+        zkController);
     this.cc = cc;
     syncStrategy = new SyncStrategy(cc);
   }
@@ -304,11 +314,8 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
     ActionThrottle lt;
     try (SolrCore core = cc.getCore(coreName)) {
       if (core == null ) {
-        if (cc.isShutDown()) {
-          return;
-        } else {
-          throw new SolrException(ErrorCode.SERVER_ERROR, "SolrCore not found:" + coreName + " in " + cc.getLoadedCoreNames());
-        }
+        // shutdown or removed
+        return;
       }
       MDCLoggingContext.setCore(core);
       lt = core.getUpdateHandler().getSolrCoreState().getLeaderThrottle();
@@ -326,7 +333,7 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
       // Clear the leader in clusterstate. We only need to worry about this if there is actually more than one replica.
       ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.LEADER.toLower(),
           ZkStateReader.SHARD_ID_PROP, shardId, ZkStateReader.COLLECTION_PROP, collection);
-      Overseer.getStateUpdateQueue(zkClient).offer(Utils.toJSON(m));
+      zkController.getOverseer().getStateUpdateQueue().offer(Utils.toJSON(m));
     }
 
     boolean allReplicasInLine = false;
@@ -349,13 +356,7 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
       try (SolrCore core = cc.getCore(coreName)) {
 
         if (core == null) {
-          if (!zkController.getCoreContainer().isShutDown()) {
-            cancelElection();
-            throw new SolrException(ErrorCode.SERVER_ERROR,
-                "SolrCore not found:" + coreName + " in " + cc.getLoadedCoreNames());
-          } else {
-            return;
-          }
+          return;
         }
 
         replicaType = core.getCoreDescriptor().getCloudDescriptor().getReplicaType();
@@ -698,7 +699,8 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
 final class OverseerElectionContext extends ElectionContext {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
   private final SolrZkClient zkClient;
-  private Overseer overseer;
+  private final Overseer overseer;
+  private volatile boolean isClosed = false;
 
   public OverseerElectionContext(SolrZkClient zkClient, Overseer overseer, final String zkNodeName) {
     super(zkNodeName, Overseer.OVERSEER_ELECT, Overseer.OVERSEER_ELECT + "/leader", null, zkClient);
@@ -732,8 +734,10 @@ final class OverseerElectionContext extends ElectionContext {
         log.warn("Wait interrupted ", e);
       }
     }
-    if (!overseer.getZkController().isClosed() && !overseer.getZkController().getCoreContainer().isShutDown()) {
-      overseer.start(id);
+    synchronized (this) {
+      if (!this.isClosed && !overseer.getZkController().getCoreContainer().isShutDown()) {
+        overseer.start(id);
+      }
     }
   }
 
@@ -744,7 +748,8 @@ final class OverseerElectionContext extends ElectionContext {
   }
 
   @Override
-  public void close() {
+  public synchronized void close() {
+    this.isClosed = true;
     overseer.close();
   }
 
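The OverseerElectionContext change above closes a race: a context could be closed while the election thread was still waiting, and then start a new Overseer anyway. The guard it applies, reduced to a standalone sketch:

    class StartCloseGuardSketch {
      private volatile boolean isClosed = false;

      // start only runs if close() has not won the monitor first
      void onBecomingLeader(Runnable startOverseer) {
        synchronized (this) {
          if (!isClosed) {
            startOverseer.run();
          }
        }
      }

      synchronized void close() {
        isClosed = true;
      }
    }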
@@ -26,6 +26,7 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.apache.solr.cloud.ZkController.ContextKey;
+import org.apache.solr.common.AlreadyClosedException;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.cloud.SolrZkClient;
 import org.apache.solr.common.cloud.ZkCmdExecutor;
@@ -346,6 +347,8 @@ public class LeaderElector {
       try {
         // am I the next leader?
         checkIfIamLeader(context, true);
+      } catch (AlreadyClosedException e) {
+
       } catch (Exception e) {
         if (!zkClient.isClosed()) {
           log.warn("", e);
@@ -16,6 +16,8 @@
  */
 package org.apache.solr.cloud;
 
+import static org.apache.solr.common.params.CommonParams.ID;
+
 import java.io.Closeable;
 import java.io.IOException;
 import java.lang.invoke.MethodHandles;
@@ -26,7 +28,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
-import com.codahale.metrics.Timer;
 import org.apache.solr.client.solrj.cloud.SolrCloudManager;
 import org.apache.solr.client.solrj.impl.ClusterStateProvider;
 import org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler;
@@ -39,9 +40,11 @@ import org.apache.solr.cloud.overseer.ReplicaMutator;
 import org.apache.solr.cloud.overseer.SliceMutator;
 import org.apache.solr.cloud.overseer.ZkStateWriter;
 import org.apache.solr.cloud.overseer.ZkWriteCommand;
+import org.apache.solr.common.AlreadyClosedException;
 import org.apache.solr.common.SolrCloseable;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.cloud.ClusterState;
+import org.apache.solr.common.cloud.ConnectionManager;
 import org.apache.solr.common.cloud.SolrZkClient;
 import org.apache.solr.common.cloud.ZkNodeProps;
 import org.apache.solr.common.cloud.ZkStateReader;
@@ -53,7 +56,7 @@ import org.apache.solr.common.util.Utils;
 import org.apache.solr.core.CloudConfig;
 import org.apache.solr.core.CoreContainer;
 import org.apache.solr.handler.admin.CollectionsHandler;
-import org.apache.solr.handler.component.ShardHandler;
+import org.apache.solr.handler.component.HttpShardHandler;
 import org.apache.solr.logging.MDCLoggingContext;
 import org.apache.solr.update.UpdateShardHandler;
 import org.apache.zookeeper.CreateMode;
@@ -61,7 +64,7 @@ import org.apache.zookeeper.KeeperException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import static org.apache.solr.common.params.CommonParams.ID;
+import com.codahale.metrics.Timer;
 
 /**
  * Cluster leader. Responsible for processing state updates, node assignments, creating/deleting
@@ -107,7 +110,7 @@ public class Overseer implements SolrCloseable {
     public ClusterStateUpdater(final ZkStateReader reader, final String myId, Stats zkStats) {
       this.zkClient = reader.getZkClient();
       this.zkStats = zkStats;
-      this.stateUpdateQueue = getStateUpdateQueue(zkClient, zkStats);
+      this.stateUpdateQueue = getStateUpdateQueue(zkStats);
       this.workQueue = getInternalWorkQueue(zkClient, zkStats);
       this.failureMap = getFailureMap(zkClient);
       this.runningMap = getRunningMap(zkClient);
@@ -188,6 +191,8 @@ public class Overseer implements SolrCloseable {
             // the workQueue is empty now, use stateUpdateQueue as fallback queue
             fallbackQueue = stateUpdateQueue;
             fallbackQueueSize = 0;
+          } catch (AlreadyClosedException e) {
+            return;
           } catch (KeeperException.SessionExpiredException e) {
             log.warn("Solr cannot talk to ZK, exiting Overseer work queue loop", e);
             return;
@@ -211,6 +216,8 @@ public class Overseer implements SolrCloseable {
         } catch (InterruptedException e) {
           Thread.currentThread().interrupt();
           return;
+        } catch (AlreadyClosedException e) {
+
         } catch (Exception e) {
           log.error("Exception in Overseer main queue loop", e);
         }
@@ -247,6 +254,8 @@ public class Overseer implements SolrCloseable {
         } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
          return;
+        } catch (AlreadyClosedException e) {
+
         } catch (Exception e) {
           log.error("Exception in Overseer main queue loop", e);
           refreshClusterState = true; // it might have been a bad version error
@@ -308,8 +317,10 @@ public class Overseer implements SolrCloseable {
       byte[] data;
       try {
         data = zkClient.getData(path, null, stat, true);
+      } catch (AlreadyClosedException e) {
+        return;
       } catch (Exception e) {
-        log.error("could not read the "+path+" data" ,e);
+        log.warn("Error communicating with ZooKeeper", e);
         return;
       }
       try {
@@ -437,6 +448,11 @@ public class Overseer implements SolrCloseable {
       } catch (InterruptedException e) {
         success = false;
         Thread.currentThread().interrupt();
+      } catch (AlreadyClosedException e) {
+        success = false;
+      } catch (Exception e) {
+        success = false;
+        log.warn("Unexpected exception", e);
       } finally {
         timerContext.stop();
         if (success) {
@@ -495,7 +511,7 @@ public class Overseer implements SolrCloseable {
 
   private final ZkStateReader reader;
 
-  private final ShardHandler shardHandler;
+  private final HttpShardHandler shardHandler;
 
   private final UpdateShardHandler updateShardHandler;
 
@@ -507,11 +523,11 @@ public class Overseer implements SolrCloseable {
 
   private Stats stats;
   private String id;
-  private boolean closed;
+  private volatile boolean closed;
   private CloudConfig config;
 
   // overseer not responsible for closing reader
-  public Overseer(ShardHandler shardHandler,
+  public Overseer(HttpShardHandler shardHandler,
       UpdateShardHandler updateShardHandler, String adminPath,
       final ZkStateReader reader, ZkController zkController, CloudConfig config)
       throws KeeperException, InterruptedException {
@@ -541,7 +557,7 @@ public class Overseer implements SolrCloseable {
 
     ThreadGroup ccTg = new ThreadGroup("Overseer collection creation process.");
 
-    OverseerNodePrioritizer overseerPrioritizer = new OverseerNodePrioritizer(reader, adminPath, shardHandler.getShardHandlerFactory());
+    OverseerNodePrioritizer overseerPrioritizer = new OverseerNodePrioritizer(reader, getStateUpdateQueue(), adminPath, shardHandler.getShardHandlerFactory(), updateShardHandler.getDefaultHttpClient());
     overseerCollectionConfigSetProcessor = new OverseerCollectionConfigSetProcessor(reader, id, shardHandler, adminPath, stats, Overseer.this, overseerPrioritizer);
     ccThread = new OverseerThread(ccTg, overseerCollectionConfigSetProcessor, "OverseerCollectionConfigSetProcessor-" + id);
     ccThread.setDaemon(true);
@@ -554,9 +570,8 @@ public class Overseer implements SolrCloseable {
     updaterThread.start();
     ccThread.start();
     triggerThread.start();
-    if (this.id != null) {
-      assert ObjectReleaseTracker.track(this);
-    }
+
+    assert ObjectReleaseTracker.track(this);
   }
 
   public Stats getStats() {
@@ -595,16 +610,13 @@ public class Overseer implements SolrCloseable {
   }
 
   public synchronized void close() {
-    if (closed) return;
     if (this.id != null) {
       log.info("Overseer (id=" + id + ") closing");
     }
-
-    doClose();
     this.closed = true;
-    if (this.id != null) {
-      assert ObjectReleaseTracker.release(this);
-    }
+    doClose();
+
+    assert ObjectReleaseTracker.release(this);
   }
 
   @Override
@@ -660,11 +672,10 @@ public class Overseer implements SolrCloseable {
    * <p>
    * This method will create the /overseer znode in ZooKeeper if it does not exist already.
    *
-   * @param zkClient the {@link SolrZkClient} to be used for reading/writing to the queue
    * @return a {@link ZkDistributedQueue} object
    */
-  public static ZkDistributedQueue getStateUpdateQueue(final SolrZkClient zkClient) {
-    return getStateUpdateQueue(zkClient, new Stats());
+  ZkDistributedQueue getStateUpdateQueue() {
+    return getStateUpdateQueue(new Stats());
   }
 
   /**
@@ -672,13 +683,15 @@ public class Overseer implements SolrCloseable {
    * This method should not be used directly by anyone other than the Overseer itself.
    * This method will create the /overseer znode in ZooKeeper if it does not exist already.
    *
-   * @param zkClient the {@link SolrZkClient} to be used for reading/writing to the queue
    * @param zkStats  a {@link Stats} object which tracks statistics for all zookeeper operations performed by this queue
    * @return a {@link ZkDistributedQueue} object
    */
-  static ZkDistributedQueue getStateUpdateQueue(final SolrZkClient zkClient, Stats zkStats) {
-    createOverseerNode(zkClient);
-    return new ZkDistributedQueue(zkClient, "/overseer/queue", zkStats, STATE_UPDATE_MAX_QUEUE);
+  ZkDistributedQueue getStateUpdateQueue(Stats zkStats) {
+    return new ZkDistributedQueue(reader.getZkClient(), "/overseer/queue", zkStats, STATE_UPDATE_MAX_QUEUE, new ConnectionManager.IsClosed(){
+      public boolean isClosed() {
+        return Overseer.this.isClosed() || zkController.getCoreContainer().isShutDown();
+      }
+    });
   }
 
   /**
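The IsClosed hook above lets queue operations fail fast with AlreadyClosedException once the Overseer or its CoreContainer is shutting down, and the accessors losing their static modifier is the SOLR-12804 change: call sites now reach the queue through the Overseer instance, as the ElectionContext hunks earlier in this diff show:

    // before (static access, removed by SOLR-12804):
    Overseer.getStateUpdateQueue(zkClient).offer(Utils.toJSON(m));

    // after (instance access via the ZkController's Overseer):
    zkController.getOverseer().offerStateUpdate(Utils.toJSON(m));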
@@ -697,31 +710,26 @@ public class Overseer implements SolrCloseable {
    * @return a {@link ZkDistributedQueue} object
    */
   static ZkDistributedQueue getInternalWorkQueue(final SolrZkClient zkClient, Stats zkStats) {
-    createOverseerNode(zkClient);
     return new ZkDistributedQueue(zkClient, "/overseer/queue-work", zkStats);
   }
 
   /* Internal map for failed tasks, not to be used outside of the Overseer */
   static DistributedMap getRunningMap(final SolrZkClient zkClient) {
-    createOverseerNode(zkClient);
     return new DistributedMap(zkClient, "/overseer/collection-map-running");
   }
 
   /* Size-limited map for successfully completed tasks*/
   static DistributedMap getCompletedMap(final SolrZkClient zkClient) {
-    createOverseerNode(zkClient);
     return new SizeLimitedDistributedMap(zkClient, "/overseer/collection-map-completed", NUM_RESPONSES_TO_STORE, (child) -> getAsyncIdsMap(zkClient).remove(child));
   }
 
   /* Map for failed tasks, not to be used outside of the Overseer */
   static DistributedMap getFailureMap(final SolrZkClient zkClient) {
-    createOverseerNode(zkClient);
     return new SizeLimitedDistributedMap(zkClient, "/overseer/collection-map-failure", NUM_RESPONSES_TO_STORE, (child) -> getAsyncIdsMap(zkClient).remove(child));
   }
 
   /* Map of async IDs currently in use*/
   static DistributedMap getAsyncIdsMap(final SolrZkClient zkClient) {
-    createOverseerNode(zkClient);
     return new DistributedMap(zkClient, "/overseer/async_ids");
   }
 
@@ -740,7 +748,7 @@ public class Overseer implements SolrCloseable {
    * @param zkClient the {@link SolrZkClient} to be used for reading/writing to the queue
    * @return a {@link ZkDistributedQueue} object
    */
-  static OverseerTaskQueue getCollectionQueue(final SolrZkClient zkClient) {
+  OverseerTaskQueue getCollectionQueue(final SolrZkClient zkClient) {
     return getCollectionQueue(zkClient, new Stats());
   }
 
@@ -758,8 +766,7 @@ public class Overseer implements SolrCloseable {
    * @param zkClient the {@link SolrZkClient} to be used for reading/writing to the queue
    * @return a {@link ZkDistributedQueue} object
    */
-  static OverseerTaskQueue getCollectionQueue(final SolrZkClient zkClient, Stats zkStats) {
-    createOverseerNode(zkClient);
+  OverseerTaskQueue getCollectionQueue(final SolrZkClient zkClient, Stats zkStats) {
     return new OverseerTaskQueue(zkClient, "/overseer/collection-queue-work", zkStats);
   }
 
@@ -778,7 +785,7 @@ public class Overseer implements SolrCloseable {
    * @param zkClient the {@link SolrZkClient} to be used for reading/writing to the queue
    * @return a {@link ZkDistributedQueue} object
    */
-  static OverseerTaskQueue getConfigSetQueue(final SolrZkClient zkClient) {
+  OverseerTaskQueue getConfigSetQueue(final SolrZkClient zkClient) {
     return getConfigSetQueue(zkClient, new Stats());
   }
 
@ -801,15 +808,14 @@ public class Overseer implements SolrCloseable {
|
|||
* @param zkClient the {@link SolrZkClient} to be used for reading/writing to the queue
|
||||
* @return a {@link ZkDistributedQueue} object
|
||||
*/
|
||||
static OverseerTaskQueue getConfigSetQueue(final SolrZkClient zkClient, Stats zkStats) {
|
||||
OverseerTaskQueue getConfigSetQueue(final SolrZkClient zkClient, Stats zkStats) {
|
||||
// For now, we use the same queue as the collection queue, but ensure
|
||||
// that the actions are prefixed with a unique string.
|
||||
createOverseerNode(zkClient);
|
||||
return getCollectionQueue(zkClient, zkStats);
|
||||
}
|
||||
|
||||
|
||||
private static void createOverseerNode(final SolrZkClient zkClient) {
|
||||
private void createOverseerNode(final SolrZkClient zkClient) {
|
||||
try {
|
||||
zkClient.create("/overseer", new byte[0], CreateMode.PERSISTENT, true);
|
||||
} catch (KeeperException.NodeExistsException e) {
|
||||
|
@@ -823,6 +829,7 @@ public class Overseer implements SolrCloseable {
      throw new RuntimeException(e);
    }
  }
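An aside on the hunk just above: the NodeExistsException catch is the standard idempotent-create idiom, so concurrent callers can race to create /overseer safely. A minimal, self-contained sketch of the same pattern (the EnsureZnode wrapper is illustrative; SolrZkClient#create, CreateMode and KeeperException are the APIs used in the hunk):

import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;

class EnsureZnode {
  // Create a persistent znode, treating "already exists" as success.
  static void ensure(SolrZkClient zkClient, String path) {
    try {
      zkClient.create(path, new byte[0], CreateMode.PERSISTENT, true);
    } catch (KeeperException.NodeExistsException e) {
      // another node won the race - nothing to do
    } catch (KeeperException e) {
      throw new RuntimeException(e);
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      throw new RuntimeException(e);
    }
  }
}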
  public static boolean isLegacy(ZkStateReader stateReader) {
    String legacyProperty = stateReader.getClusterProperty(ZkStateReader.LEGACY_CLOUD, "false");
    return "true".equals(legacyProperty);

@@ -837,4 +844,11 @@ public class Overseer implements SolrCloseable {
    return reader;
  }

  public void offerStateUpdate(byte[] data) throws KeeperException, InterruptedException {
    if (zkController.getZkClient().isClosed()) {
      throw new AlreadyClosedException();
    }
    getStateUpdateQueue().offer(data);
  }

}
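The new offerStateUpdate method above gives callers a single entry point that fails fast with AlreadyClosedException instead of reaching the queue through a static accessor. A hedged sketch of a caller, reusing the QUIT message shape that appears later in this commit (the wrapper class is illustrative, and the "id" key stands in for the ID constant used there):

import org.apache.solr.cloud.Overseer;
import org.apache.solr.cloud.overseer.OverseerAction;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.util.Utils;
import org.apache.zookeeper.KeeperException;

class StateUpdateExample {
  // Ask the current Overseer leader to quit; offerStateUpdate throws
  // AlreadyClosedException if the ZooKeeper client is already closed.
  static void askOverseerToQuit(Overseer overseer, String leaderId)
      throws KeeperException, InterruptedException {
    ZkNodeProps msg = new ZkNodeProps(Overseer.QUEUE_OPERATION,
        OverseerAction.QUIT.toLower(), "id", leaderId);
    overseer.offerStateUpdate(Utils.toJSON(msg));
  }
}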
@@ -16,16 +16,16 @@
 */
package org.apache.solr.cloud;

import static org.apache.solr.cloud.OverseerConfigSetMessageHandler.CONFIGSETS_ACTION_PREFIX;

import java.io.IOException;

import org.apache.commons.io.IOUtils;
import org.apache.solr.cloud.api.collections.OverseerCollectionMessageHandler;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.handler.component.ShardHandler;
import org.apache.solr.handler.component.ShardHandlerFactory;

import static org.apache.solr.cloud.OverseerConfigSetMessageHandler.CONFIGSETS_ACTION_PREFIX;
import org.apache.solr.handler.component.HttpShardHandler;
import org.apache.solr.handler.component.HttpShardHandlerFactory;

/**
 * An {@link OverseerTaskProcessor} that handles:

@@ -35,18 +35,18 @@ import static org.apache.solr.cloud.OverseerConfigSetMessageHandler.CONFIGSETS_ACTION_PREFIX;
public class OverseerCollectionConfigSetProcessor extends OverseerTaskProcessor {

  public OverseerCollectionConfigSetProcessor(ZkStateReader zkStateReader, String myId,
      final ShardHandler shardHandler,
      final HttpShardHandler shardHandler,
      String adminPath, Stats stats, Overseer overseer,
      OverseerNodePrioritizer overseerNodePrioritizer) {
    this(
        zkStateReader,
        myId,
        shardHandler.getShardHandlerFactory(),
        (HttpShardHandlerFactory) shardHandler.getShardHandlerFactory(),
        adminPath,
        stats,
        overseer,
        overseerNodePrioritizer,
        Overseer.getCollectionQueue(zkStateReader.getZkClient(), stats),
        overseer.getCollectionQueue(zkStateReader.getZkClient(), stats),
        Overseer.getRunningMap(zkStateReader.getZkClient()),
        Overseer.getCompletedMap(zkStateReader.getZkClient()),
        Overseer.getFailureMap(zkStateReader.getZkClient())

@@ -54,7 +54,7 @@ public class OverseerCollectionConfigSetProcessor extends OverseerTaskProcessor
  }

  protected OverseerCollectionConfigSetProcessor(ZkStateReader zkStateReader, String myId,
      final ShardHandlerFactory shardHandlerFactory,
      final HttpShardHandlerFactory shardHandlerFactory,
      String adminPath,
      Stats stats,
      Overseer overseer,

@@ -79,7 +79,7 @@ public class OverseerCollectionConfigSetProcessor extends OverseerTaskProcessor
  private static OverseerMessageHandlerSelector getOverseerMessageHandlerSelector(
      ZkStateReader zkStateReader,
      String myId,
      final ShardHandlerFactory shardHandlerFactory,
      final HttpShardHandlerFactory shardHandlerFactory,
      String adminPath,
      Stats stats,
      Overseer overseer,
@@ -20,6 +20,7 @@ import java.lang.invoke.MethodHandles;
import java.util.List;
import java.util.Map;

import org.apache.http.client.HttpClient;
import org.apache.solr.cloud.overseer.OverseerAction;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkNodeProps;

@@ -28,6 +29,7 @@ import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.common.params.CoreAdminParams.CoreAdminAction;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.Utils;
import org.apache.solr.handler.component.HttpShardHandlerFactory;
import org.apache.solr.handler.component.ShardHandler;
import org.apache.solr.handler.component.ShardHandlerFactory;
import org.apache.solr.handler.component.ShardRequest;

@@ -49,10 +51,16 @@ public class OverseerNodePrioritizer {
  private final String adminPath;
  private final ShardHandlerFactory shardHandlerFactory;

  public OverseerNodePrioritizer(ZkStateReader zkStateReader, String adminPath, ShardHandlerFactory shardHandlerFactory) {
  private ZkDistributedQueue stateUpdateQueue;

  private HttpClient httpClient;

  public OverseerNodePrioritizer(ZkStateReader zkStateReader, ZkDistributedQueue stateUpdateQueue, String adminPath, ShardHandlerFactory shardHandlerFactory, HttpClient httpClient) {
    this.zkStateReader = zkStateReader;
    this.adminPath = adminPath;
    this.shardHandlerFactory = shardHandlerFactory;
    this.stateUpdateQueue = stateUpdateQueue;
    this.httpClient = httpClient;
  }

  public synchronized void prioritizeOverseerNodes(String overseerId) throws Exception {

@@ -88,7 +96,7 @@ public class OverseerNodePrioritizer {
      invokeOverseerOp(electionNodes.get(1), "rejoin");//ask second inline to go behind
    }
    //now ask the current leader to QUIT , so that the designate can takeover
    Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(
    stateUpdateQueue.offer(
        Utils.toJSON(new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.QUIT.toLower(),
            ID, OverseerTaskProcessor.getLeaderId(zkStateReader.getZkClient()))));

@@ -96,7 +104,7 @@ public class OverseerNodePrioritizer {

  private void invokeOverseerOp(String electionNode, String op) {
    ModifiableSolrParams params = new ModifiableSolrParams();
    ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
    ShardHandler shardHandler = ((HttpShardHandlerFactory)shardHandlerFactory).getShardHandler(httpClient);
    params.set(CoreAdminParams.ACTION, CoreAdminAction.OVERSEEROP.toString());
    params.set("op", op);
    params.set("qt", adminPath);
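For reference, the parameter map that invokeOverseerOp builds in the hunk above can be reproduced in isolation. The sketch below mirrors the three params set there ("op" and "qt" are set verbatim in the hunk) and is otherwise illustrative:

import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.common.params.CoreAdminParams.CoreAdminAction;
import org.apache.solr.common.params.ModifiableSolrParams;

class OverseerOpParams {
  // Build the core-admin params for an OVERSEEROP request such as "rejoin".
  static ModifiableSolrParams forOp(String op, String adminPath) {
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set(CoreAdminParams.ACTION, CoreAdminAction.OVERSEEROP.toString());
    params.set("op", op);        // e.g. "rejoin"
    params.set("qt", adminPath); // route to the cores admin handler
    return params;
  }
}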
@@ -19,6 +19,7 @@ package org.apache.solr.cloud;
import java.io.Closeable;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;

@@ -36,6 +37,7 @@ import org.apache.commons.io.IOUtils;
import org.apache.solr.client.solrj.SolrResponse;
import org.apache.solr.cloud.Overseer.LeaderStatus;
import org.apache.solr.cloud.OverseerTaskQueue.QueueEvent;
import org.apache.solr.common.AlreadyClosedException;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkNodeProps;

@@ -86,13 +88,13 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
  // List of completed tasks. This is used to clean up workQueue in zk.
  final private HashMap<String, QueueEvent> completedTasks;

  private String myId;
  private volatile String myId;

  private ZkStateReader zkStateReader;
  private volatile ZkStateReader zkStateReader;

  private boolean isClosed;

  private Stats stats;
  private volatile Stats stats;

  // Set of tasks that have been picked up for processing but not cleaned up from zk work-queue.
  // It may contain tasks that have completed execution, have been entered into the completed/failed map in zk but not

@@ -102,7 +104,7 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
  // be executed because they are blocked or the execution queue is full
  // This is an optimization to ensure that we do not read the same tasks
  // again and again from ZK.
  final private Map<String, QueueEvent> blockedTasks = new LinkedHashMap<>();
  final private Map<String, QueueEvent> blockedTasks = Collections.synchronizedMap(new LinkedHashMap<>());
  final private Predicate<String> excludedTasks = new Predicate<String>() {
    @Override
    public boolean test(String s) {

@@ -170,6 +172,8 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
      // We don't need to handle this. This is just a fail-safe which comes in handy in skipping already processed
      // async calls.
      SolrException.log(log, "", e);
    } catch (AlreadyClosedException e) {
      return;
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
    }
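The new AlreadyClosedException catch above (SOLR-12897) turns shutdown into a quiet early return rather than an error-logged failure. A hedged, generic sketch of that idiom in a worker loop (the Worker class and doUnitOfWork are illustrative; AlreadyClosedException is the real org.apache.solr.common class this work introduces):

import org.apache.solr.common.AlreadyClosedException;

class Worker implements Runnable {
  @Override
  public void run() {
    while (!Thread.currentThread().isInterrupted()) {
      try {
        doUnitOfWork();
      } catch (AlreadyClosedException e) {
        return; // shutting down - exit without noisy logging
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        return;
      }
    }
  }

  void doUnitOfWork() throws InterruptedException {
    // real work would talk to ZooKeeper here
  }
}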
@@ -181,6 +185,8 @@ public class OverseerTaskProcessor implements Runnable, Closeable {

    try {
      prioritizer.prioritizeOverseerNodes(myId);
    } catch (AlreadyClosedException e) {
      return;
    } catch (Exception e) {
      if (!zkStateReader.getZkClient().isClosed()) {
        log.error("Unable to prioritize overseer ", e);

@@ -203,14 +209,14 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
          continue; // not a no, not a yes, try asking again
        }

        log.debug("Cleaning up work-queue. #Running tasks: {}", runningTasks.size());
        log.debug("Cleaning up work-queue. #Running tasks: {} #Completed tasks: {}", runningTasksSize(), completedTasks.size());
        cleanUpWorkQueue();

        printTrackingMaps();

        boolean waited = false;

        while (runningTasks.size() > MAX_PARALLEL_TASKS) {
        while (runningTasksSize() > MAX_PARALLEL_TASKS) {
          synchronized (waitLock) {
            waitLock.wait(100);//wait for 100 ms or till a task is complete
          }

@@ -229,7 +235,7 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
        // to clear out at least a few items in the queue before we read more items
        if (heads.size() < MAX_BLOCKED_TASKS) {
          //instead of reading MAX_PARALLEL_TASKS items always, we should only fetch as much as we can execute
          int toFetch = Math.min(MAX_BLOCKED_TASKS - heads.size(), MAX_PARALLEL_TASKS - runningTasks.size());
          int toFetch = Math.min(MAX_BLOCKED_TASKS - heads.size(), MAX_PARALLEL_TASKS - runningTasksSize());
          List<QueueEvent> newTasks = workQueue.peekTopN(toFetch, excludedTasks, 2000L);
          log.debug("Got {} tasks from work-queue : [{}]", newTasks.size(), newTasks);
          heads.addAll(newTasks);

@@ -251,7 +257,7 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
        for (QueueEvent head : heads) {
          if (!tooManyTasks) {
            synchronized (runningTasks) {
              tooManyTasks = runningTasks.size() >= MAX_PARALLEL_TASKS;
              tooManyTasks = runningTasksSize() >= MAX_PARALLEL_TASKS;
            }
          }
          if (tooManyTasks) {

@@ -260,7 +266,9 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
            blockedTasks.put(head.getId(), head);
            continue;
          }
          if (runningZKTasks.contains(head.getId())) continue;
          synchronized (runningZKTasks) {
            if (runningZKTasks.contains(head.getId())) continue;
          }
          final ZkNodeProps message = ZkNodeProps.load(head.getBytes());
          final String asyncId = message.getStr(ASYNC);
          if (hasLeftOverItems) {

@@ -316,6 +324,8 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      return;
    } catch (AlreadyClosedException e) {

    } catch (Exception e) {
      SolrException.log(log, "", e);
    }

@@ -325,11 +335,19 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
    }
  }

  private int runningTasksSize() {
    synchronized (runningTasks) {
      return runningTasks.size();
    }
  }

  private void cleanUpWorkQueue() throws KeeperException, InterruptedException {
    synchronized (completedTasks) {
      for (String id : completedTasks.keySet()) {
        workQueue.remove(completedTasks.get(id));
        runningZKTasks.remove(id);
        synchronized (runningTasks) {
          runningZKTasks.remove(id);
        }
      }
      completedTasks.clear();
    }
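The runningTasksSize() helper above routes every read of runningTasks through the collection's own monitor, so size checks no longer race with mutations made under the same lock. A generic, self-contained sketch of the idiom (all names illustrative):

import java.util.HashSet;
import java.util.Set;

class TaskTracker {
  private final Set<String> runningTasks = new HashSet<>();

  // All reads and writes share the collection's monitor, so a size
  // check can never interleave with a concurrent add or remove.
  int runningTasksSize() {
    synchronized (runningTasks) {
      return runningTasks.size();
    }
  }

  void add(String id) {
    synchronized (runningTasks) {
      runningTasks.add(id);
    }
  }
}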
@@ -502,6 +520,8 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
      log.debug(messageHandler.getName() + ": Message id:" + head.getId() +
          " complete, response:" + response.getResponse().toString());
      success = true;
    } catch (AlreadyClosedException e) {

    } catch (KeeperException e) {
      SolrException.log(log, "", e);
    } catch (InterruptedException e) {

@@ -513,7 +533,11 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
      lock.unlock();
      if (!success) {
        // Reset task from tracking data structures so that it can be retried.
        resetTaskWithException(messageHandler, head.getId(), asyncId, taskKey, message);
        try {
          resetTaskWithException(messageHandler, head.getId(), asyncId, taskKey, message);
        } catch(AlreadyClosedException e) {

        }
      }
      synchronized (waitLock){
        waitLock.notifyAll();

@@ -587,7 +611,7 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
      log.debug("CompletedTasks: {}", completedTasks.keySet().toString());
    }
    synchronized (runningZKTasks) {
      log.debug("RunningZKTasks: {}", runningZKTasks.toString());
      log.info("RunningZKTasks: {}", runningZKTasks.toString());
    }
  }
}
@@ -63,7 +63,6 @@ import org.apache.solr.update.CommitUpdateCommand;
import org.apache.solr.update.PeerSyncWithLeader;
import org.apache.solr.update.UpdateLog;
import org.apache.solr.update.UpdateLog.RecoveryInfo;
import org.apache.solr.update.processor.DistributedUpdateProcessor;
import org.apache.solr.util.RefCounted;
import org.apache.solr.util.SolrPluginUtils;
import org.apache.solr.util.plugin.NamedListInitializedPlugin;

@@ -71,18 +70,21 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * This class may change in future and customisations are not supported
 * between versions in terms of API or back compat behaviour.
 * This class may change in future and customisations are not supported between versions in terms of API or back compat
 * behaviour.
 *
 * @lucene.experimental
 */
public class RecoveryStrategy implements Runnable, Closeable {

  public static class Builder implements NamedListInitializedPlugin {
    private NamedList args;

    @Override
    public void init(NamedList args) {
      this.args = args;
    }

    // this should only be used from SolrCoreState
    public RecoveryStrategy create(CoreContainer cc, CoreDescriptor cd,
        RecoveryStrategy.RecoveryListener recoveryListener) {

@@ -90,6 +92,7 @@ public class RecoveryStrategy implements Runnable, Closeable {
      SolrPluginUtils.invokeSetters(recoveryStrategy, args);
      return recoveryStrategy;
    }

    protected RecoveryStrategy newRecoveryStrategy(CoreContainer cc, CoreDescriptor cd,
        RecoveryStrategy.RecoveryListener recoveryListener) {
      return new RecoveryStrategy(cc, cd, recoveryListener);

@@ -98,15 +101,17 @@ public class RecoveryStrategy implements Runnable, Closeable {

  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  private int waitForUpdatesWithStaleStatePauseMilliSeconds = Integer.getInteger("solr.cloud.wait-for-updates-with-stale-state-pause", 2500);
  private int waitForUpdatesWithStaleStatePauseMilliSeconds = Integer
      .getInteger("solr.cloud.wait-for-updates-with-stale-state-pause", 2500);
  private int maxRetries = 500;
  private int startingRecoveryDelayMilliSeconds = 5000;
  private int startingRecoveryDelayMilliSeconds = 2000;

  public static interface RecoveryListener {
    public void recovered();

    public void failed();
  }

  private volatile boolean close = false;

  private RecoveryListener recoveryListener;

@@ -121,6 +126,8 @@ public class RecoveryStrategy implements Runnable, Closeable {
  private volatile HttpUriRequest prevSendPreRecoveryHttpUriRequest;
  private final Replica.Type replicaType;

  private CoreDescriptor coreDescriptor;

  protected RecoveryStrategy(CoreContainer cc, CoreDescriptor cd, RecoveryListener recoveryListener) {
    this.cc = cc;
    this.coreName = cd.getName();

@@ -136,7 +143,8 @@ public class RecoveryStrategy implements Runnable, Closeable {
    return waitForUpdatesWithStaleStatePauseMilliSeconds;
  }

  final public void setWaitForUpdatesWithStaleStatePauseMilliSeconds(int waitForUpdatesWithStaleStatePauseMilliSeconds) {
  final public void setWaitForUpdatesWithStaleStatePauseMilliSeconds(
      int waitForUpdatesWithStaleStatePauseMilliSeconds) {
    this.waitForUpdatesWithStaleStatePauseMilliSeconds = waitForUpdatesWithStaleStatePauseMilliSeconds;
  }

@@ -185,10 +193,11 @@ public class RecoveryStrategy implements Runnable, Closeable {
      recoveryListener.failed();
    }
  }

  /**
   * This method may change in future and customisations are not supported
   * between versions in terms of API or back compat behaviour.
   * This method may change in future and customisations are not supported between versions in terms of API or back
   * compat behaviour.
   *
   * @lucene.experimental
   */
  protected String getReplicateLeaderUrl(ZkNodeProps leaderprops) {

@@ -199,37 +208,38 @@ public class RecoveryStrategy implements Runnable, Closeable {
      throws SolrServerException, IOException {

    final String leaderUrl = getReplicateLeaderUrl(leaderprops);

    log.info("Attempting to replicate from [{}].", leaderUrl);

    // send commit
    commitOnLeader(leaderUrl);

    // use rep handler directly, so we can do this sync rather than async
    SolrRequestHandler handler = core.getRequestHandler(ReplicationHandler.PATH);
    ReplicationHandler replicationHandler = (ReplicationHandler) handler;

    if (replicationHandler == null) {
      throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE,
          "Skipping recovery, no " + ReplicationHandler.PATH + " handler found");
    }

    ModifiableSolrParams solrParams = new ModifiableSolrParams();
    solrParams.set(ReplicationHandler.MASTER_URL, leaderUrl);
    solrParams.set(ReplicationHandler.SKIP_COMMIT_ON_MASTER_VERSION_ZERO, replicaType == Replica.Type.TLOG);
    // always download the tlogs from the leader when running with cdcr enabled. We need to have all the tlogs
    // to ensure leader failover doesn't cause missing docs on the target
    if (core.getUpdateHandler().getUpdateLog() != null && core.getUpdateHandler().getUpdateLog() instanceof CdcrUpdateLog) {
    if (core.getUpdateHandler().getUpdateLog() != null
        && core.getUpdateHandler().getUpdateLog() instanceof CdcrUpdateLog) {
      solrParams.set(ReplicationHandler.TLOG_FILES, true);
    }

    if (isClosed()) return; // we check closed on return
    boolean success = replicationHandler.doFetch(solrParams, false).getSuccessful();

    if (!success) {
      throw new SolrException(ErrorCode.SERVER_ERROR, "Replication for recovery failed.");
    }

    // solrcloud_debug
    if (log.isDebugEnabled()) {
      try {

@@ -245,7 +255,8 @@ public class RecoveryStrategy implements Runnable, Closeable {
            + " from "
            + leaderUrl
            + " gen:"
            + (core.getDeletionPolicy().getLatestCommit() != null ? "null" : core.getDeletionPolicy().getLatestCommit().getGeneration())
            + (core.getDeletionPolicy().getLatestCommit() != null ? "null"
                : core.getDeletionPolicy().getLatestCommit().getGeneration())
            + " data:" + core.getDataDir()
            + " index:" + core.getIndexDir()
            + " newIndex:" + core.getNewIndexDir()

@@ -265,11 +276,13 @@ public class RecoveryStrategy implements Runnable, Closeable {
      IOException {
    try (HttpSolrClient client = new HttpSolrClient.Builder(leaderUrl)
        .withConnectionTimeout(30000)
        .withHttpClient(cc.getUpdateShardHandler().getRecoveryOnlyHttpClient())
        .build()) {
      UpdateRequest ureq = new UpdateRequest();
      ureq.setParams(new ModifiableSolrParams());
      ureq.getParams().set(DistributedUpdateProcessor.COMMIT_END_POINT, true);
      // ureq.getParams().set(UpdateParams.OPEN_SEARCHER, onlyLeaderIndexes);// Why do we need to open searcher if "onlyLeaderIndexes"?
      // ureq.getParams().set(DistributedUpdateProcessor.COMMIT_END_POINT, true);
      // ureq.getParams().set(UpdateParams.OPEN_SEARCHER, onlyLeaderIndexes);// Why do we need to open searcher if
      // "onlyLeaderIndexes"?
      ureq.getParams().set(UpdateParams.OPEN_SEARCHER, false);
      ureq.setAction(AbstractUpdateRequest.ACTION.COMMIT, false, true).process(
          client);
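The commitOnLeader hunk above touches the SOLR-12933 distributed-commit fix: an explicit, synchronous commit against the leader with openSearcher=false before replication begins. A hedged sketch reduced to its SolrJ core (the wrapper class is illustrative; the builder call and the waitFlush=false / waitSearcher=true flags follow the hunk, minus the recovery-only HttpClient wiring):

import java.io.IOException;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.UpdateParams;

class CommitOnLeader {
  // Send an explicit commit to the leader without opening a new searcher.
  static void commit(String leaderUrl) throws SolrServerException, IOException {
    try (HttpSolrClient client = new HttpSolrClient.Builder(leaderUrl)
        .withConnectionTimeout(30000)
        .build()) {
      UpdateRequest ureq = new UpdateRequest();
      ureq.setParams(new ModifiableSolrParams());
      ureq.getParams().set(UpdateParams.OPEN_SEARCHER, false);
      // ACTION.COMMIT with waitFlush=false, waitSearcher=true, as in the hunk
      ureq.setAction(AbstractUpdateRequest.ACTION.COMMIT, false, true).process(client);
    }
  }
}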
@@ -304,9 +317,12 @@ public class RecoveryStrategy implements Runnable, Closeable {
      MDCLoggingContext.clear();
    }
  }

  final public void doRecovery(SolrCore core) throws Exception {
    if (core.getCoreDescriptor().getCloudDescriptor().requiresTransactionLog()) {
    // we can lose our core descriptor, so store it now
    this.coreDescriptor = core.getCoreDescriptor();

    if (this.coreDescriptor.getCloudDescriptor().requiresTransactionLog()) {
      doSyncOrReplicateRecovery(core);
    } else {
      doReplicateOnlyRecovery(core);

@@ -316,14 +332,17 @@ public class RecoveryStrategy implements Runnable, Closeable {
  final private void doReplicateOnlyRecovery(SolrCore core) throws InterruptedException {
    boolean successfulRecovery = false;

    // if (core.getUpdateHandler().getUpdateLog() != null) {
    // SolrException.log(log, "'replicate-only' recovery strategy should only be used if no update logs are present, but this core has one: "
    // + core.getUpdateHandler().getUpdateLog());
    // return;
    // }
    while (!successfulRecovery && !Thread.currentThread().isInterrupted() && !isClosed()) { // don't use interruption or it will close channels though
    // if (core.getUpdateHandler().getUpdateLog() != null) {
    // SolrException.log(log, "'replicate-only' recovery strategy should only be used if no update logs are present, but
    // this core has one: "
    // + core.getUpdateHandler().getUpdateLog());
    // return;
    // }
    while (!successfulRecovery && !Thread.currentThread().isInterrupted() && !isClosed()) { // don't use interruption or
        // it will close channels
        // though
      try {
        CloudDescriptor cloudDesc = core.getCoreDescriptor().getCloudDescriptor();
        CloudDescriptor cloudDesc = this.coreDescriptor.getCloudDescriptor();
        ZkNodeProps leaderprops = zkStateReader.getLeaderRetry(
            cloudDesc.getCollectionName(), cloudDesc.getShardId());
        final String leaderBaseUrl = leaderprops.getStr(ZkStateReader.BASE_URL_PROP);

@@ -333,7 +352,8 @@ public class RecoveryStrategy implements Runnable, Closeable {

        String ourUrl = ZkCoreNodeProps.getCoreUrl(baseUrl, coreName);

        boolean isLeader = leaderUrl.equals(ourUrl); //TODO: We can probably delete most of this code if we say this strategy can only be used for pull replicas
        boolean isLeader = leaderUrl.equals(ourUrl); // TODO: We can probably delete most of this code if we say this
            // strategy can only be used for pull replicas
        if (isLeader && !cloudDesc.isLeader()) {
          throw new SolrException(ErrorCode.SERVER_ERROR, "Cloud state still says we are leader.");
        }

@@ -342,14 +362,13 @@ public class RecoveryStrategy implements Runnable, Closeable {
          // we are now the leader - no one else must have been suitable
          log.warn("We have not yet recovered - but we are now the leader!");
          log.info("Finished recovery process.");
          zkController.publish(core.getCoreDescriptor(), Replica.State.ACTIVE);
          zkController.publish(this.coreDescriptor, Replica.State.ACTIVE);
          return;
        }

        log.info("Publishing state of core [{}] as recovering, leader is [{}] and I am [{}]", core.getName(), leaderUrl,
            ourUrl);
        zkController.publish(core.getCoreDescriptor(), Replica.State.RECOVERING);
        zkController.publish(this.coreDescriptor, Replica.State.RECOVERING);

        if (isClosed()) {
          log.info("Recovery for core {} has been closed", core.getName());

@@ -381,7 +400,7 @@ public class RecoveryStrategy implements Runnable, Closeable {
          zkController.startReplicationFromLeader(coreName, false);
          log.info("Registering as Active after recovery.");
          try {
            zkController.publish(core.getCoreDescriptor(), Replica.State.ACTIVE);
            zkController.publish(this.coreDescriptor, Replica.State.ACTIVE);
          } catch (Exception e) {
            log.error("Could not publish as ACTIVE after succesful recovery", e);
            successfulRecovery = false;

@@ -411,7 +430,7 @@ public class RecoveryStrategy implements Runnable, Closeable {
          if (retries >= maxRetries) {
            SolrException.log(log, "Recovery failed - max retries exceeded (" + retries + ").");
            try {
              recoveryFailed(core, zkController, baseUrl, coreZkNodeName, core.getCoreDescriptor());
              recoveryFailed(core, zkController, baseUrl, coreZkNodeName, this.coreDescriptor);
            } catch (Exception e) {
              SolrException.log(log, "Could not publish that recovery failed", e);
            }

@@ -457,7 +476,7 @@ public class RecoveryStrategy implements Runnable, Closeable {
    if (ulog == null) {
      SolrException.log(log, "No UpdateLog found - cannot recover.");
      recoveryFailed(core, zkController, baseUrl, coreZkNodeName,
          core.getCoreDescriptor());
          this.coreDescriptor);
      return;
    }

@@ -478,20 +497,22 @@ public class RecoveryStrategy implements Runnable, Closeable {
    try {
      int oldIdx = 0; // index of the start of the old list in the current list
      long firstStartingVersion = startingVersions.size() > 0 ? startingVersions.get(0) : 0;

      for (; oldIdx < recentVersions.size(); oldIdx++) {
        if (recentVersions.get(oldIdx) == firstStartingVersion) break;
      }

      if (oldIdx > 0) {
        log.info("Found new versions added after startup: num=[{}]", oldIdx);
        log.info("currentVersions size={} range=[{} to {}]", recentVersions.size(), recentVersions.get(0), recentVersions.get(recentVersions.size()-1));
        log.info("currentVersions size={} range=[{} to {}]", recentVersions.size(), recentVersions.get(0),
            recentVersions.get(recentVersions.size() - 1));
      }

      if (startingVersions.isEmpty()) {
        log.info("startupVersions is empty");
      } else {
        log.info("startupVersions size={} range=[{} to {}]", startingVersions.size(), startingVersions.get(0), startingVersions.get(startingVersions.size()-1));
        log.info("startupVersions size={} range=[{} to {}]", startingVersions.size(), startingVersions.get(0),
            startingVersions.get(startingVersions.size() - 1));
      }
    } catch (Exception e) {
      SolrException.log(log, "Error getting recent versions.", e);

@@ -501,7 +522,7 @@ public class RecoveryStrategy implements Runnable, Closeable {

    if (recoveringAfterStartup) {
      // if we're recovering after startup (i.e. we have been down), then we need to know what the last versions were
      // when we went down. We may have received updates since then.
      // when we went down. We may have received updates since then.
      recentVersions = startingVersions;
      try {
        if (ulog.existOldBufferLog()) {

@@ -523,10 +544,12 @@ public class RecoveryStrategy implements Runnable, Closeable {

    final String ourUrl = ZkCoreNodeProps.getCoreUrl(baseUrl, coreName);
    Future<RecoveryInfo> replayFuture = null;
    while (!successfulRecovery && !Thread.currentThread().isInterrupted() && !isClosed()) { // don't use interruption or it will close channels though
    while (!successfulRecovery && !Thread.currentThread().isInterrupted() && !isClosed()) { // don't use interruption or
        // it will close channels
        // though
      try {
        CloudDescriptor cloudDesc = core.getCoreDescriptor().getCloudDescriptor();
        final Replica leader = pingLeader(ourUrl, core.getCoreDescriptor(), true);
        CloudDescriptor cloudDesc = this.coreDescriptor.getCloudDescriptor();
        final Replica leader = pingLeader(ourUrl, this.coreDescriptor, true);
        if (isClosed()) {
          log.info("RecoveryStrategy has been closed");
          break;

@@ -540,7 +563,7 @@ public class RecoveryStrategy implements Runnable, Closeable {
          // we are now the leader - no one else must have been suitable
          log.warn("We have not yet recovered - but we are now the leader!");
          log.info("Finished recovery process.");
          zkController.publish(core.getCoreDescriptor(), Replica.State.ACTIVE);
          zkController.publish(this.coreDescriptor, Replica.State.ACTIVE);
          return;
        }

@@ -548,37 +571,37 @@ public class RecoveryStrategy implements Runnable, Closeable {
        // recalling buffer updates will drop the old buffer tlog
        ulog.bufferUpdates();

        log.info("Publishing state of core [{}] as recovering, leader is [{}] and I am [{}]", core.getName(), leader.getCoreUrl(),
        log.info("Publishing state of core [{}] as recovering, leader is [{}] and I am [{}]", core.getName(),
            leader.getCoreUrl(),
            ourUrl);
        zkController.publish(core.getCoreDescriptor(), Replica.State.RECOVERING);

        zkController.publish(this.coreDescriptor, Replica.State.RECOVERING);

        final Slice slice = zkStateReader.getClusterState().getCollection(cloudDesc.getCollectionName())
            .getSlice(cloudDesc.getShardId());

        try {
          prevSendPreRecoveryHttpUriRequest.abort();
        } catch (NullPointerException e) {
          // okay
        }

        if (isClosed()) {
          log.info("RecoveryStrategy has been closed");
          break;
        }

        sendPrepRecoveryCmd(leader.getBaseUrl(), leader.getCoreName(), slice);

        if (isClosed()) {
          log.info("RecoveryStrategy has been closed");
          break;
        }

        // we wait a bit so that any updates on the leader
        // that started before they saw recovering state
        // that started before they saw recovering state
        // are sure to have finished (see SOLR-7141 for
        // discussion around current value)
        //TODO since SOLR-11216, we probably won't need this
        // TODO since SOLR-11216, we probably won't need this
        try {
          Thread.sleep(waitForUpdatesWithStaleStatePauseMilliSeconds);
        } catch (InterruptedException e) {

@@ -588,7 +611,8 @@ public class RecoveryStrategy implements Runnable, Closeable {
        // first thing we just try to sync
        if (firstTime) {
          firstTime = false; // only try sync the first time through the loop
          log.info("Attempting to PeerSync from [{}] - recoveringAfterStartup=[{}]", leader.getCoreUrl(), recoveringAfterStartup);
          log.info("Attempting to PeerSync from [{}] - recoveringAfterStartup=[{}]", leader.getCoreUrl(),
              recoveringAfterStartup);
          // System.out.println("Attempting to PeerSync from " + leaderUrl
          // + " i am:" + zkController.getNodeName());
          PeerSyncWithLeader peerSyncWithLeader = new PeerSyncWithLeader(core,

@@ -604,7 +628,7 @@ public class RecoveryStrategy implements Runnable, Closeable {

          // solrcloud_debug
          cloudDebugLog(core, "synced");

          log.info("Replaying updates buffered during PeerSync.");
          replayFuture = replay(core);

@@ -620,7 +644,7 @@ public class RecoveryStrategy implements Runnable, Closeable {
          log.info("RecoveryStrategy has been closed");
          break;
        }

        log.info("Starting Replication Recovery.");

        try {

@@ -658,12 +682,12 @@ public class RecoveryStrategy implements Runnable, Closeable {
            if (replicaType == Replica.Type.TLOG) {
              zkController.startReplicationFromLeader(coreName, true);
            }
            zkController.publish(core.getCoreDescriptor(), Replica.State.ACTIVE);
            zkController.publish(this.coreDescriptor, Replica.State.ACTIVE);
          } catch (Exception e) {
            log.error("Could not publish as ACTIVE after succesful recovery", e);
            successfulRecovery = false;
          }

          if (successfulRecovery) {
            close = true;
            recoveryListener.recovered();

@@ -681,14 +705,14 @@ public class RecoveryStrategy implements Runnable, Closeable {
          log.info("RecoveryStrategy has been closed");
          break;
        }

        log.error("Recovery failed - trying again... (" + retries + ")");

        retries++;
        if (retries >= maxRetries) {
          SolrException.log(log, "Recovery failed - max retries exceeded (" + retries + ").");
          try {
            recoveryFailed(core, zkController, baseUrl, coreZkNodeName, core.getCoreDescriptor());
            recoveryFailed(core, zkController, baseUrl, coreZkNodeName, this.coreDescriptor);
          } catch (Exception e) {
            SolrException.log(log, "Could not publish that recovery failed", e);
          }

@@ -699,12 +723,12 @@ public class RecoveryStrategy implements Runnable, Closeable {
      }

      try {
        // Wait an exponential interval between retries, start at 5 seconds and work up to a minute.
        // If we're at attempt >= 4, there's no point computing pow(2, retries) because the result
        // will always be the minimum of the two (12). Since we sleep at 5 seconds sub-intervals in
        // order to check if we were closed, 12 is chosen as the maximum loopCount (5s * 12 = 1m).
        double loopCount = retries < 4 ? Math.min(Math.pow(2, retries), 12) : 12;
        log.info("Wait [{}] seconds before trying to recover again (attempt={})", loopCount, retries);
        // Wait an exponential interval between retries, start at 2 seconds and work up to a minute.
        // Since we sleep at 2 seconds sub-intervals in
        // order to check if we were closed, 30 is chosen as the maximum loopCount (2s * 30 = 1m).
        double loopCount = Math.min(Math.pow(2, retries - 1), 30);
        log.info("Wait [{}] seconds before trying to recover again (attempt={})",
            loopCount * startingRecoveryDelayMilliSeconds, retries);
        for (int i = 0; i < loopCount; i++) {
          if (isClosed()) {
            log.info("RecoveryStrategy has been closed");
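The rewritten backoff above starts lower but climbs to the same ceiling: loopCount doubles per retry, capped at 30, and each loop iteration sleeps startingRecoveryDelayMilliSeconds (now 2000 ms) while re-checking isClosed(). A standalone worked example of the resulting schedule (class and variable names illustrative):

class RecoveryBackoff {
  public static void main(String[] args) {
    int startingRecoveryDelayMilliSeconds = 2000;
    for (int retries = 1; retries <= 7; retries++) {
      double loopCount = Math.min(Math.pow(2, retries - 1), 30);
      System.out.println("attempt " + retries + ": "
          + (loopCount * startingRecoveryDelayMilliSeconds / 1000.0) + "s total wait");
    }
    // prints 2s, 4s, 8s, 16s, 32s, 60s, 60s
  }
}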
@@ -731,13 +755,15 @@ public class RecoveryStrategy implements Runnable, Closeable {
    log.info("Finished recovery process, successful=[{}]", Boolean.toString(successfulRecovery));
  }

  private final Replica pingLeader(String ourUrl, CoreDescriptor coreDesc, boolean mayPutReplicaAsDown) throws Exception {
  private final Replica pingLeader(String ourUrl, CoreDescriptor coreDesc, boolean mayPutReplicaAsDown)
      throws Exception {
    int numTried = 0;
    while (true) {
      CloudDescriptor cloudDesc = coreDesc.getCloudDescriptor();
      DocCollection docCollection = zkStateReader.getClusterState().getCollection(cloudDesc.getCollectionName());
      if (!isClosed() && mayPutReplicaAsDown && numTried == 1 &&
          docCollection.getReplica(coreDesc.getCloudDescriptor().getCoreNodeName()).getState() == Replica.State.ACTIVE) {
          docCollection.getReplica(coreDesc.getCloudDescriptor().getCoreNodeName())
              .getState() == Replica.State.ACTIVE) {
        // this operation may take a long time, by putting replica into DOWN state, client won't query this replica
        zkController.publish(coreDesc, Replica.State.DOWN);
      }

@@ -763,6 +789,7 @@ public class RecoveryStrategy implements Runnable, Closeable {
      try (HttpSolrClient httpSolrClient = new HttpSolrClient.Builder(leaderReplica.getCoreUrl())
          .withSocketTimeout(1000)
          .withConnectionTimeout(1000)
          .withHttpClient(cc.getUpdateShardHandler().getRecoveryOnlyHttpClient())
          .build()) {
        SolrPingResponse resp = httpSolrClient.ping();
        return leaderReplica;
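The leader probe above now reuses the recovery-only HttpClient and keeps very short timeouts so a dead leader fails fast. A hedged sketch of the same probe without the CoreContainer wiring (the wrapper method is illustrative; the builder calls mirror the hunk):

import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.common.cloud.Replica;

class LeaderProbe {
  // Returns the replica only if a quick ping of its core URL succeeds.
  static Replica pingOnce(Replica leaderReplica) throws Exception {
    try (HttpSolrClient client = new HttpSolrClient.Builder(leaderReplica.getCoreUrl())
        .withSocketTimeout(1000)
        .withConnectionTimeout(1000)
        .build()) {
      client.ping(); // throws on failure
      return leaderReplica;
    }
  }
}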
@@ -811,13 +838,13 @@ public class RecoveryStrategy implements Runnable, Closeable {

    // the index may ahead of the tlog's caches after recovery, by calling this tlog's caches will be purged
    core.getUpdateHandler().getUpdateLog().openRealtimeSearcher();

    // solrcloud_debug
    cloudDebugLog(core, "replayed");

    return future;
  }

  final private void cloudDebugLog(SolrCore core, String op) {
    if (!log.isDebugEnabled()) {
      return;

@@ -838,9 +865,9 @@ public class RecoveryStrategy implements Runnable, Closeable {
  }

  final public boolean isClosed() {
    return close;
    return close || cc.isShutDown();
  }

  final private void sendPrepRecoveryCmd(String leaderBaseUrl, String leaderCoreName, Slice slice)
      throws SolrServerException, IOException, InterruptedException, ExecutionException {

@@ -858,8 +885,9 @@ public class RecoveryStrategy implements Runnable, Closeable {

    int conflictWaitMs = zkController.getLeaderConflictResolveWait();
    // timeout after 5 seconds more than the max timeout (conflictWait + 3 seconds) on the server side
    int readTimeout = conflictWaitMs + 8000;
    try (HttpSolrClient client = new HttpSolrClient.Builder(leaderBaseUrl).build()) {
    int readTimeout = conflictWaitMs + Integer.parseInt(System.getProperty("prepRecoveryReadTimeoutExtraWait", "8000"));
    try (HttpSolrClient client = new HttpSolrClient.Builder(leaderBaseUrl)
        .withHttpClient(cc.getUpdateShardHandler().getRecoveryOnlyHttpClient()).build()) {
      client.setConnectionTimeout(10000);
      client.setSoTimeout(readTimeout);
      HttpUriRequestResponse mrr = client.httpUriRequest(prepCmd);
@@ -39,11 +39,11 @@ import org.slf4j.LoggerFactory;
public class ReplicateFromLeader {
  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  private CoreContainer cc;
  private String coreName;
  private final CoreContainer cc;
  private final String coreName;

  private ReplicationHandler replicationProcess;
  private long lastVersion = 0;
  private volatile ReplicationHandler replicationProcess;
  private volatile long lastVersion = 0;

  public ReplicateFromLeader(CoreContainer cc, String coreName) {
    this.cc = cc;
@@ -35,6 +35,7 @@ import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.CoreDescriptor;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.HttpShardHandlerFactory;
import org.apache.solr.handler.component.ShardHandler;
import org.apache.solr.handler.component.ShardRequest;
import org.apache.solr.handler.component.ShardResponse;

@@ -70,7 +71,7 @@ public class SyncStrategy {
  public SyncStrategy(CoreContainer cc) {
    UpdateShardHandler updateShardHandler = cc.getUpdateShardHandler();
    client = updateShardHandler.getDefaultHttpClient();
    shardHandler = cc.getShardHandlerFactory().getShardHandler();
    shardHandler = ((HttpShardHandlerFactory)cc.getShardHandlerFactory()).getShardHandler(cc.getUpdateShardHandler().getDefaultHttpClient());
    updateExecutor = updateShardHandler.getUpdateExecutor();
  }

@@ -113,17 +114,18 @@ public class SyncStrategy {

  private PeerSync.PeerSyncResult syncReplicas(ZkController zkController, SolrCore core,
      ZkNodeProps leaderProps, boolean peerSyncOnlyWithActive) {
    boolean success = false;
    PeerSync.PeerSyncResult result = null;
    CloudDescriptor cloudDesc = core.getCoreDescriptor().getCloudDescriptor();
    String collection = cloudDesc.getCollectionName();
    String shardId = cloudDesc.getShardId();

    if (isClosed) {
      log.info("We have been closed, won't sync with replicas");
      return PeerSync.PeerSyncResult.failure();
    }

    boolean success = false;
    PeerSync.PeerSyncResult result = null;
    assert core != null;
    assert core.getCoreDescriptor() != null;
    CloudDescriptor cloudDesc = core.getCoreDescriptor().getCloudDescriptor();
    String collection = cloudDesc.getCollectionName();
    String shardId = cloudDesc.getShardId();

    // first sync ourselves - we are the potential leader after all
    try {
      result = syncWithReplicas(zkController, core, leaderProps, collection,

@@ -160,6 +162,11 @@ public class SyncStrategy {
    List<ZkCoreNodeProps> nodes = zkController.getZkStateReader()
        .getReplicaProps(collection, shardId,core.getCoreDescriptor().getCloudDescriptor().getCoreNodeName());

    if (isClosed) {
      log.info("We have been closed, won't sync with replicas");
      return PeerSync.PeerSyncResult.failure();
    }

    if (nodes == null) {
      // I have no replicas
      return PeerSync.PeerSyncResult.success();

@@ -184,6 +191,11 @@ public class SyncStrategy {
      String shardId, ZkNodeProps leaderProps, CoreDescriptor cd,
      int nUpdates) {

    if (isClosed) {
      log.info("We have been closed, won't sync replicas to me.");
      return;
    }

    // sync everyone else
    // TODO: we should do this in parallel at least
    List<ZkCoreNodeProps> nodes = zkController

@@ -289,6 +301,11 @@ public class SyncStrategy {
    }
    @Override
    public void run() {

      if (isClosed) {
        log.info("We have been closed, won't request recovery");
        return;
      }
      RequestRecovery recoverRequestCmd = new RequestRecovery();
      recoverRequestCmd.setAction(CoreAdminAction.REQUESTRECOVERY);
      recoverRequestCmd.setCoreName(coreName);
@@ -16,6 +16,7 @@
 */
package org.apache.solr.cloud;

import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;

@@ -46,6 +47,7 @@ import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

@@ -62,11 +64,13 @@ import org.apache.solr.client.solrj.request.CoreAdminRequest.WaitForState;
import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType;
import org.apache.solr.cloud.overseer.OverseerAction;
import org.apache.solr.cloud.overseer.SliceMutator;
import org.apache.solr.common.AlreadyClosedException;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.cloud.BeforeReconnect;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.CollectionStateWatcher;
import org.apache.solr.common.cloud.ConnectionManager;
import org.apache.solr.common.cloud.DefaultConnectionStrategy;
import org.apache.solr.common.cloud.DefaultZkACLProvider;
import org.apache.solr.common.cloud.DefaultZkCredentialsProvider;

@@ -90,6 +94,7 @@ import org.apache.solr.common.params.CollectionParams;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.IOUtils;
import org.apache.solr.common.util.ObjectReleaseTracker;
import org.apache.solr.common.util.StrUtils;

@@ -102,6 +107,7 @@ import org.apache.solr.core.CoreDescriptor;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrCoreInitializationException;
import org.apache.solr.handler.admin.ConfigSetsHandlerApi;
import org.apache.solr.handler.component.HttpShardHandler;
import org.apache.solr.logging.MDCLoggingContext;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.servlet.SolrDispatchFilter;

@@ -137,7 +143,7 @@ import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP;
 * <p>
 * TODO: exceptions during close on attempts to update cloud state
 */
public class ZkController {
public class ZkController implements Closeable {

  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
  static final int WAIT_DOWN_STATES_TIMEOUT_SECONDS = 60;

@@ -433,11 +439,14 @@ public class ZkController {
        closeOutstandingElections(registerOnReconnect);
        markAllAsNotLeader(registerOnReconnect);
      }
    }, zkACLProvider);
    }, zkACLProvider, new ConnectionManager.IsClosed() {

      @Override
      public boolean isClosed() {
        return cc.isShutDown();
      }});

    this.overseerJobQueue = Overseer.getStateUpdateQueue(zkClient);
    this.overseerCollectionQueue = Overseer.getCollectionQueue(zkClient);
    this.overseerConfigSetQueue = Overseer.getConfigSetQueue(zkClient);
    this.overseerRunningMap = Overseer.getRunningMap(zkClient);
    this.overseerCompletedMap = Overseer.getCompletedMap(zkClient);
    this.overseerFailureMap = Overseer.getFailureMap(zkClient);

@@ -448,6 +457,10 @@ public class ZkController {
    });

    init(registerOnReconnect);

    this.overseerJobQueue = overseer.getStateUpdateQueue();
    this.overseerCollectionQueue = overseer.getCollectionQueue(zkClient);
    this.overseerConfigSetQueue = overseer.getConfigSetQueue(zkClient);

    assert ObjectReleaseTracker.track(this);
  }

@@ -554,42 +567,62 @@ public class ZkController {
   */
  public void close() {
    this.isClosed = true;

    ForkJoinPool customThreadPool = new ForkJoinPool(10);

    customThreadPool.submit(() -> Collections.singleton(overseerElector.getContext()).parallelStream().forEach(c -> {
      IOUtils.closeQuietly(c);
    }));

    customThreadPool.submit(() -> Collections.singleton(overseer).parallelStream().forEach(c -> {
      IOUtils.closeQuietly(c);
    }));

    synchronized (collectionToTerms) {
      collectionToTerms.values().forEach(ZkCollectionTerms::close);
      customThreadPool.submit(() -> collectionToTerms.values().parallelStream().forEach(c -> {
        c.close();
      }));
    }
    try {
      for (ElectionContext context : electionContexts.values()) {
        try {
          context.close();
        } catch (Exception e) {
          log.error("Error closing overseer", e);
        }
      }

      customThreadPool.submit(() -> replicateFromLeaders.values().parallelStream().forEach(c -> {
        c.stopReplication();
      }));

      customThreadPool.submit(() -> electionContexts.values().parallelStream().forEach(c -> {
        IOUtils.closeQuietly(c);
      }));

    } finally {

      customThreadPool.submit(() -> Collections.singleton(cloudSolrClient).parallelStream().forEach(c -> {
        IOUtils.closeQuietly(c);
      }));
      customThreadPool.submit(() -> Collections.singleton(cloudManager).parallelStream().forEach(c -> {
        IOUtils.closeQuietly(c);
      }));

      try {
        IOUtils.closeQuietly(overseerElector.getContext());
        IOUtils.closeQuietly(overseer);
      } finally {
        if (cloudSolrClient != null) {
          IOUtils.closeQuietly(cloudSolrClient);
        }
        if (cloudManager != null) {
          IOUtils.closeQuietly(cloudManager);
        }
        try {
          try {
            zkStateReader.close();
          } catch (Exception e) {
            log.error("Error closing zkStateReader", e);
          }
        } finally {
          try {
            zkClient.close();
          } catch (Exception e) {
            log.error("Error closing zkClient", e);
          }
          zkStateReader.close();
        } catch (Exception e) {
          log.error("Error closing zkStateReader", e);
        }
      } finally {
        try {
          zkClient.close();
        } catch (Exception e) {
          log.error("Error closing zkClient", e);
        } finally {

          // just in case the OverseerElectionContext managed to start another Overseer
          IOUtils.closeQuietly(overseer);

          ExecutorUtil.shutdownAndAwaitTermination(customThreadPool);
        }

      }

    }
    assert ObjectReleaseTracker.release(this);
  }
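The rewritten close() above fans individual closes out over a small ForkJoinPool so one slow resource cannot serialize shutdown, then drains the pool with ExecutorUtil. A hedged sketch of that pattern in isolation (the closeAll wrapper is illustrative; IOUtils.closeQuietly and ExecutorUtil.shutdownAndAwaitTermination are the Solr utilities used in the hunk):

import java.io.Closeable;
import java.util.Arrays;
import java.util.concurrent.ForkJoinPool;
import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.IOUtils;

class ParallelClose {
  // Close every resource in parallel, then wait for all closes to finish.
  static void closeAll(Closeable... resources) {
    ForkJoinPool pool = new ForkJoinPool(10);
    try {
      pool.submit(() -> Arrays.stream(resources).parallel()
          .forEach(IOUtils::closeQuietly));
    } finally {
      ExecutorUtil.shutdownAndAwaitTermination(pool);
    }
  }
}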
@ -669,9 +702,11 @@ public class ZkController {
|
|||
if (cloudManager != null) {
|
||||
return cloudManager;
|
||||
}
|
||||
cloudSolrClient = new CloudSolrClient.Builder(Collections.singletonList(zkServerAddress), Optional.empty())
|
||||
.withHttpClient(cc.getUpdateShardHandler().getDefaultHttpClient()).build();
|
||||
cloudSolrClient = new CloudSolrClient.Builder(Collections.singletonList(zkServerAddress), Optional.empty()).withSocketTimeout(30000).withConnectionTimeout(15000)
|
||||
.withHttpClient(cc.getUpdateShardHandler().getDefaultHttpClient())
|
||||
.withConnectionTimeout(15000).withSocketTimeout(30000).build();
|
||||
cloudManager = new SolrClientCloudManager(new ZkDistributedQueueFactory(zkClient), cloudSolrClient);
|
||||
cloudManager.getClusterStateProvider().connect();
|
||||
}
|
||||
return cloudManager;
|
||||
}
|
||||
|
@ -764,7 +799,8 @@ public class ZkController {
|
|||
* @throws KeeperException if there is a Zookeeper error
|
||||
* @throws InterruptedException on interrupt
|
||||
*/
|
||||
public static void createClusterZkNodes(SolrZkClient zkClient) throws KeeperException, InterruptedException, IOException {
|
||||
public static void createClusterZkNodes(SolrZkClient zkClient)
|
||||
throws KeeperException, InterruptedException, IOException {
|
||||
ZkCmdExecutor cmdExecutor = new ZkCmdExecutor(zkClient.getZkClientTimeout());
|
||||
cmdExecutor.ensureExists(ZkStateReader.LIVE_NODES_ZKNODE, zkClient);
|
||||
cmdExecutor.ensureExists(ZkStateReader.COLLECTIONS_ZKNODE, zkClient);
|
||||
|
@ -777,7 +813,7 @@ public class ZkController {
|
|||
cmdExecutor.ensureExists(ZkStateReader.CLUSTER_STATE, emptyJson, CreateMode.PERSISTENT, zkClient);
|
||||
cmdExecutor.ensureExists(ZkStateReader.SOLR_SECURITY_CONF_PATH, emptyJson, CreateMode.PERSISTENT, zkClient);
|
||||
cmdExecutor.ensureExists(ZkStateReader.SOLR_AUTOSCALING_CONF_PATH, emptyJson, CreateMode.PERSISTENT, zkClient);
|
||||
bootstrapDefaultConfigSet(zkClient);
|
||||
bootstrapDefaultConfigSet(zkClient);
|
||||
}
|
||||
|
||||
private static void bootstrapDefaultConfigSet(SolrZkClient zkClient) throws KeeperException, InterruptedException, IOException {
|
||||
|
@ -839,7 +875,7 @@ public class ZkController {
|
|||
// start the overseer first as following code may need it's processing
|
||||
if (!zkRunOnly) {
|
||||
overseerElector = new LeaderElector(zkClient);
|
||||
this.overseer = new Overseer(cc.getShardHandlerFactory().getShardHandler(), cc.getUpdateShardHandler(),
|
||||
this.overseer = new Overseer((HttpShardHandler) cc.getShardHandlerFactory().getShardHandler(), cc.getUpdateShardHandler(),
|
||||
CommonParams.CORES_HANDLER_PATH, zkStateReader, this, cloudConfig);
|
||||
ElectionContext context = new OverseerElectionContext(zkClient,
|
||||
overseer, getNodeName());
|
||||
|
@@ -911,10 +947,10 @@ public class ZkController {
     LiveNodesListener listener = (oldNodes, newNodes) -> {
       oldNodes.removeAll(newNodes);
       if (oldNodes.isEmpty()) { // only added nodes
-        return;
+        return false;
       }
       if (isClosed) {
-        return;
+        return true;
       }
       // if this node is in the top three then attempt to create nodeLost message
       int i = 0;
@@ -923,7 +959,7 @@ public class ZkController {
           break;
         }
         if (i > 2) {
-          return; // this node is not in the top three
+          return false; // this node is not in the top three
         }
         i++;
       }
@@ -948,11 +984,17 @@ public class ZkController {
           }
         }
       }
+      return false;
     };
     zkStateReader.registerLiveNodesListener(listener);
   }

   public void publishAndWaitForDownStates() throws KeeperException,
       InterruptedException {
+    publishAndWaitForDownStates(WAIT_DOWN_STATES_TIMEOUT_SECONDS);
+  }
+
+  public void publishAndWaitForDownStates(int timeoutSeconds) throws KeeperException,
+      InterruptedException {

     publishNodeAsDown(getNodeName());
@@ -983,7 +1025,7 @@ public class ZkController {
       });
     }

-    boolean allPublishedDown = latch.await(WAIT_DOWN_STATES_TIMEOUT_SECONDS, TimeUnit.SECONDS);
+    boolean allPublishedDown = latch.await(timeoutSeconds, TimeUnit.SECONDS);
     if (!allPublishedDown) {
       log.warn("Timed out waiting to see all nodes published as DOWN in our cluster state.");
     }
@@ -1051,10 +1093,13 @@ public class ZkController {
     log.info("Remove node as live in ZooKeeper:" + nodePath);
     List<Op> ops = new ArrayList<>(2);
     ops.add(Op.delete(nodePath, -1));
-    if (zkClient.exists(nodeAddedPath, true)) {
-      ops.add(Op.delete(nodeAddedPath, -1));
+    ops.add(Op.delete(nodeAddedPath, -1));
+
+    try {
+      zkClient.multi(ops, true);
+    } catch (NoNodeException e) {
+
     }
-    zkClient.multi(ops, true);
   }

   public String getNodeName() {
@@ -1158,6 +1203,10 @@ public class ZkController {
     // TODO: should this actually be done earlier, before (or as part of)
     // leader election perhaps?

+    if (core == null) {
+      throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "SolrCore is no longer available to register");
+    }
+
     UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
     boolean isTlogReplicaAndNotLeader = replica.getType() == Replica.Type.TLOG && !isLeader;
     if (isTlogReplicaAndNotLeader) {
@@ -1270,6 +1319,7 @@ public class ZkController {
     final long msInSec = 1000L;
     int maxTries = (int) Math.floor(leaderConflictResolveWait / msInSec);
     while (!leaderUrl.equals(clusterStateLeaderUrl)) {
+      if (cc.isShutDown()) throw new AlreadyClosedException();
       if (tries > maxTries) {
         throw new SolrException(ErrorCode.SERVER_ERROR,
             "There is conflicting information about the leader of shard: "
@@ -1290,6 +1340,8 @@ public class ZkController {
             .getCoreUrl();
       }

+    } catch (AlreadyClosedException e) {
+      throw e;
     } catch (Exception e) {
       log.error("Error getting leader from zk", e);
       throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
@@ -1336,7 +1388,7 @@ public class ZkController {
         Thread.sleep(1000);
       }
       if (cc.isShutDown()) {
-        throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "CoreContainer is closed");
+        throw new AlreadyClosedException();
       }
     }
     throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "Could not get leader props", exp);
@@ -2392,6 +2444,9 @@ public class ZkController {
   }

   private boolean fireEventListeners(String zkDir) {
+    if (isClosed || cc.isShutDown()) {
+      return false;
+    }
     synchronized (confDirectoryListeners) {
       // if this is not among directories to be watched then don't set the watcher anymore
       if (!confDirectoryListeners.containsKey(zkDir)) {
@@ -2527,15 +2582,17 @@ public class ZkController {
    * @param nodeName to operate on
    */
   public void publishNodeAsDown(String nodeName) {
-    log.debug("Publish node={} as DOWN", nodeName);
+    log.info("Publish node={} as DOWN", nodeName);
     ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, OverseerAction.DOWNNODE.toLower(),
         ZkStateReader.NODE_NAME_PROP, nodeName);
     try {
-      Overseer.getStateUpdateQueue(getZkClient()).offer(Utils.toJSON(m));
+      overseer.getStateUpdateQueue().offer(Utils.toJSON(m));
+    } catch (AlreadyClosedException e) {
+      log.info("Not publishing node as DOWN because a resource required to do so is already closed.");
     } catch (InterruptedException e) {
-      Thread.interrupted();
+      Thread.currentThread().interrupt();
       log.debug("Publish node as down was interrupted.");
-    } catch (Exception e) {
+    } catch (KeeperException e) {
       log.warn("Could not publish node as down: " + e.getMessage());
     }
   }
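Two idioms from SOLR-12897 show up in the hunk above and repeat throughout this commit: AlreadyClosedException is treated as a benign shutdown signal and logged quietly, and interrupts are re-asserted via Thread.currentThread().interrupt() rather than cleared with Thread.interrupted(). A minimal self-contained sketch of the combined idiom, using stand-in types rather than the Solr classes:

    import java.util.concurrent.BlockingQueue;
    import java.util.concurrent.LinkedBlockingQueue;

    class ShutdownAwarePublisher {
      // Stand-in for Solr's AlreadyClosedException: thrown by resources once shutdown has begun.
      static class AlreadyClosedException extends RuntimeException {}

      private final BlockingQueue<byte[]> queue = new LinkedBlockingQueue<>();
      private volatile boolean closed;

      void publish(byte[] msg) {
        try {
          if (closed) throw new AlreadyClosedException();
          queue.put(msg);
        } catch (AlreadyClosedException e) {
          // benign during shutdown: log quietly instead of dumping a stack trace
        } catch (InterruptedException e) {
          // restore the flag so callers up the stack still see the interrupt
          Thread.currentThread().interrupt();
        }
      }
    }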
@@ -39,6 +39,7 @@ import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrException.ErrorCode;
 import org.apache.solr.common.cloud.SolrZkClient;
 import org.apache.solr.common.cloud.ZkCmdExecutor;
+import org.apache.solr.common.cloud.ConnectionManager.IsClosed;
 import org.apache.solr.common.util.Pair;
 import org.apache.zookeeper.CreateMode;
 import org.apache.zookeeper.KeeperException;

@@ -113,11 +114,15 @@ public class ZkDistributedQueue implements DistributedQueue {
   public ZkDistributedQueue(SolrZkClient zookeeper, String dir, Stats stats) {
     this(zookeeper, dir, stats, 0);
   }

   public ZkDistributedQueue(SolrZkClient zookeeper, String dir, Stats stats, int maxQueueSize) {
+    this(zookeeper, dir, stats, maxQueueSize, null);
+  }
+
+  public ZkDistributedQueue(SolrZkClient zookeeper, String dir, Stats stats, int maxQueueSize, IsClosed higherLevelIsClosed) {
     this.dir = dir;

-    ZkCmdExecutor cmdExecutor = new ZkCmdExecutor(zookeeper.getZkClientTimeout());
+    ZkCmdExecutor cmdExecutor = new ZkCmdExecutor(zookeeper.getZkClientTimeout(), higherLevelIsClosed);
     try {
       cmdExecutor.ensureExists(dir, zookeeper);
     } catch (KeeperException e) {
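The added five-argument constructor above threads an optional ConnectionManager.IsClosed hook down to ZkCmdExecutor so retry loops can stop as soon as a higher-level owner starts closing, instead of retrying until timeout. A hedged sketch of that telescoping-constructor shape with stand-in types (RetryingExecutor and its interface are illustrative, not Solr API):

    class RetryingExecutor {
      // Stand-in for ConnectionManager.IsClosed: lets an owner veto further retries.
      interface IsClosed { boolean isClosed(); }

      private final int timeoutMs;
      private final IsClosed higherLevelIsClosed;

      RetryingExecutor(int timeoutMs) {
        this(timeoutMs, null); // existing callers keep the old shape, no close hook
      }

      RetryingExecutor(int timeoutMs, IsClosed higherLevelIsClosed) {
        this.timeoutMs = timeoutMs;
        this.higherLevelIsClosed = higherLevelIsClosed;
      }

      void retryUntil(Runnable op) throws InterruptedException {
        long deadline = System.nanoTime() + timeoutMs * 1_000_000L;
        while (System.nanoTime() < deadline) {
          if (higherLevelIsClosed != null && higherLevelIsClosed.isClosed()) {
            return; // owner is shutting down; stop retrying instead of timing out noisily
          }
          try {
            op.run();
            return;
          } catch (RuntimeException e) {
            Thread.sleep(100); // simplistic backoff for the sketch
          }
        }
      }
    }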
@@ -313,29 +313,24 @@ public class ZkShardTerms implements AutoCloseable{
    * Create correspond ZK term node
    */
   private void ensureTermNodeExist() {
-    String path = "/collections/"+collection+ "/terms";
+    String path = "/collections/" + collection + "/terms";
     try {
-      if (!zkClient.exists(path, true)) {
-        try {
-          zkClient.makePath(path, true);
-        } catch (KeeperException.NodeExistsException e) {
-          // it's okay if another beats us creating the node
-        }
-      }
-      path += "/"+shard;
-      if (!zkClient.exists(path, true)) {
-        try {
-          Map<String, Long> initialTerms = new HashMap<>();
-          zkClient.create(path, Utils.toJSON(initialTerms), CreateMode.PERSISTENT, true);
-        } catch (KeeperException.NodeExistsException e) {
-          // it's okay if another beats us creating the node
-        }
-      }
+      path += "/" + shard;
+
+      try {
+        Map<String,Long> initialTerms = new HashMap<>();
+        zkClient.makePath(path, Utils.toJSON(initialTerms), CreateMode.PERSISTENT, true);
+      } catch (KeeperException.NodeExistsException e) {
+        // it's okay if another beats us creating the node
+      }
+
     } catch (InterruptedException e) {
       Thread.interrupted();
-      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error creating shard term node in Zookeeper for collection: " + collection, e);
+      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
+          "Error creating shard term node in Zookeeper for collection: " + collection, e);
     } catch (KeeperException e) {
-      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error creating shard term node in Zookeeper for collection: " + collection, e);
+      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
+          "Error creating shard term node in Zookeeper for collection: " + collection, e);
     }
   }
@@ -245,7 +245,7 @@ public class AddReplicaCmd implements OverseerCollectionMessageHandler.Cmd {
       props = props.plus(ZkStateReader.CORE_NODE_NAME_PROP, createReplica.coreNodeName);
     }
     try {
-      Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(props));
+      ocmh.overseer.offerStateUpdate(Utils.toJSON(props));
     } catch (Exception e) {
       throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Exception updating Overseer state queue", e);
     }

@@ -328,6 +328,7 @@ public class AddReplicaCmd implements OverseerCollectionMessageHandler.Cmd {
       }
     }
   }
+  log.info("Returning CreateReplica command.");
   return new CreateReplica(collection, shard, node, replicaType, coreName, coreNodeName);
 }
@@ -115,7 +115,7 @@ public class Assign {
     } catch (IOException | KeeperException e) {
       throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error inc and get counter from Zookeeper for collection:"+collection, e);
     } catch (InterruptedException e) {
-      Thread.interrupted();
+      Thread.currentThread().interrupt();
       throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error inc and get counter from Zookeeper for collection:" + collection, e);
     }
   }

@@ -182,21 +182,34 @@ public class Assign {
     return String.format(Locale.ROOT, "%s_%s_replica_%s%s", collectionName, shard, type.name().substring(0,1).toLowerCase(Locale.ROOT), replicaNum);
   }

-  private static int defaultCounterValue(DocCollection collection, boolean newCollection) {
+  private static int defaultCounterValue(DocCollection collection, boolean newCollection, String shard) {
     if (newCollection) return 0;
-    int defaultValue = collection.getReplicas().size();
+
+    int defaultValue;
+    if (collection.getSlice(shard) != null && collection.getSlice(shard).getReplicas().isEmpty()) {
+      return 0;
+    } else {
+      defaultValue = collection.getReplicas().size() * 2;
+    }
+
     if (collection.getReplicationFactor() != null) {
       // numReplicas and replicationFactor * numSlices may not be equal
       // when many addReplica or deleteReplica operations have been executed
       defaultValue = Math.max(defaultValue,
           collection.getReplicationFactor() * collection.getSlices().size());
     }
-    return defaultValue * 20;
+    return defaultValue;
   }

+  private static int defaultCounterValue(DocCollection collection, boolean newCollection) {
+    if (newCollection) return 0;
+    int defaultValue = collection.getReplicas().size();
+    return defaultValue;
+  }
+
   public static String buildSolrCoreName(DistribStateManager stateManager, DocCollection collection, String shard, Replica.Type type, boolean newCollection) {
     Slice slice = collection.getSlice(shard);
-    int defaultValue = defaultCounterValue(collection, newCollection);
+    int defaultValue = defaultCounterValue(collection, newCollection, shard);
     int replicaNum = incAndGetId(stateManager, collection.getName(), defaultValue);
     String coreName = buildSolrCoreName(collection.getName(), shard, type, replicaNum);
     while (existCoreName(coreName, slice)) {
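A quick worked check of the Assign#defaultCounterValue change above. Take a collection with 2 slices, replicationFactor=2 and 4 existing replicas, adding to a shard that already has replicas: the old code started the core-name counter at max(4, 2 * 2) * 20 = 80, while the new shard-aware overload starts it at max(4 * 2, 2 * 2) = 8, so generated core names stay close to the real replica count; if the target shard is empty, the counter simply restarts at 0. (The concrete numbers here are illustrative, not from the commit.)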
@@ -160,7 +160,7 @@ public class BackupCmd implements OverseerCollectionMessageHandler.Cmd {
     String backupName = request.getStr(NAME);
     String asyncId = request.getStr(ASYNC);
     String repoName = request.getStr(CoreAdminParams.BACKUP_REPOSITORY);
-    ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
+    ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler(ocmh.overseer.getCoreContainer().getUpdateShardHandler().getDefaultHttpClient());
     Map<String, String> requestMap = new HashMap<>();

     String commitName = request.getStr(CoreAdminParams.COMMIT_NAME);
@@ -155,8 +155,8 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
       }

       createCollectionZkNode(stateManager, collectionName, collectionParams);

-      Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(message));
+      ocmh.overseer.offerStateUpdate(Utils.toJSON(message));

       // wait for a while until we see the collection
       TimeOut waitUntil = new TimeOut(30, TimeUnit.SECONDS, timeSource);

@@ -195,7 +195,7 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
       log.debug(formatString("Creating SolrCores for new collection {0}, shardNames {1} , message : {2}",
           collectionName, shardNames, message));
       Map<String,ShardRequest> coresToCreate = new LinkedHashMap<>();
-      ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
+      ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler(ocmh.overseer.getCoreContainer().getUpdateShardHandler().getDefaultHttpClient());
       for (ReplicaPosition replicaPosition : replicaPositions) {
         String nodeName = replicaPosition.node;

@@ -235,7 +235,7 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
             ZkStateReader.BASE_URL_PROP, baseUrl,
             ZkStateReader.REPLICA_TYPE, replicaPosition.type.name(),
             CommonAdminParams.WAIT_FOR_FINAL_STATE, Boolean.toString(waitForFinalState));
-        Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(props));
+        ocmh.overseer.offerStateUpdate(Utils.toJSON(props));
       }

       // Need to create new params for each request

@@ -308,7 +308,7 @@ public class CreateCollectionCmd implements OverseerCollectionMessageHandler.Cmd
           Overseer.QUEUE_OPERATION, MODIFYCOLLECTION.toString(),
           ZkStateReader.COLLECTION_PROP, withCollection,
           CollectionAdminParams.COLOCATED_WITH, collectionName);
-      Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(props));
+      ocmh.overseer.offerStateUpdate(Utils.toJSON(props));
       try {
         zkStateReader.waitForState(withCollection, 5, TimeUnit.SECONDS, (liveNodes, collectionState) -> collectionName.equals(collectionState.getStr(COLOCATED_WITH)));
       } catch (TimeoutException e) {
@@ -21,7 +21,6 @@ import java.lang.invoke.MethodHandles;
 import java.util.HashMap;
 import java.util.Map;

-import org.apache.solr.cloud.Overseer;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.cloud.ClusterState;
 import org.apache.solr.common.cloud.DocCollection;

@@ -71,7 +70,7 @@ public class CreateShardCmd implements OverseerCollectionMessageHandler.Cmd {
     }

     ZkStateReader zkStateReader = ocmh.zkStateReader;
-    Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(message));
+    ocmh.overseer.offerStateUpdate(Utils.toJSON(message));
     // wait for a while until we see the shard
     ocmh.waitForNewShard(collectionName, sliceName);
     String async = message.getStr(ASYNC);
@@ -84,7 +84,7 @@ public class CreateSnapshotCmd implements OverseerCollectionMessageHandler.Cmd {
     Map<String, String> requestMap = new HashMap<>();
     NamedList shardRequestResults = new NamedList();
     Map<String, Slice> shardByCoreName = new HashMap<>();
-    ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
+    ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler(ocmh.overseer.getCoreContainer().getUpdateShardHandler().getDefaultHttpClient());

     for (Slice slice : ocmh.zkStateReader.getClusterState().getCollection(collectionName).getSlices()) {
       for (Replica replica : slice.getReplicas()) {
@@ -46,7 +46,6 @@ import org.apache.solr.core.SolrInfoBean;
 import org.apache.solr.core.snapshots.SolrSnapshotManager;
 import org.apache.solr.handler.admin.MetricsHistoryHandler;
 import org.apache.solr.metrics.SolrMetricManager;
-import org.apache.solr.util.TimeOut;
 import org.apache.zookeeper.KeeperException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

@@ -127,24 +126,26 @@ public class DeleteCollectionCmd implements OverseerCollectionMessageHandler.Cmd
       }

       ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, DELETE.toLower(), NAME, collection);
-      Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(m));
+      ocmh.overseer.offerStateUpdate(Utils.toJSON(m));

-      // wait for a while until we don't see the collection
-      TimeOut timeout = new TimeOut(30, TimeUnit.SECONDS, timeSource);
-      boolean removed = false;
-      while (! timeout.hasTimedOut()) {
-        timeout.sleep(100);
-        removed = !zkStateReader.getClusterState().hasCollection(collection);
-        if (removed) {
-          timeout.sleep(500); // just a bit of time so it's more likely other
-                              // readers see on return
-          break;
-        }
-      }
-      if (!removed) {
-        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
-            "Could not fully remove collection: " + collection);
-      }
+      zkStateReader.waitForState(collection, 60, TimeUnit.SECONDS, (liveNodes, collectionState) -> collectionState == null);
+
+//      TimeOut timeout = new TimeOut(60, TimeUnit.SECONDS, timeSource);
+//      boolean removed = false;
+//      while (! timeout.hasTimedOut()) {
+//        timeout.sleep(100);
+//        removed = !zkStateReader.getClusterState().hasCollection(collection);
+//        if (removed) {
+//          timeout.sleep(500); // just a bit of time so it's more likely other
+//          // readers see on return
+//          break;
+//        }
+//      }
+//      if (!removed) {
+//        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
+//            "Could not fully remove collection: " + collection);
+//      }
     } finally {

       try {
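This is the SOLR-12898 pattern in its clearest form: a sleep-and-poll loop over cluster state becomes a single blocking call to ZkStateReader#waitForState with a predicate, using the same (collection, timeout, unit, predicate) signature seen throughout the diff. A minimal sketch of the call shape — the wrapper class and method name are illustrative, not part of the commit:

    import java.util.concurrent.TimeUnit;
    import java.util.concurrent.TimeoutException;

    import org.apache.solr.common.cloud.ZkStateReader;

    class DeleteWaitExample {
      // Blocks on ZK watch events until the collection's state is gone, instead of
      // sleeping in 100ms increments and re-reading the cluster state on each pass.
      static void awaitCollectionGone(ZkStateReader reader, String collection) throws Exception {
        try {
          reader.waitForState(collection, 60, TimeUnit.SECONDS,
              (liveNodes, collectionState) -> collectionState == null);
        } catch (TimeoutException e) {
          throw new IllegalStateException("Could not fully remove collection: " + collection, e);
        }
      }
    }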
@@ -218,7 +218,7 @@ public class DeleteReplicaCmd implements Cmd {
           " with onlyIfDown='true', but state is '" + replica.getStr(ZkStateReader.STATE_PROP) + "'");
     }

-    ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
+    ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler(ocmh.overseer.getCoreContainer().getUpdateShardHandler().getDefaultHttpClient());
     String core = replica.getStr(ZkStateReader.CORE_NAME_PROP);
     String asyncId = message.getStr(ASYNC);
     AtomicReference<Map<String, String>> requestMap = new AtomicReference<>(null);

@@ -246,7 +246,7 @@ public class DeleteReplicaCmd implements Cmd {
       ocmh.processResponses(results, shardHandler, false, null, asyncId, requestMap.get());

       //check if the core unload removed the corenode zk entry
-      if (ocmh.waitForCoreNodeGone(collectionName, shard, replicaName, 5000)) return Boolean.TRUE;
+      if (ocmh.waitForCoreNodeGone(collectionName, shard, replicaName, 30000)) return Boolean.TRUE;
     }

     // try and ensure core info is removed from cluster state
@@ -17,6 +17,13 @@
  */
 package org.apache.solr.cloud.api.collections;

+import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP;
+import static org.apache.solr.common.cloud.ZkStateReader.NODE_NAME_PROP;
+import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETEREPLICA;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETESHARD;
+import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
+
 import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
 import java.util.HashMap;

@@ -26,12 +33,10 @@ import java.util.Map;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;

-import org.apache.solr.client.solrj.cloud.DistributedQueue;
 import org.apache.solr.cloud.Overseer;
 import org.apache.solr.cloud.overseer.OverseerAction;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.cloud.ClusterState;
-import org.apache.solr.common.cloud.DocCollection;
 import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.cloud.Slice;
 import org.apache.solr.common.cloud.ZkNodeProps;

@@ -41,18 +46,10 @@ import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.common.util.TimeSource;
 import org.apache.solr.common.util.Utils;
-import org.apache.solr.util.TimeOut;
 import org.apache.zookeeper.KeeperException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

-import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP;
-import static org.apache.solr.common.cloud.ZkStateReader.NODE_NAME_PROP;
-import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP;
-import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETEREPLICA;
-import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETESHARD;
-import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
-
 public class DeleteShardCmd implements OverseerCollectionMessageHandler.Cmd {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
   private final OverseerCollectionMessageHandler ocmh;

@@ -85,13 +82,12 @@ public class DeleteShardCmd implements OverseerCollectionMessageHandler.Cmd {
     if (state == Slice.State.RECOVERY) {
       // mark the slice as 'construction' and only then try to delete the cores
       // see SOLR-9455
-      DistributedQueue inQueue = Overseer.getStateUpdateQueue(ocmh.zkStateReader.getZkClient());
       Map<String, Object> propMap = new HashMap<>();
       propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
       propMap.put(sliceId, Slice.State.CONSTRUCTION.toString());
       propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
       ZkNodeProps m = new ZkNodeProps(propMap);
-      inQueue.offer(Utils.toJSON(m));
+      ocmh.overseer.offerStateUpdate(Utils.toJSON(m));
     }

     String asyncId = message.getStr(ASYNC);

@@ -129,29 +125,14 @@ public class DeleteShardCmd implements OverseerCollectionMessageHandler.Cmd {
         }
       }
       log.debug("Waiting for delete shard action to complete");
-      cleanupLatch.await(5, TimeUnit.MINUTES);
+      cleanupLatch.await(1, TimeUnit.MINUTES);

       ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, DELETESHARD.toLower(), ZkStateReader.COLLECTION_PROP,
           collectionName, ZkStateReader.SHARD_ID_PROP, sliceId);
       ZkStateReader zkStateReader = ocmh.zkStateReader;
-      Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(m));
+      ocmh.overseer.offerStateUpdate(Utils.toJSON(m));

-      // wait for a while until we don't see the shard
-      TimeOut timeout = new TimeOut(30, TimeUnit.SECONDS, timeSource);
-      boolean removed = false;
-      while (!timeout.hasTimedOut()) {
-        timeout.sleep(100);
-        DocCollection collection = zkStateReader.getClusterState().getCollection(collectionName);
-        removed = collection.getSlice(sliceId) == null;
-        if (removed) {
-          timeout.sleep(100); // just a bit of time so it's more likely other readers see on return
-          break;
-        }
-      }
-      if (!removed) {
-        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
-            "Could not fully remove collection: " + collectionName + " shard: " + sliceId);
-      }
+      zkStateReader.waitForState(collectionName, 45, TimeUnit.SECONDS, (l, c) -> c.getSlice(sliceId) == null);

       log.info("Successfully deleted collection: " + collectionName + ", shard: " + sliceId);
     } catch (SolrException e) {
@@ -69,7 +69,7 @@ public class DeleteSnapshotCmd implements OverseerCollectionMessageHandler.Cmd {
     String asyncId = message.getStr(ASYNC);
     Map<String, String> requestMap = new HashMap<>();
     NamedList shardRequestResults = new NamedList();
-    ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
+    ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler(ocmh.overseer.getCoreContainer().getUpdateShardHandler().getDefaultHttpClient());
     SolrZkClient zkClient = ocmh.zkStateReader.getZkClient();

     Optional<CollectionSnapshotMetaData> meta = SolrSnapshotManager.getCollectionLevelSnapshot(zkClient, collectionName, commitName);
@@ -42,6 +42,7 @@ import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.TimeSource;
 import org.apache.solr.common.util.Utils;
+import org.apache.solr.handler.component.HttpShardHandlerFactory;
 import org.apache.solr.handler.component.ShardHandler;
 import org.apache.solr.handler.component.ShardHandlerFactory;
 import org.apache.solr.update.SolrIndexSplitter;

@@ -146,7 +147,7 @@ public class MigrateCmd implements OverseerCollectionMessageHandler.Cmd {
     DocRouter.Range keyHashRange = sourceRouter.keyHashRange(splitKey);

     ShardHandlerFactory shardHandlerFactory = ocmh.shardHandlerFactory;
-    ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
+    ShardHandler shardHandler = ((HttpShardHandlerFactory)shardHandlerFactory).getShardHandler(ocmh.overseer.getCoreContainer().getUpdateShardHandler().getDefaultHttpClient());

     log.info("Hash range for split.key: {} is: {}", splitKey, keyHashRange);
     // intersect source range, keyHashRange and target range

@@ -181,7 +182,7 @@ public class MigrateCmd implements OverseerCollectionMessageHandler.Cmd {
         "targetCollection", targetCollection.getName(),
         "expireAt", RoutingRule.makeExpiryAt(timeout));
     log.info("Adding routing rule: " + m);
-    Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(m));
+    ocmh.overseer.offerStateUpdate(Utils.toJSON(m));

     // wait for a while until we see the new rule
     log.info("Waiting to see routing rule updated in clusterstate");
@@ -16,6 +16,58 @@
  */
 package org.apache.solr.cloud.api.collections;

+import static org.apache.solr.client.solrj.cloud.autoscaling.Policy.POLICY;
+import static org.apache.solr.common.cloud.DocCollection.SNITCH;
+import static org.apache.solr.common.cloud.ZkStateReader.BASE_URL_PROP;
+import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP;
+import static org.apache.solr.common.cloud.ZkStateReader.CORE_NAME_PROP;
+import static org.apache.solr.common.cloud.ZkStateReader.CORE_NODE_NAME_PROP;
+import static org.apache.solr.common.cloud.ZkStateReader.ELECTION_NODE_PROP;
+import static org.apache.solr.common.cloud.ZkStateReader.PROPERTY_PROP;
+import static org.apache.solr.common.cloud.ZkStateReader.PROPERTY_VALUE_PROP;
+import static org.apache.solr.common.cloud.ZkStateReader.REJOIN_AT_HEAD_PROP;
+import static org.apache.solr.common.cloud.ZkStateReader.REPLICA_PROP;
+import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP;
+import static org.apache.solr.common.params.CollectionAdminParams.COLLECTION;
+import static org.apache.solr.common.params.CollectionAdminParams.COLOCATED_WITH;
+import static org.apache.solr.common.params.CollectionAdminParams.WITH_COLLECTION;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.ADDREPLICA;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.ADDREPLICAPROP;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.ADDROLE;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.ALIASPROP;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.BACKUP;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.BALANCESHARDUNIQUE;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.CREATE;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.CREATEALIAS;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.CREATESHARD;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.CREATESNAPSHOT;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETE;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETEALIAS;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETENODE;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETEREPLICA;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETEREPLICAPROP;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETESHARD;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETESNAPSHOT;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.MAINTAINROUTEDALIAS;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.MIGRATE;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.MIGRATESTATEFORMAT;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.MOCK_COLL_TASK;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.MOCK_REPLICA_TASK;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.MOCK_SHARD_TASK;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.MODIFYCOLLECTION;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.MOVEREPLICA;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.OVERSEERSTATUS;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.REBALANCELEADERS;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.RELOAD;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.REMOVEROLE;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.REPLACENODE;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.RESTORE;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.SPLITSHARD;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.UTILIZENODE;
+import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
+import static org.apache.solr.common.params.CommonParams.NAME;
+import static org.apache.solr.common.util.Utils.makeMap;
+
 import java.io.IOException;
 import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
@@ -30,13 +82,12 @@ import java.util.Set;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.SynchronousQueue;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;

-import com.google.common.collect.ImmutableMap;
 import org.apache.commons.lang.StringUtils;
 import org.apache.solr.client.solrj.SolrResponse;
 import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.cloud.DistribStateManager;
-import org.apache.solr.client.solrj.cloud.DistributedQueue;
 import org.apache.solr.client.solrj.cloud.SolrCloudManager;
 import org.apache.solr.client.solrj.cloud.autoscaling.AlreadyExistsException;
 import org.apache.solr.client.solrj.cloud.autoscaling.BadVersionException;

@@ -79,8 +130,8 @@ import org.apache.solr.common.util.StrUtils;
 import org.apache.solr.common.util.SuppressForbidden;
 import org.apache.solr.common.util.TimeSource;
 import org.apache.solr.common.util.Utils;
+import org.apache.solr.handler.component.HttpShardHandlerFactory;
 import org.apache.solr.handler.component.ShardHandler;
-import org.apache.solr.handler.component.ShardHandlerFactory;
 import org.apache.solr.handler.component.ShardRequest;
 import org.apache.solr.handler.component.ShardResponse;
 import org.apache.solr.logging.MDCLoggingContext;

@@ -92,25 +143,7 @@ import org.apache.zookeeper.KeeperException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

-import static org.apache.solr.client.solrj.cloud.autoscaling.Policy.POLICY;
-import static org.apache.solr.common.cloud.DocCollection.SNITCH;
-import static org.apache.solr.common.cloud.ZkStateReader.BASE_URL_PROP;
-import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP;
-import static org.apache.solr.common.cloud.ZkStateReader.CORE_NAME_PROP;
-import static org.apache.solr.common.cloud.ZkStateReader.CORE_NODE_NAME_PROP;
-import static org.apache.solr.common.cloud.ZkStateReader.ELECTION_NODE_PROP;
-import static org.apache.solr.common.cloud.ZkStateReader.PROPERTY_PROP;
-import static org.apache.solr.common.cloud.ZkStateReader.PROPERTY_VALUE_PROP;
-import static org.apache.solr.common.cloud.ZkStateReader.REJOIN_AT_HEAD_PROP;
-import static org.apache.solr.common.cloud.ZkStateReader.REPLICA_PROP;
-import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP;
-import static org.apache.solr.common.params.CollectionAdminParams.COLLECTION;
-import static org.apache.solr.common.params.CollectionAdminParams.COLOCATED_WITH;
-import static org.apache.solr.common.params.CollectionAdminParams.WITH_COLLECTION;
-import static org.apache.solr.common.params.CollectionParams.CollectionAction.*;
-import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
-import static org.apache.solr.common.params.CommonParams.NAME;
-import static org.apache.solr.common.util.Utils.makeMap;
-
+import com.google.common.collect.ImmutableMap;

 /**
  * A {@link OverseerMessageHandler} that handles Collections API related
@@ -158,7 +191,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

   Overseer overseer;
-  ShardHandlerFactory shardHandlerFactory;
+  HttpShardHandlerFactory shardHandlerFactory;
   String adminPath;
   ZkStateReader zkStateReader;
   SolrCloudManager cloudManager;

@@ -191,7 +224,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
   private volatile boolean isClosed;

   public OverseerCollectionMessageHandler(ZkStateReader zkStateReader, String myId,
-                                        final ShardHandlerFactory shardHandlerFactory,
+                                        final HttpShardHandlerFactory shardHandlerFactory,
                                         String adminPath,
                                         Stats stats,
                                         Overseer overseer,

@@ -334,7 +367,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
     sreq.shards = new String[] {baseUrl};
     sreq.actualShards = sreq.shards;
     sreq.params = params;
-    ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
+    ShardHandler shardHandler = shardHandlerFactory.getShardHandler(overseer.getCoreContainer().getUpdateShardHandler().getDefaultHttpClient());
     shardHandler.submit(sreq, baseUrl, sreq.params);
   }
@@ -343,24 +376,22 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
       throws Exception {
     checkRequired(message, COLLECTION_PROP, SHARD_ID_PROP, REPLICA_PROP, PROPERTY_PROP, PROPERTY_VALUE_PROP);
     SolrZkClient zkClient = zkStateReader.getZkClient();
-    DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkClient);
     Map<String, Object> propMap = new HashMap<>();
     propMap.put(Overseer.QUEUE_OPERATION, ADDREPLICAPROP.toLower());
     propMap.putAll(message.getProperties());
     ZkNodeProps m = new ZkNodeProps(propMap);
-    inQueue.offer(Utils.toJSON(m));
+    overseer.offerStateUpdate(Utils.toJSON(m));
   }

   private void processReplicaDeletePropertyCommand(ClusterState clusterState, ZkNodeProps message, NamedList results)
       throws Exception {
     checkRequired(message, COLLECTION_PROP, SHARD_ID_PROP, REPLICA_PROP, PROPERTY_PROP);
     SolrZkClient zkClient = zkStateReader.getZkClient();
-    DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkClient);
     Map<String, Object> propMap = new HashMap<>();
     propMap.put(Overseer.QUEUE_OPERATION, DELETEREPLICAPROP.toLower());
     propMap.putAll(message.getProperties());
     ZkNodeProps m = new ZkNodeProps(propMap);
-    inQueue.offer(Utils.toJSON(m));
+    overseer.offerStateUpdate(Utils.toJSON(m));
   }

   private void balanceProperty(ClusterState clusterState, ZkNodeProps message, NamedList results) throws Exception {

@@ -370,11 +401,10 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
           "' parameters are required for the BALANCESHARDUNIQUE operation, no action taken");
     }
     SolrZkClient zkClient = zkStateReader.getZkClient();
-    DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkClient);
-    Map<String, Object> propMap = new HashMap<>();
-    propMap.put(Overseer.QUEUE_OPERATION, BALANCESHARDUNIQUE.toLower());
-    propMap.putAll(message.getProperties());
-    inQueue.offer(Utils.toJSON(new ZkNodeProps(propMap)));
+    Map<String, Object> m = new HashMap<>();
+    m.put(Overseer.QUEUE_OPERATION, BALANCESHARDUNIQUE.toLower());
+    m.putAll(message.getProperties());
+    overseer.offerStateUpdate(Utils.toJSON(m));
   }

   /**
@@ -417,20 +447,21 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
   }

   boolean waitForCoreNodeGone(String collectionName, String shard, String replicaName, int timeoutms) throws InterruptedException {
-    TimeOut timeout = new TimeOut(timeoutms, TimeUnit.MILLISECONDS, timeSource);
-    while (! timeout.hasTimedOut()) {
-      timeout.sleep(100);
-      DocCollection docCollection = zkStateReader.getClusterState().getCollection(collectionName);
-      if (docCollection == null) { // someone already deleted the collection
-        return true;
-      }
-      Slice slice = docCollection.getSlice(shard);
-      if(slice == null || slice.getReplica(replicaName) == null) {
-        return true;
-      }
+    try {
+      zkStateReader.waitForState(collectionName, timeoutms, TimeUnit.MILLISECONDS, (n, c) -> {
+        if (c == null)
+          return true;
+        Slice slice = c.getSlice(shard);
+        if (slice == null || slice.getReplica(replicaName) == null) {
+          return true;
+        }
+        return false;
+      });
+    } catch (TimeoutException e) {
+      return false;
     }
-    // replica still exists after the timeout
-    return false;
+
+    return true;
   }

   void deleteCoreNode(String collectionName, String replicaName, Replica replica, String core) throws Exception {

@@ -441,7 +472,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
         ZkStateReader.COLLECTION_PROP, collectionName,
         ZkStateReader.CORE_NODE_NAME_PROP, replicaName,
         ZkStateReader.BASE_URL_PROP, replica.getStr(ZkStateReader.BASE_URL_PROP));
-    Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(m));
+    overseer.offerStateUpdate(Utils.toJSON(m));
   }

   void checkRequired(ZkNodeProps message, String... props) {
@@ -475,7 +506,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
         // Actually queue the migration command.
         firstLoop = false;
         ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, MIGRATESTATEFORMAT.toLower(), COLLECTION_PROP, collectionName);
-        Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(m));
+        overseer.offerStateUpdate(Utils.toJSON(m));
       }
       timeout.sleep(100);
     }

@@ -584,7 +615,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,

   }

-  public static void sendShardRequest(String nodeName, ModifiableSolrParams params, ShardHandler shardHandler,
+  public void sendShardRequest(String nodeName, ModifiableSolrParams params, ShardHandler shardHandler,
                                       String asyncId, Map<String, String> requestMap, String adminPath,
                                       ZkStateReader zkStateReader) {
     if (asyncId != null) {

@@ -640,7 +671,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
       reloadCollection(null, new ZkNodeProps(NAME, collectionName), results);
     }

-    Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(message));
+    overseer.offerStateUpdate(Utils.toJSON(message));

     TimeOut timeout = new TimeOut(30, TimeUnit.SECONDS, timeSource);
     boolean areChangesVisible = true;

@@ -680,8 +711,9 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
   }

   Map<String, Replica> waitToSeeReplicasInState(String collectionName, Collection<String> coreNames) throws InterruptedException {
+    assert coreNames.size() > 0;
     Map<String, Replica> result = new HashMap<>();
-    TimeOut timeout = new TimeOut(30, TimeUnit.SECONDS, timeSource);
+    TimeOut timeout = new TimeOut(Integer.getInteger("solr.waitToSeeReplicasInStateTimeoutSeconds", 120), TimeUnit.SECONDS, timeSource); // could be a big cluster
     while (true) {
       DocCollection coll = zkStateReader.getClusterState().getCollection(collectionName);
       for (String coreName : coreNames) {

@@ -791,7 +823,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
       NamedList results, Replica.State stateMatcher, String asyncId, Map<String, String> requestMap, Set<String> okayExceptions) {
     log.info("Executing Collection Cmd={}, asyncId={}", params, asyncId);
     String collectionName = message.getStr(NAME);
-    ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
+    ShardHandler shardHandler = shardHandlerFactory.getShardHandler(overseer.getCoreContainer().getUpdateShardHandler().getDefaultHttpClient());

     ClusterState clusterState = zkStateReader.getClusterState();
     DocCollection coll = clusterState.getCollection(collectionName);
@@ -18,6 +18,20 @@
 package org.apache.solr.cloud.api.collections;

+import static org.apache.solr.common.cloud.DocCollection.STATE_FORMAT;
+import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP;
+import static org.apache.solr.common.cloud.ZkStateReader.MAX_SHARDS_PER_NODE;
+import static org.apache.solr.common.cloud.ZkStateReader.NRT_REPLICAS;
+import static org.apache.solr.common.cloud.ZkStateReader.PULL_REPLICAS;
+import static org.apache.solr.common.cloud.ZkStateReader.REPLICATION_FACTOR;
+import static org.apache.solr.common.cloud.ZkStateReader.REPLICA_TYPE;
+import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP;
+import static org.apache.solr.common.cloud.ZkStateReader.TLOG_REPLICAS;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.CREATE;
+import static org.apache.solr.common.params.CollectionParams.CollectionAction.CREATESHARD;
+import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
+import static org.apache.solr.common.params.CommonParams.NAME;
+
 import java.lang.invoke.MethodHandles;
 import java.net.URI;
 import java.util.ArrayList;

@@ -33,7 +47,6 @@ import java.util.Optional;
 import java.util.Properties;
 import java.util.Set;

-import org.apache.solr.client.solrj.cloud.DistributedQueue;
 import org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper;
 import org.apache.solr.cloud.Overseer;
 import org.apache.solr.cloud.overseer.OverseerAction;

@@ -60,20 +73,6 @@ import org.apache.solr.handler.component.ShardHandler;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

-import static org.apache.solr.common.cloud.DocCollection.STATE_FORMAT;
-import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP;
-import static org.apache.solr.common.cloud.ZkStateReader.MAX_SHARDS_PER_NODE;
-import static org.apache.solr.common.cloud.ZkStateReader.NRT_REPLICAS;
-import static org.apache.solr.common.cloud.ZkStateReader.PULL_REPLICAS;
-import static org.apache.solr.common.cloud.ZkStateReader.REPLICATION_FACTOR;
-import static org.apache.solr.common.cloud.ZkStateReader.REPLICA_TYPE;
-import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP;
-import static org.apache.solr.common.cloud.ZkStateReader.TLOG_REPLICAS;
-import static org.apache.solr.common.params.CollectionParams.CollectionAction.CREATE;
-import static org.apache.solr.common.params.CollectionParams.CollectionAction.CREATESHARD;
-import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
-import static org.apache.solr.common.params.CommonParams.NAME;
-
 public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

@@ -89,7 +88,7 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {

     String restoreCollectionName = message.getStr(COLLECTION_PROP);
     String backupName = message.getStr(NAME); // of backup
-    ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
+    ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler(ocmh.overseer.getCoreContainer().getUpdateShardHandler().getDefaultHttpClient());
     String asyncId = message.getStr(ASYNC);
     String repo = message.getStr(CoreAdminParams.BACKUP_REPOSITORY);
     Map<String, String> requestMap = new HashMap<>();

@@ -209,8 +208,6 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {

     DocCollection restoreCollection = zkStateReader.getClusterState().getCollection(restoreCollectionName);

-    DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
-
     //Mark all shards in CONSTRUCTION STATE while we restore the data
     {
       //TODO might instead createCollection accept an initial state? Is there a race?

@@ -220,7 +217,7 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
         propMap.put(shard.getName(), Slice.State.CONSTRUCTION.toString());
       }
       propMap.put(ZkStateReader.COLLECTION_PROP, restoreCollectionName);
-      inQueue.offer(Utils.toJSON(new ZkNodeProps(propMap)));
+      ocmh.overseer.offerStateUpdate(Utils.toJSON(new ZkNodeProps(propMap)));
     }

     // TODO how do we leverage the RULE / SNITCH logic in createCollection?

@@ -323,7 +320,7 @@ public class RestoreCmd implements OverseerCollectionMessageHandler.Cmd {
       for (Slice shard : restoreCollection.getSlices()) {
         propMap.put(shard.getName(), Slice.State.ACTIVE.toString());
       }
-      inQueue.offer(Utils.toJSON(new ZkNodeProps(propMap)));
+      ocmh.overseer.offerStateUpdate((Utils.toJSON(new ZkNodeProps(propMap))));
     }

     if (totalReplicasPerShard > 1) {
@@ -30,7 +30,6 @@ import java.util.Set;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicReference;

-import org.apache.solr.client.solrj.cloud.DistributedQueue;
 import org.apache.solr.client.solrj.cloud.NodeStateProvider;
 import org.apache.solr.client.solrj.cloud.SolrCloudManager;
 import org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper;

@@ -249,8 +248,8 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
       propMap.put(ZkStateReader.SHARD_PARENT_PROP, parentSlice.getName());
       propMap.put("shard_parent_node", nodeName);
       propMap.put("shard_parent_zk_session", leaderZnodeStat.getEphemeralOwner());
-      DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
-      inQueue.offer(Utils.toJSON(new ZkNodeProps(propMap)));
+
+      ocmh.overseer.offerStateUpdate(Utils.toJSON(new ZkNodeProps(propMap)));

       // wait until we are able to see the new shard in cluster state
       ocmh.waitForNewShard(collectionName, subSlice);

@@ -281,7 +280,7 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
         ocmh.addReplica(clusterState, new ZkNodeProps(propMap), results, null);
       }

-      ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
+      ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler(ocmh.overseer.getCoreContainer().getUpdateShardHandler().getDefaultHttpClient());

       ocmh.processResponses(results, shardHandler, true, "SPLITSHARD failed to create subshard leaders", asyncId, requestMap);

@@ -412,7 +411,7 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
             ZkStateReader.BASE_URL_PROP, zkStateReader.getBaseUrlForNodeName(subShardNodeName),
             ZkStateReader.NODE_NAME_PROP, subShardNodeName,
             CommonAdminParams.WAIT_FOR_FINAL_STATE, Boolean.toString(waitForFinalState));
-        Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(props));
+        ocmh.overseer.offerStateUpdate(Utils.toJSON(props));

         HashMap<String, Object> propMap = new HashMap<>();
         propMap.put(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower());

@@ -446,7 +445,7 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
       leaderZnodeStat = zkStateReader.getZkClient().exists(ZkStateReader.LIVE_NODES_ZKNODE + "/" + parentShardLeader.getNodeName(), null, true);
       if (leaderZnodeStat == null || ephemeralOwner != leaderZnodeStat.getEphemeralOwner()) {
         // put sub-shards in recovery_failed state
-        DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
+
         Map<String, Object> propMap = new HashMap<>();
         propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
         for (String subSlice : subSlices) {

@@ -454,7 +453,7 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
         }
         propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
         ZkNodeProps m = new ZkNodeProps(propMap);
-        inQueue.offer(Utils.toJSON(m));
+        ocmh.overseer.offerStateUpdate(Utils.toJSON(m));

         if (leaderZnodeStat == null) {
           // the leader is not live anymore, fail the split!

@@ -473,8 +472,7 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {

       if (repFactor == 1) {
         // switch sub shard states to 'active'
-        log.debug("Replication factor is 1 so switching shard states");
-        DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
+        log.info("Replication factor is 1 so switching shard states");
         Map<String, Object> propMap = new HashMap<>();
         propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
         propMap.put(slice.get(), Slice.State.INACTIVE.toString());

@@ -483,10 +481,9 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
         }
         propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
         ZkNodeProps m = new ZkNodeProps(propMap);
-        inQueue.offer(Utils.toJSON(m));
+        ocmh.overseer.offerStateUpdate(Utils.toJSON(m));
       } else {
-        log.debug("Requesting shard state be set to 'recovery'");
-        DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
+        log.info("Requesting shard state be set to 'recovery'");
         Map<String, Object> propMap = new HashMap<>();
         propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
         for (String subSlice : subSlices) {

@@ -494,7 +491,7 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
         }
         propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
         ZkNodeProps m = new ZkNodeProps(propMap);
-        inQueue.offer(Utils.toJSON(m));
+        ocmh.overseer.offerStateUpdate(Utils.toJSON(m));
       }

       t = timings.sub("createCoresForReplicas");

@@ -590,7 +587,6 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {

     // set already created sub shards states to CONSTRUCTION - this prevents them
     // from entering into RECOVERY or ACTIVE (SOLR-9455)
-    DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
     final Map<String, Object> propMap = new HashMap<>();
     boolean sendUpdateState = false;
     propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());

@@ -618,7 +614,7 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
     if (sendUpdateState) {
       try {
         ZkNodeProps m = new ZkNodeProps(propMap);
-        inQueue.offer(Utils.toJSON(m));
+        ocmh.overseer.offerStateUpdate(Utils.toJSON(m));
       } catch (Exception e) {
         // don't give up yet - just log the error, we may still be able to clean up
         log.warn("Cleanup failed after failed split of " + collectionName + "/" + parentShard + ": (slice state changes)", e);
@@ -32,6 +32,7 @@ import java.util.concurrent.TimeUnit;

 import org.apache.solr.client.solrj.cloud.SolrCloudManager;
 import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType;
+import org.apache.solr.common.AlreadyClosedException;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.params.CollectionParams;

@@ -62,7 +63,7 @@ public class NodeLostTrigger extends TriggerBase {
   public void init() throws Exception {
     super.init();
     lastLiveNodes = new HashSet<>(cloudManager.getClusterStateProvider().getLiveNodes());
-    log.debug("NodeLostTrigger {} - Initial livenodes: {}", name, lastLiveNodes);
+    log.info("NodeLostTrigger {} - Initial livenodes: {}", name, lastLiveNodes);
     // pick up lost nodes for which marker paths were created
     try {
       List<String> lost = stateManager.listData(ZkStateReader.SOLR_AUTOSCALING_NODE_LOST_PATH);

@@ -147,7 +148,7 @@ public class NodeLostTrigger extends TriggerBase {
       }

       Set<String> newLiveNodes = new HashSet<>(cloudManager.getClusterStateProvider().getLiveNodes());
-      log.debug("Running NodeLostTrigger: {} with currently live nodes: {}", name, newLiveNodes.size());
+      log.info("Running NodeLostTrigger: {} with currently live nodes: {} and last live nodes: {}", name, newLiveNodes.size(), lastLiveNodes.size());

       // have any nodes that we were tracking been added to the cluster?
       // if so, remove them from the tracking map

@@ -158,7 +159,7 @@ public class NodeLostTrigger extends TriggerBase {
       Set<String> copyOfLastLiveNodes = new HashSet<>(lastLiveNodes);
       copyOfLastLiveNodes.removeAll(newLiveNodes);
       copyOfLastLiveNodes.forEach(n -> {
-        log.debug("Tracking lost node: {}", n);
+        log.info("Tracking lost node: {}", n);
         nodeNameVsTimeRemoved.put(n, cloudManager.getTimeSource().getTimeNs());
       });

@@ -170,7 +171,8 @@ public class NodeLostTrigger extends TriggerBase {
         String nodeName = entry.getKey();
         Long timeRemoved = entry.getValue();
         long now = cloudManager.getTimeSource().getTimeNs();
-        if (TimeUnit.SECONDS.convert(now - timeRemoved, TimeUnit.NANOSECONDS) >= getWaitForSecond()) {
+        long te = TimeUnit.SECONDS.convert(now - timeRemoved, TimeUnit.NANOSECONDS);
+        if (te >= getWaitForSecond()) {
           nodeNames.add(nodeName);
           times.add(timeRemoved);
         }

@@ -197,6 +199,8 @@ public class NodeLostTrigger extends TriggerBase {
         }
       }
       lastLiveNodes = new HashSet<>(newLiveNodes);
+    } catch (AlreadyClosedException e) {
+
     } catch (RuntimeException e) {
       log.error("Unexpected exception in NodeLostTrigger", e);
     }
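The @@ -170,7 +171,8 @@ hunk above splits the elapsed-time computation into a named local before comparing it against the trigger's waitFor threshold. A small runnable sketch of the same conversion, assuming a hypothetical one-second threshold:

    import java.util.concurrent.TimeUnit;

    public class WaitForElapsedSketch {
      public static void main(String[] args) throws InterruptedException {
        long waitForSeconds = 1;              // hypothetical trigger waitFor setting
        long timeRemoved = System.nanoTime(); // instant the node dropped out of live nodes
        Thread.sleep(1100);
        long now = System.nanoTime();
        // same conversion as the patched check: elapsed nanoseconds -> whole seconds
        long te = TimeUnit.SECONDS.convert(now - timeRemoved, TimeUnit.NANOSECONDS);
        if (te >= waitForSeconds) {
          System.out.println("node considered lost after " + te + "s");
        }
      }
    }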
@@ -29,12 +29,12 @@ import java.util.Set;
 import java.util.concurrent.locks.Condition;
 import java.util.concurrent.locks.ReentrantLock;

-import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
 import org.apache.solr.client.solrj.cloud.autoscaling.BadVersionException;
 import org.apache.solr.client.solrj.cloud.DistribStateManager;
 import org.apache.solr.client.solrj.cloud.SolrCloudManager;
 import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType;
+import org.apache.solr.common.AlreadyClosedException;
 import org.apache.solr.common.SolrCloseable;
 import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.util.IOUtils;

@@ -135,6 +135,8 @@ public class OverseerTriggerThread implements Runnable, SolrCloseable {
         log.debug("Adding .auto_add_replicas and .scheduled_maintenance triggers");
         cloudManager.getDistribStateManager().setData(SOLR_AUTOSCALING_CONF_PATH, Utils.toJSON(updatedConfig), updatedConfig.getZkVersion());
         break;
+      } catch (AlreadyClosedException e) {
+        break;
       } catch (BadVersionException bve) {
         // somebody else has changed the configuration so we must retry
       } catch (InterruptedException e) {

@@ -178,7 +180,7 @@ public class OverseerTriggerThread implements Runnable, SolrCloseable {

       // must check for close here before we await on the condition otherwise we can only be woken up on interruption
       if (isClosed) {
-        log.warn("OverseerTriggerThread has been closed, exiting.");
+        log.info("OverseerTriggerThread has been closed, exiting.");
         break;
       }

@@ -190,7 +192,7 @@ public class OverseerTriggerThread implements Runnable, SolrCloseable {

       // are we closed?
       if (isClosed) {
-        log.warn("OverseerTriggerThread woken up but we are closed, exiting.");
+        log.info("OverseerTriggerThread woken up but we are closed, exiting.");
         break;
       }

@@ -211,7 +213,6 @@ public class OverseerTriggerThread implements Runnable, SolrCloseable {
       } catch (InterruptedException e) {
         // Restore the interrupted status
         Thread.currentThread().interrupt();
-        log.warn("Interrupted", e);
         break;
       }

@@ -240,6 +241,8 @@ public class OverseerTriggerThread implements Runnable, SolrCloseable {
       }
       try {
         scheduledTriggers.add(entry.getValue());
+      } catch (AlreadyClosedException e) {
+
       } catch (Exception e) {
         log.warn("Exception initializing trigger " + entry.getKey() + ", configuration ignored", e);
       }

@@ -275,6 +278,8 @@ public class OverseerTriggerThread implements Runnable, SolrCloseable {
         });
       } catch (NoSuchElementException e) {
         // ignore
+      } catch (AlreadyClosedException e) {
+
       } catch (Exception e) {
         log.warn("Error removing old nodeAdded markers", e);
       }
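Several hunks above add empty catch blocks for the new org.apache.solr.common.AlreadyClosedException. A minimal sketch of why a dedicated exception type quiets shutdown logging (hypothetical classes, not the Solr sources):

    public class AlreadyClosedSketch {
      static class AlreadyClosedException extends RuntimeException {}

      static void doWork(boolean closed) {
        if (closed) throw new AlreadyClosedException();
      }

      public static void main(String[] args) {
        try {
          doWork(true);
        } catch (AlreadyClosedException e) {
          // expected during shutdown - swallowed instead of logging a spurious stack trace
        } catch (Exception e) {
          e.printStackTrace(); // anything else is still surfaced
        }
      }
    }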
@@ -151,8 +151,8 @@ public class ScheduledTrigger extends TriggerBase {
   public void run() {
     synchronized (this) {
       if (isClosed) {
-        log.warn("ScheduledTrigger ran but was already closed");
-        throw new RuntimeException("Trigger has been closed");
+        log.debug("ScheduledTrigger ran but was already closed");
+        return;
       }
     }
@@ -42,7 +42,6 @@ import java.util.concurrent.locks.ReentrantLock;
 import java.util.stream.Collectors;

 import org.apache.commons.lang3.exception.ExceptionUtils;
-import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
 import org.apache.solr.client.solrj.cloud.DistribStateManager;
 import org.apache.solr.client.solrj.cloud.SolrCloudManager;

@@ -51,6 +50,7 @@ import org.apache.solr.client.solrj.cloud.autoscaling.VersionedData;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest.RequestStatusResponse;
 import org.apache.solr.client.solrj.response.RequestStatusState;
 import org.apache.solr.cloud.Stats;
+import org.apache.solr.common.AlreadyClosedException;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.util.ExecutorUtil;

@@ -205,7 +205,7 @@ public class ScheduledTriggers implements Closeable {
     try {
       st = new TriggerWrapper(newTrigger, cloudManager, queueStats);
     } catch (Exception e) {
-      if (isClosed) {
+      if (isClosed || e instanceof AlreadyClosedException) {
         throw new AlreadyClosedException("ScheduledTriggers has been closed and cannot be used anymore");
       }
       if (cloudManager.isClosed()) {

@@ -559,7 +559,7 @@ public class ScheduledTriggers implements Closeable {
       // fire a trigger only if an action is not pending
       // note this is not fool proof e.g. it does not prevent an action being executed while a trigger
       // is still executing. There is additional protection against that scenario in the event listener.
-      if (!hasPendingActions.get()) {
+      if (!hasPendingActions.get()) {
         // this synchronization is usually never under contention
         // but the only reason to have it here is to ensure that when the set-properties API is used
         // to change the schedule delay, we can safely cancel the old scheduled task

@@ -567,28 +567,37 @@ public class ScheduledTriggers implements Closeable {
         // execution of the same trigger instance
         synchronized (TriggerWrapper.this) {
           // replay accumulated events on first run, if any
-          if (replay) {
-            TriggerEvent event;
-            // peek first without removing - we may crash before calling the listener
-            while ((event = queue.peekEvent()) != null) {
-              // override REPLAYING=true
-              event.getProperties().put(TriggerEvent.REPLAYING, true);
-              if (! trigger.getProcessor().process(event)) {
-                log.error("Failed to re-play event, discarding: " + event);
+          try {
+            if (replay) {
+              TriggerEvent event;
+              // peek first without removing - we may crash before calling the listener
+              while ((event = queue.peekEvent()) != null) {
+                // override REPLAYING=true
+                event.getProperties().put(TriggerEvent.REPLAYING, true);
+                if (!trigger.getProcessor().process(event)) {
+                  log.error("Failed to re-play event, discarding: " + event);
+                }
+                queue.pollEvent(); // always remove it from queue
               }
-              queue.pollEvent(); // always remove it from queue
+              // now restore saved state to possibly generate new events from old state on the first run
+              try {
+                trigger.restoreState();
+              } catch (Exception e) {
+                // log but don't throw - see below
+                log.error("Error restoring trigger state " + trigger.getName(), e);
+              }
+              replay = false;
             }
-            // now restore saved state to possibly generate new events from old state on the first run
-            try {
-              trigger.restoreState();
-            } catch (Exception e) {
-              // log but don't throw - see below
-              log.error("Error restoring trigger state " + trigger.getName(), e);
-            }
-            replay = false;
+          } catch (AlreadyClosedException e) {
+
+          } catch (Exception e) {
+            log.error("Unexpected exception from trigger: " + trigger.getName(), e);
           }
           try {
             trigger.run();
+          } catch (AlreadyClosedException e) {
+
           } catch (Exception e) {
             // log but do not propagate exception because an exception thrown from a scheduled operation
             // will suppress future executions
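The large @@ -567,28 +567,37 @@ hunk above wraps the replay logic and trigger.run() in catch-all blocks; as the in-code comment notes, an exception that escapes a scheduled operation suppresses all future executions. A runnable illustration of that ScheduledExecutorService behavior:

    import java.util.concurrent.Executors;
    import java.util.concurrent.ScheduledExecutorService;
    import java.util.concurrent.TimeUnit;

    public class ScheduledSuppressionSketch {
      public static void main(String[] args) throws InterruptedException {
        ScheduledExecutorService ses = Executors.newSingleThreadScheduledExecutor();
        ses.scheduleAtFixedRate(() -> {
          try {
            throw new RuntimeException("trigger failure");
          } catch (Exception e) {
            // caught inside the task, so the schedule keeps firing
            System.out.println("logged, task keeps running: " + e.getMessage());
          }
        }, 0, 100, TimeUnit.MILLISECONDS);
        Thread.sleep(350);
        ses.shutdownNow();
      }
    }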
@@ -36,6 +36,7 @@ import org.apache.solr.client.solrj.cloud.SolrCloudManager;
 import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType;
 import org.apache.solr.client.solrj.cloud.autoscaling.VersionedData;
+import org.apache.solr.common.AlreadyClosedException;
 import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.util.Utils;
 import org.apache.solr.core.SolrResourceLoader;

@@ -239,7 +240,9 @@ public abstract class TriggerBase implements AutoScaling.Trigger {
         stateManager.createData(path, data, CreateMode.PERSISTENT);
       }
       lastState = state;
-    } catch (InterruptedException | BadVersionException | AlreadyExistsException | IOException | KeeperException e) {
+    } catch (AlreadyExistsException e) {
+
+    } catch (InterruptedException | BadVersionException | IOException | KeeperException e) {
       log.warn("Exception updating trigger state '" + path + "'", e);
     }
   }

@@ -253,6 +256,8 @@ public abstract class TriggerBase implements AutoScaling.Trigger {
         VersionedData versionedData = stateManager.getData(path);
         data = versionedData.getData();
       }
+    } catch (AlreadyClosedException e) {
+
     } catch (Exception e) {
       log.warn("Exception getting trigger state '" + path + "'", e);
     }
@@ -24,6 +24,7 @@ import java.util.Map;
 import org.apache.solr.client.solrj.cloud.DistributedQueue;
 import org.apache.solr.client.solrj.cloud.SolrCloudManager;
 import org.apache.solr.cloud.Stats;
+import org.apache.solr.common.AlreadyClosedException;
 import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.util.Utils;
 import org.apache.solr.common.util.TimeSource;

@@ -78,7 +79,11 @@ public class TriggerEventQueue {
           continue;
         }
       }
-    } catch (Exception e) {
+    }
+    catch (AlreadyClosedException e) {
+
+    }
+    catch (Exception e) {
       log.warn("Exception peeking queue of trigger " + triggerName, e);
     }
     return null;
@@ -124,10 +124,10 @@ public class CloudConfig {

   public static class CloudConfigBuilder {

-    private static final int DEFAULT_ZK_CLIENT_TIMEOUT = 15000;
+    private static final int DEFAULT_ZK_CLIENT_TIMEOUT = 45000;
     private static final int DEFAULT_LEADER_VOTE_WAIT = 180000;  // 3 minutes
     private static final int DEFAULT_LEADER_CONFLICT_RESOLVE_WAIT = 180000;
-    private static final int DEFAULT_CREATE_COLLECTION_ACTIVE_WAIT = 30;  // 30 seconds
+    private static final int DEFAULT_CREATE_COLLECTION_ACTIVE_WAIT = 45;  // 45 seconds
     private static final boolean DEFAULT_CREATE_COLLECTION_CHECK_LEADER_ACTIVE = false;

     private static final int DEFAULT_AUTO_REPLICA_FAILOVER_WAIT_AFTER_EXPIRATION = 120000;
@@ -16,6 +16,22 @@
  */
 package org.apache.solr.core;

+import static java.util.Objects.requireNonNull;
+import static org.apache.solr.common.params.CommonParams.AUTHC_PATH;
+import static org.apache.solr.common.params.CommonParams.AUTHZ_PATH;
+import static org.apache.solr.common.params.CommonParams.AUTOSCALING_HISTORY_PATH;
+import static org.apache.solr.common.params.CommonParams.COLLECTIONS_HANDLER_PATH;
+import static org.apache.solr.common.params.CommonParams.HEALTH_CHECK_HANDLER_PATH;
+import static org.apache.solr.common.params.CommonParams.CONFIGSETS_HANDLER_PATH;
+import static org.apache.solr.common.params.CommonParams.CORES_HANDLER_PATH;
+import static org.apache.solr.common.params.CommonParams.INFO_HANDLER_PATH;
+import static org.apache.solr.common.params.CommonParams.METRICS_HISTORY_PATH;
+import static org.apache.solr.common.params.CommonParams.METRICS_PATH;
+import static org.apache.solr.common.params.CommonParams.ZK_PATH;
+import static org.apache.solr.common.params.CommonParams.ZK_STATUS_PATH;
+import static org.apache.solr.core.CorePropertiesLocator.PROPERTIES_FILENAME;
+import static org.apache.solr.security.AuthenticationPlugin.AUTHENTICATION_PLUGIN_PROP;
+
 import java.io.IOException;
 import java.lang.invoke.MethodHandles;
 import java.nio.file.Path;

@@ -35,10 +51,9 @@ import java.util.Properties;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
+import java.util.concurrent.ForkJoinPool;
 import java.util.concurrent.Future;

-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Maps;
 import org.apache.http.auth.AuthSchemeProvider;
 import org.apache.http.client.CredentialsProvider;
 import org.apache.http.config.Lookup;

@@ -58,6 +73,7 @@ import org.apache.solr.cloud.CloudDescriptor;
 import org.apache.solr.cloud.Overseer;
 import org.apache.solr.cloud.ZkController;
 import org.apache.solr.cloud.autoscaling.AutoScalingHandler;
+import org.apache.solr.common.AlreadyClosedException;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrException.ErrorCode;
 import org.apache.solr.common.cloud.DocCollection;

@@ -106,24 +122,13 @@ import org.apache.solr.util.DefaultSolrThreadFactory;
 import org.apache.solr.util.OrderedExecutor;
 import org.apache.solr.util.stats.MetricUtils;
 import org.apache.zookeeper.KeeperException;
+import org.apache.zookeeper.KeeperException.ConnectionLossException;
+import org.apache.zookeeper.KeeperException.SessionExpiredException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

-import static java.util.Objects.requireNonNull;
-import static org.apache.solr.common.params.CommonParams.AUTHC_PATH;
-import static org.apache.solr.common.params.CommonParams.AUTHZ_PATH;
-import static org.apache.solr.common.params.CommonParams.AUTOSCALING_HISTORY_PATH;
-import static org.apache.solr.common.params.CommonParams.COLLECTIONS_HANDLER_PATH;
-import static org.apache.solr.common.params.CommonParams.CONFIGSETS_HANDLER_PATH;
-import static org.apache.solr.common.params.CommonParams.CORES_HANDLER_PATH;
-import static org.apache.solr.common.params.CommonParams.HEALTH_CHECK_HANDLER_PATH;
-import static org.apache.solr.common.params.CommonParams.INFO_HANDLER_PATH;
-import static org.apache.solr.common.params.CommonParams.METRICS_HISTORY_PATH;
-import static org.apache.solr.common.params.CommonParams.METRICS_PATH;
-import static org.apache.solr.common.params.CommonParams.ZK_PATH;
-import static org.apache.solr.common.params.CommonParams.ZK_STATUS_PATH;
-import static org.apache.solr.core.CorePropertiesLocator.PROPERTIES_FILENAME;
-import static org.apache.solr.security.AuthenticationPlugin.AUTHENTICATION_PLUGIN_PROP;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Maps;

 /**
  *

@@ -148,32 +153,32 @@ public class CoreContainer {

   protected final Map<String, CoreLoadFailure> coreInitFailures = new ConcurrentHashMap<>();

-  protected CoreAdminHandler coreAdminHandler = null;
-  protected CollectionsHandler collectionsHandler = null;
-  protected HealthCheckHandler healthCheckHandler = null;
+  protected volatile CoreAdminHandler coreAdminHandler = null;
+  protected volatile CollectionsHandler collectionsHandler = null;
+  protected volatile HealthCheckHandler healthCheckHandler = null;

-  private InfoHandler infoHandler;
-  protected ConfigSetsHandler configSetsHandler = null;
+  private volatile InfoHandler infoHandler;
+  protected volatile ConfigSetsHandler configSetsHandler = null;

-  private PKIAuthenticationPlugin pkiAuthenticationPlugin;
+  private volatile PKIAuthenticationPlugin pkiAuthenticationPlugin;

-  protected Properties containerProperties;
+  protected volatile Properties containerProperties;

-  private ConfigSetService coreConfigService;
+  private volatile ConfigSetService coreConfigService;

-  protected ZkContainer zkSys = new ZkContainer();
-  protected ShardHandlerFactory shardHandlerFactory;
+  protected final ZkContainer zkSys = new ZkContainer();
+  protected volatile ShardHandlerFactory shardHandlerFactory;

-  private UpdateShardHandler updateShardHandler;
+  private volatile UpdateShardHandler updateShardHandler;

-  private ExecutorService coreContainerWorkExecutor = ExecutorUtil.newMDCAwareCachedThreadPool(
+  private volatile ExecutorService coreContainerWorkExecutor = ExecutorUtil.newMDCAwareCachedThreadPool(
       new DefaultSolrThreadFactory("coreContainerWorkExecutor") );

   private final OrderedExecutor replayUpdatesExecutor;

-  protected LogWatcher logging = null;
+  protected volatile LogWatcher logging = null;

-  private CloserThread backgroundCloser = null;
+  private volatile CloserThread backgroundCloser = null;
   protected final NodeConfig cfg;
   protected final SolrResourceLoader loader;

@@ -181,33 +186,33 @@ public class CoreContainer {

   protected final CoresLocator coresLocator;

-  private String hostName;
+  private volatile String hostName;

   private final BlobRepository blobRepository = new BlobRepository(this);

-  private PluginBag<SolrRequestHandler> containerHandlers = new PluginBag<>(SolrRequestHandler.class, null);
+  private volatile PluginBag<SolrRequestHandler> containerHandlers = new PluginBag<>(SolrRequestHandler.class, null);

-  private boolean asyncSolrCoreLoad;
+  private volatile boolean asyncSolrCoreLoad;

-  protected SecurityConfHandler securityConfHandler;
+  protected volatile SecurityConfHandler securityConfHandler;

-  private SecurityPluginHolder<AuthorizationPlugin> authorizationPlugin;
+  private volatile SecurityPluginHolder<AuthorizationPlugin> authorizationPlugin;

-  private SecurityPluginHolder<AuthenticationPlugin> authenticationPlugin;
+  private volatile SecurityPluginHolder<AuthenticationPlugin> authenticationPlugin;

-  private BackupRepositoryFactory backupRepoFactory;
+  private volatile BackupRepositoryFactory backupRepoFactory;

-  protected SolrMetricManager metricManager;
+  protected volatile SolrMetricManager metricManager;

-  protected String metricTag = Integer.toHexString(hashCode());
+  protected volatile String metricTag = Integer.toHexString(hashCode());

   protected MetricsHandler metricsHandler;

-  protected MetricsHistoryHandler metricsHistoryHandler;
+  protected volatile MetricsHistoryHandler metricsHistoryHandler;

-  protected MetricsCollectorHandler metricsCollectorHandler;
+  protected volatile MetricsCollectorHandler metricsCollectorHandler;

-  protected AutoscalingHistoryHandler autoscalingHistoryHandler;
+  protected volatile AutoscalingHistoryHandler autoscalingHistoryHandler;


   // Bits for the state variable.

@@ -216,7 +221,7 @@ public class CoreContainer {
   public final static long INITIAL_CORE_LOAD_COMPLETE = 0x4L;
   private volatile long status = 0L;

-  protected AutoScalingHandler autoScalingHandler;
+  protected volatile AutoScalingHandler autoScalingHandler;

   private enum CoreInitFailedAction { fromleader, none }

@@ -759,6 +764,7 @@ public class CoreContainer {
       name = getZkController().getNodeName();
       cloudManager = getZkController().getSolrCloudManager();
       client = new CloudSolrClient.Builder(Collections.singletonList(getZkController().getZkServerAddress()), Optional.empty())
+          .withSocketTimeout(30000).withConnectionTimeout(15000)
           .withHttpClient(updateShardHandler.getDefaultHttpClient()).build();
     } else {
       name = getNodeConfig().getNodeName();

@@ -818,53 +824,40 @@ public class CoreContainer {
     return isShutDown;
   }

   /**
    * Stops all cores.
    */
   public void shutdown() {
     log.info("Shutting down CoreContainer instance="
         + System.identityHashCode(this));

+    ForkJoinPool customThreadPool = new ForkJoinPool(6);
+
     isShutDown = true;

-    ExecutorUtil.shutdownAndAwaitTermination(coreContainerWorkExecutor);
-    replayUpdatesExecutor.shutdownAndAwaitTermination();
-
-    if (metricsHistoryHandler != null) {
-      IOUtils.closeQuietly(metricsHistoryHandler.getSolrClient());
-      metricsHistoryHandler.close();
-    }
-
-    if (metricManager != null) {
-      metricManager.closeReporters(SolrMetricManager.getRegistryName(SolrInfoBean.Group.node));
-      metricManager.closeReporters(SolrMetricManager.getRegistryName(SolrInfoBean.Group.jvm));
-      metricManager.closeReporters(SolrMetricManager.getRegistryName(SolrInfoBean.Group.jetty));
-
-      metricManager.unregisterGauges(SolrMetricManager.getRegistryName(SolrInfoBean.Group.node), metricTag);
-      metricManager.unregisterGauges(SolrMetricManager.getRegistryName(SolrInfoBean.Group.jvm), metricTag);
-      metricManager.unregisterGauges(SolrMetricManager.getRegistryName(SolrInfoBean.Group.jetty), metricTag);
-    }
-
-    if (isZooKeeperAware()) {
-      cancelCoreRecoveries();
-      zkSys.zkController.publishNodeAsDown(zkSys.zkController.getNodeName());
-      try {
-        zkSys.zkController.removeEphemeralLiveNode();
-      } catch (Exception e) {
-        log.warn("Error removing live node. Continuing to close CoreContainer", e);
-      }
-      if (metricManager != null) {
-        metricManager.closeReporters(SolrMetricManager.getRegistryName(SolrInfoBean.Group.cluster));
-      }
-    }
-
-    try {
-      if (coreAdminHandler != null) coreAdminHandler.shutdown();
-    } catch (Exception e) {
-      log.warn("Error shutting down CoreAdminHandler. Continuing to close CoreContainer.", e);
-    }
-    if (isZooKeeperAware()) {
-      cancelCoreRecoveries();
+    try {
+      if (isZooKeeperAware()) {
+        cancelCoreRecoveries();
+        try {
+          zkSys.zkController.removeEphemeralLiveNode();
+        } catch (AlreadyClosedException | SessionExpiredException | ConnectionLossException e) {
+
+        } catch (Exception e) {
+          log.warn("Error removing live node. Continuing to close CoreContainer", e);
+        }
+
+        try {
+          if (zkSys.zkController.getZkClient().getConnectionManager().isConnected()) {
+            log.info("Publish this node as DOWN...");
+            zkSys.zkController.publishNodeAsDown(zkSys.zkController.getNodeName());
+          }
+        } catch (Exception e) {
+          log.warn("Error publishing nodes as down. Continuing to close CoreContainer", e);
+        }
+      }
+
+      ExecutorUtil.shutdownAndAwaitTermination(coreContainerWorkExecutor);

       // First wake up the closer thread, it'll terminate almost immediately since it checks isShutDown.
       synchronized (solrCores.getModifyLock()) {
         solrCores.getModifyLock().notifyAll(); // wake up anyone waiting

@@ -896,27 +889,77 @@ public class CoreContainer {
       synchronized (solrCores.getModifyLock()) {
         solrCores.getModifyLock().notifyAll(); // wake up the thread
       }

+      customThreadPool.submit(() -> Collections.singleton(replayUpdatesExecutor).parallelStream().forEach(c -> {
+        c.shutdownAndAwaitTermination();
+      }));
+
       if (metricsHistoryHandler != null) {
+        customThreadPool.submit(() -> Collections.singleton(metricsHistoryHandler).parallelStream().forEach(c -> {
+          IOUtils.closeQuietly(c);
+        }));
+        customThreadPool.submit(() -> Collections.singleton(metricsHistoryHandler.getSolrClient()).parallelStream().forEach(c -> {
+          IOUtils.closeQuietly(c);
+        }));
+      }
+
+      if (metricManager != null) {
+        metricManager.closeReporters(SolrMetricManager.getRegistryName(SolrInfoBean.Group.node));
+        metricManager.closeReporters(SolrMetricManager.getRegistryName(SolrInfoBean.Group.jvm));
+        metricManager.closeReporters(SolrMetricManager.getRegistryName(SolrInfoBean.Group.jetty));
+
+        metricManager.unregisterGauges(SolrMetricManager.getRegistryName(SolrInfoBean.Group.node), metricTag);
+        metricManager.unregisterGauges(SolrMetricManager.getRegistryName(SolrInfoBean.Group.jvm), metricTag);
+        metricManager.unregisterGauges(SolrMetricManager.getRegistryName(SolrInfoBean.Group.jetty), metricTag);
+      }
+
+      if (isZooKeeperAware()) {
+        cancelCoreRecoveries();
+
+        if (metricManager != null) {
+          metricManager.closeReporters(SolrMetricManager.getRegistryName(SolrInfoBean.Group.cluster));
+        }
+      }
+
+      try {
+        if (coreAdminHandler != null) {
+          customThreadPool.submit(() -> Collections.singleton(coreAdminHandler).parallelStream().forEach(c -> {
+            c.shutdown();
+          }));
+        }
+      } catch (Exception e) {
+        log.warn("Error shutting down CoreAdminHandler. Continuing to close CoreContainer.", e);
+      }
+
     } finally {
       try {
         if (shardHandlerFactory != null) {
-          shardHandlerFactory.close();
+          customThreadPool.submit(() -> Collections.singleton(shardHandlerFactory).parallelStream().forEach(c -> {
+            c.close();
+          }));
         }
       } finally {
         try {
           if (updateShardHandler != null) {
-            updateShardHandler.close();
+            customThreadPool.submit(() -> Collections.singleton(shardHandlerFactory).parallelStream().forEach(c -> {
+              updateShardHandler.close();
+            }));
           }
         } finally {
-          // we want to close zk stuff last
-          zkSys.close();
+          try {
+            // we want to close zk stuff last
+            zkSys.close();
+          } finally {
+            ExecutorUtil.shutdownAndAwaitTermination(customThreadPool);
+          }
         }
       }
     }

     // It should be safe to close the authorization plugin at this point.
     try {
-      if(authorizationPlugin != null) {
+      if (authorizationPlugin != null) {
         authorizationPlugin.plugin.close();
       }
     } catch (IOException e) {

@@ -925,7 +968,7 @@ public class CoreContainer {

     // It should be safe to close the authentication plugin at this point.
     try {
-      if(authenticationPlugin != null) {
+      if (authenticationPlugin != null) {
         authenticationPlugin.plugin.close();
         authenticationPlugin = null;
       }

@@ -1384,6 +1427,9 @@ public class CoreContainer {
    * @param name the name of the SolrCore to reload
    */
   public void reload(String name) {
+    if (isShutDown) {
+      throw new AlreadyClosedException();
+    }
     SolrCore core = solrCores.getCoreFromAnyList(name, false);
     if (core != null) {
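The reworked shutdown above fans individual close() calls out to a six-thread ForkJoinPool and tears the pool down only after the ZooKeeper resources are closed. A simplified, self-contained sketch of that fan-out-and-await shape, using generic Closeables rather than the real CoreContainer members:

    import java.io.Closeable;
    import java.util.Arrays;
    import java.util.List;
    import java.util.concurrent.ForkJoinPool;
    import java.util.concurrent.TimeUnit;

    public class ParallelCloseSketch {
      public static void main(String[] args) throws InterruptedException {
        List<Closeable> resources = Arrays.asList(
            () -> System.out.println("closed admin handler"),
            () -> System.out.println("closed shard handler factory"));
        ForkJoinPool customThreadPool = new ForkJoinPool(6);
        // slow closes overlap instead of running serially
        for (Closeable c : resources) {
          customThreadPool.submit(() -> {
            try {
              c.close();
            } catch (Exception e) {
              e.printStackTrace(); // keep closing the rest regardless
            }
          });
        }
        customThreadPool.shutdown();
        customThreadPool.awaitTermination(10, TimeUnit.SECONDS);
      }
    }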
@@ -162,6 +162,7 @@ import org.apache.solr.util.NumberUtils;
 import org.apache.solr.util.PropertiesInputStream;
 import org.apache.solr.util.PropertiesOutputStream;
 import org.apache.solr.util.RefCounted;
+import org.apache.solr.util.TestInjection;
 import org.apache.solr.util.plugin.NamedListInitializedPlugin;
 import org.apache.solr.util.plugin.PluginInfoInitialized;
 import org.apache.solr.util.plugin.SolrCoreAware;

@@ -764,10 +765,14 @@ public final class SolrCore implements SolrInfoBean, SolrMetricProducer, Closeab
     // Create the index if it doesn't exist.
     if (!indexExists) {
       log.debug("{}Solr index directory '{}' doesn't exist. Creating new index...", logid, indexDir);

-      SolrIndexWriter writer = SolrIndexWriter.create(this, "SolrCore.initIndex", indexDir, getDirectoryFactory(), true,
+      SolrIndexWriter writer = null;
+      try {
+        writer = SolrIndexWriter.create(this, "SolrCore.initIndex", indexDir, getDirectoryFactory(), true,
           getLatestSchema(), solrConfig.indexConfig, solrDelPolicy, codec);
-      writer.close();
+      } finally {
+        IOUtils.closeQuietly(writer);
+      }
     }

     cleanupOldIndexDirectories(reload);

@@ -992,6 +997,33 @@ public final class SolrCore implements SolrInfoBean, SolrMetricProducer, Closeab
       resourceLoader.inform(resourceLoader);
       resourceLoader.inform(this); // last call before the latch is released.
       this.updateHandler.informEventListeners(this);
+
+      infoRegistry.put("core", this);
+
+      // register any SolrInfoMBeans SolrResourceLoader initialized
+      //
+      // this must happen after the latch is released, because a JMX server impl may
+      // choose to block on registering until properties can be fetched from an MBean,
+      // and a SolrCoreAware MBean may have properties that depend on getting a Searcher
+      // from the core.
+      resourceLoader.inform(infoRegistry);
+
+      // Allow the directory factory to report metrics
+      if (directoryFactory instanceof SolrMetricProducer) {
+        ((SolrMetricProducer) directoryFactory).initializeMetrics(metricManager, coreMetricManager.getRegistryName(),
+            metricTag, "directoryFactory");
+      }
+
+      // seed version buckets with max from index during core initialization ... requires a searcher!
+      seedVersionBuckets();
+
+      bufferUpdatesIfConstructing(coreDescriptor);
+
+      this.ruleExpiryLock = new ReentrantLock();
+      this.snapshotDelLock = new ReentrantLock();
+
+      registerConfListener();
+
     } catch (Throwable e) {
       // release the latch, otherwise we block trying to do the close. This
       // should be fine, since counting down on a latch of 0 is still fine

@@ -1016,31 +1048,6 @@ public final class SolrCore implements SolrInfoBean, SolrMetricProducer, Closeab
       // allow firstSearcher events to fire and make sure it is released
       latch.countDown();
     }
-
-    infoRegistry.put("core", this);
-
-    // register any SolrInfoMBeans SolrResourceLoader initialized
-    //
-    // this must happen after the latch is released, because a JMX server impl may
-    // choose to block on registering until properties can be fetched from an MBean,
-    // and a SolrCoreAware MBean may have properties that depend on getting a Searcher
-    // from the core.
-    resourceLoader.inform(infoRegistry);
-
-    // Allow the directory factory to report metrics
-    if (directoryFactory instanceof SolrMetricProducer) {
-      ((SolrMetricProducer)directoryFactory).initializeMetrics(metricManager, coreMetricManager.getRegistryName(), metricTag, "directoryFactory");
-    }
-
-    // seed version buckets with max from index during core initialization ... requires a searcher!
-    seedVersionBuckets();
-
-    bufferUpdatesIfConstructing(coreDescriptor);
-
-    this.ruleExpiryLock = new ReentrantLock();
-    this.snapshotDelLock = new ReentrantLock();
-
-    registerConfListener();

     assert ObjectReleaseTracker.track(this);
   }

@@ -1999,7 +2006,7 @@ public final class SolrCore implements SolrInfoBean, SolrMetricProducer, Closeab
    */
   public RefCounted<SolrIndexSearcher> openNewSearcher(boolean updateHandlerReopens, boolean realtime) {
     if (isClosed()) { // catch some errors quicker
-      throw new SolrException(ErrorCode.SERVER_ERROR, "openNewSearcher called on closed core");
+      throw new SolrCoreState.CoreIsClosedException();
     }

     SolrIndexSearcher tmp;

@@ -2372,7 +2379,7 @@ public final class SolrCore implements SolrInfoBean, SolrMetricProducer, Closeab
       return returnSearcher ? newSearchHolder : null;

     } catch (Exception e) {
-      if (e instanceof SolrException) throw (SolrException)e;
+      if (e instanceof RuntimeException) throw (RuntimeException)e;
       throw new SolrException(ErrorCode.SERVER_ERROR, e);
     } finally {

@@ -2491,6 +2498,7 @@ public final class SolrCore implements SolrInfoBean, SolrMetricProducer, Closeab
         // even in the face of errors.
         onDeckSearchers--;
         searcherLock.notifyAll();
+        assert TestInjection.injectSearcherHooks(getCoreDescriptor() != null && getCoreDescriptor().getCloudDescriptor() != null ? getCoreDescriptor().getCloudDescriptor().getCollectionName() : null);
       }
     }
   }

@@ -3008,7 +3016,7 @@ public final class SolrCore implements SolrInfoBean, SolrMetricProducer, Closeab
     int solrConfigversion, overlayVersion, managedSchemaVersion = 0;
     SolrConfig cfg = null;
     try (SolrCore solrCore = cc.solrCores.getCoreFromAnyList(coreName, true)) {
-      if (solrCore == null || solrCore.isClosed()) return;
+      if (solrCore == null || solrCore.isClosed() || solrCore.getCoreContainer().isShutDown()) return;
       cfg = solrCore.getSolrConfig();
       solrConfigversion = solrCore.getSolrConfig().getOverlay().getZnodeVersion();
       overlayVersion = solrCore.getSolrConfig().getZnodeVersion();

@@ -3042,7 +3050,7 @@ public final class SolrCore implements SolrInfoBean, SolrMetricProducer, Closeab
     }
     //some files in conf directory may have other than managedschema, overlay, params
     try (SolrCore solrCore = cc.solrCores.getCoreFromAnyList(coreName, true)) {
-      if (solrCore == null || solrCore.isClosed()) return;
+      if (solrCore == null || solrCore.isClosed() || cc.isShutDown()) return;
       for (Runnable listener : solrCore.confListeners) {
         try {
           listener.run();
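The initIndex hunk above converts a plain create-then-close into try/finally with a quiet close, so a failure inside create() no longer leaks the writer. A compact sketch of the close-quietly idiom it relies on (hand-rolled helper, standing in for the real IOUtils):

    import java.io.Closeable;
    import java.io.IOException;

    public class CloseQuietlySketch {
      static void closeQuietly(Closeable c) {
        if (c == null) return; // tolerate create() having thrown before assignment
        try {
          c.close();
        } catch (IOException e) {
          // suppressed so the original exception from the try block propagates
        }
      }

      public static void main(String[] args) {
        Closeable writer = null;
        try {
          writer = () -> System.out.println("index writer closed");
        } finally {
          closeQuietly(writer); // runs whether or not the assignment succeeded
        }
      }
    }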
@@ -31,7 +31,7 @@ import org.slf4j.LoggerFactory;
 public abstract class TransientSolrCoreCacheFactory {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

-  private CoreContainer coreContainer = null;
+  private volatile CoreContainer coreContainer = null;

   public abstract TransientSolrCoreCache getTransientSolrCoreCache();
   /**
@@ -18,7 +18,7 @@ package org.apache.solr.core;

 public class TransientSolrCoreCacheFactoryDefault extends TransientSolrCoreCacheFactory {

-  TransientSolrCoreCache transientSolrCoreCache = null;
+  volatile TransientSolrCoreCache transientSolrCoreCache = null;

   @Override
   public TransientSolrCoreCache getTransientSolrCoreCache() {
@@ -31,6 +31,7 @@ import java.util.function.Predicate;
 import org.apache.solr.cloud.CurrentCoreDescriptorProvider;
 import org.apache.solr.cloud.SolrZkServer;
 import org.apache.solr.cloud.ZkController;
+import org.apache.solr.common.AlreadyClosedException;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.cloud.ZkConfigManager;

@@ -174,24 +175,31 @@ public class ZkContainer {
     return zkRun.substring(0, zkRun.lastIndexOf('/'));
   }

-  public static Predicate<CoreDescriptor> testing_beforeRegisterInZk;
+  public static volatile Predicate<CoreDescriptor> testing_beforeRegisterInZk;

   public void registerInZk(final SolrCore core, boolean background, boolean skipRecovery) {
+    CoreDescriptor cd = core.getCoreDescriptor(); // save this here - the core may not have it later
     Runnable r = () -> {
       MDCLoggingContext.setCore(core);
       try {
         try {
           if (testing_beforeRegisterInZk != null) {
-            testing_beforeRegisterInZk.test(core.getCoreDescriptor());
+            testing_beforeRegisterInZk.test(cd);
           }
-          zkController.register(core.getName(), core.getCoreDescriptor(), skipRecovery);
+          if (!core.getCoreContainer().isShutDown()) {
+            zkController.register(core.getName(), cd, skipRecovery);
+          }
         } catch (InterruptedException e) {
           // Restore the interrupted status
           Thread.currentThread().interrupt();
           SolrException.log(log, "", e);
         } catch (KeeperException e) {
           SolrException.log(log, "", e);
+        } catch (AlreadyClosedException e) {
+
         } catch (Exception e) {
           try {
-            zkController.publish(core.getCoreDescriptor(), Replica.State.DOWN);
+            zkController.publish(cd, Replica.State.DOWN);
           } catch (InterruptedException e1) {
             Thread.currentThread().interrupt();
             log.error("", e1);
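The ZkContainer hunk above snapshots the CoreDescriptor into a local (cd) before handing work to the async Runnable, per its own comment that the core may not have the descriptor later. A tiny illustration of capturing state eagerly for deferred work, with a hypothetical Core class:

    public class CaptureBeforeAsyncSketch {
      static class Core {
        private volatile String descriptor = "cd-1";
        String getDescriptor() { return descriptor; }
        void close() { descriptor = null; }
      }

      public static void main(String[] args) throws InterruptedException {
        Core core = new Core();
        String cd = core.getDescriptor(); // save this here - the core may not have it later
        Thread register = new Thread(() -> System.out.println("registering with " + cd));
        core.close();   // even after close, the task still holds a valid snapshot
        register.start();
        register.join();
      }
    }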
@@ -97,6 +97,7 @@ class CdcrReplicatorManager implements CdcrStateManager.CdcrStateObserver {
       String targetCollection = params.get(CdcrParams.TARGET_COLLECTION_PARAM);

       CloudSolrClient client = new Builder(Collections.singletonList(zkHost), Optional.empty())
+          .withSocketTimeout(30000).withConnectionTimeout(15000)
           .sendUpdatesOnlyToShardLeaders()
           .build();
       client.setDefaultCollection(targetCollection);
@@ -222,7 +222,7 @@ public class IndexFetcher {
     httpClientParams.set(HttpClientUtil.PROP_BASIC_AUTH_PASS, httpBasicAuthPassword);
     httpClientParams.set(HttpClientUtil.PROP_ALLOW_COMPRESSION, useCompression);

-    return HttpClientUtil.createClient(httpClientParams, core.getCoreContainer().getUpdateShardHandler().getDefaultConnectionManager(), true);
+    return HttpClientUtil.createClient(httpClientParams, core.getCoreContainer().getUpdateShardHandler().getRecoveryOnlyConnectionManager(), true);
   }

   public IndexFetcher(final NamedList initArgs, final ReplicationHandler handler, final SolrCore sc) {
@@ -197,7 +197,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw

   private boolean replicateOnStart = false;

-  private ScheduledExecutorService executorService;
+  private volatile ScheduledExecutorService executorService;

   private volatile long executorStartTime;

@@ -1369,6 +1369,8 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
     if (restoreFuture != null) {
       restoreFuture.cancel(false);
     }
+
+    ExecutorUtil.shutdownAndAwaitTermination(executorService);
   }

   /**
@@ -125,7 +125,7 @@ public class AutoscalingHistoryHandler extends RequestHandlerBase implements Per
         }
       }
     }
-    try (CloudSolrClient cloudSolrClient = new CloudSolrClient.Builder(Collections.singletonList(coreContainer.getZkController().getZkServerAddress()), Optional.empty())
+    try (CloudSolrClient cloudSolrClient = new CloudSolrClient.Builder(Collections.singletonList(coreContainer.getZkController().getZkServerAddress()), Optional.empty()).withSocketTimeout(30000).withConnectionTimeout(15000)
         .withHttpClient(coreContainer.getUpdateShardHandler().getDefaultHttpClient())
         .build()) {
       QueryResponse qr = cloudSolrClient.query(collection, params);
@@ -31,6 +31,7 @@ import java.util.Map;
 import java.util.Optional;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
 import java.util.stream.Collectors;

 import com.google.common.collect.ImmutableList;

@@ -45,10 +46,10 @@ import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.client.solrj.request.CoreAdminRequest.RequestSyncShard;
 import org.apache.solr.client.solrj.response.RequestStatusState;
 import org.apache.solr.client.solrj.util.SolrIdentifierValidator;
 import org.apache.solr.cloud.Overseer;
 import org.apache.solr.cloud.OverseerSolrResponse;
-import org.apache.solr.cloud.OverseerTaskQueue;
 import org.apache.solr.cloud.OverseerTaskQueue.QueueEvent;
+import org.apache.solr.cloud.ZkController.NotInClusterStateException;
 import org.apache.solr.cloud.ZkController;
 import org.apache.solr.cloud.ZkShardTerms;
 import org.apache.solr.cloud.overseer.SliceMutator;

@@ -285,7 +286,7 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission

       } else {
         // submits and doesn't wait for anything (no response)
-        Overseer.getStateUpdateQueue(coreContainer.getZkController().getZkClient()).offer(Utils.toJSON(props));
+        coreContainer.getZkController().getOverseer().offerStateUpdate(Utils.toJSON(props));
       }

     }

@@ -1249,61 +1250,59 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
       return;
     }

+    int replicaFailCount;
+    if (createCollResponse.getResponse().get("failure") != null) {
+      // TODO: we should not wait for Replicas we know failed
+      replicaFailCount = ((NamedList) createCollResponse.getResponse().get("failure")).size();
+    } else {
+      replicaFailCount = 0;
+    }
+
-    String replicaNotAlive = null;
-    String replicaState = null;
-    String nodeNotLive = null;
-
     CloudConfig ccfg = cc.getConfig().getCloudConfig();
-    Integer numRetries = ccfg.getCreateCollectionWaitTimeTillActive(); // this config is actually # seconds, not # tries
+    Integer seconds = ccfg.getCreateCollectionWaitTimeTillActive();
     Boolean checkLeaderOnly = ccfg.isCreateCollectionCheckLeaderActive();
-    log.info("Wait for new collection to be active for at most " + numRetries + " seconds. Check all shard "
+    log.info("Wait for new collection to be active for at most " + seconds + " seconds. Check all shard "
         + (checkLeaderOnly ? "leaders" : "replicas"));
-    ZkStateReader zkStateReader = cc.getZkController().getZkStateReader();
-    for (int i = 0; i < numRetries; i++) {
-      ClusterState clusterState = zkStateReader.getClusterState();
-
-      final DocCollection docCollection = clusterState.getCollectionOrNull(collectionName);
-
-      if (docCollection != null && docCollection.getSlices() != null) {
-        Collection<Slice> shards = docCollection.getSlices();
-        replicaNotAlive = null;
-        for (Slice shard : shards) {
-          Collection<Replica> replicas;
-          if (!checkLeaderOnly) replicas = shard.getReplicas();
-          else {
-            replicas = new ArrayList<Replica>();
-            replicas.add(shard.getLeader());
-          }
-          for (Replica replica : replicas) {
-            String state = replica.getStr(ZkStateReader.STATE_PROP);
-            log.debug("Checking replica status, collection={} replica={} state={}", collectionName,
-                replica.getCoreUrl(), state);
-            if (!clusterState.liveNodesContain(replica.getNodeName())
-                || !state.equals(Replica.State.ACTIVE.toString())) {
-              replicaNotAlive = replica.getCoreUrl();
-              nodeNotLive = replica.getNodeName();
-              replicaState = state;
-              break;
+    try {
+      cc.getZkController().getZkStateReader().waitForState(collectionName, seconds, TimeUnit.SECONDS, (n, c) -> {
+
+        if (c == null) {
+          // the collection was not created, don't wait
+          return true;
+        }
+
+        if (c.getSlices() != null) {
+          Collection<Slice> shards = c.getSlices();
+          int replicaNotAliveCnt = 0;
+          for (Slice shard : shards) {
+            Collection<Replica> replicas;
+            if (!checkLeaderOnly) replicas = shard.getReplicas();
+            else {
+              replicas = new ArrayList<Replica>();
+              replicas.add(shard.getLeader());
+            }
+            for (Replica replica : replicas) {
+              String state = replica.getStr(ZkStateReader.STATE_PROP);
+              log.debug("Checking replica status, collection={} replica={} state={}", collectionName,
+                  replica.getCoreUrl(), state);
+              if (!n.contains(replica.getNodeName())
+                  || !state.equals(Replica.State.ACTIVE.toString())) {
+                replicaNotAliveCnt++;
+                return false;
+              }
             }
           }
-          if (replicaNotAlive != null) break;
-        }
-
-        if (replicaNotAlive == null) return;
-      }
-      Thread.sleep(1000); // thus numRetries is roughly number of seconds
-    }
-    if (nodeNotLive != null && replicaState != null) {
-      log.error("Timed out waiting for new collection's replicas to become ACTIVE "
-          + (replicaState.equals(Replica.State.ACTIVE.toString()) ? "node " + nodeNotLive + " is not live"
-              : "replica " + replicaNotAlive + " is in state of " + replicaState.toString()) + " with timeout=" + numRetries);
-    } else {
-      log.error("Timed out waiting for new collection's replicas to become ACTIVE with timeout=" + numRetries);
+
+          if ((replicaNotAliveCnt == 0) || (replicaNotAliveCnt <= replicaFailCount)) return true;
+        }
+        return false;
+      });
+    } catch (TimeoutException | InterruptedException e) {
+
+      String error = "Timeout waiting for active collection " + collectionName + " with timeout=" + seconds;
+      throw new NotInClusterStateException(ErrorCode.SERVER_ERROR, error);
     }

   }

   public static void verifyRuleParams(CoreContainer cc, Map<String, Object> m) {
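The big CollectionsHandler hunk above is the clearest instance of the polling-to-waitFor conversion: a sleep(1000) retry loop becomes a single waitForState call driven by a predicate over live nodes and collection state. A generic sketch of such a wait utility -- hand-rolled and poll-based for brevity, whereas the real ZkStateReader version is watcher-driven:

    import java.util.concurrent.TimeUnit;
    import java.util.concurrent.TimeoutException;
    import java.util.function.Predicate;
    import java.util.function.Supplier;

    public class WaitForStateSketch {
      static <T> void waitFor(long timeout, TimeUnit unit, Supplier<T> state, Predicate<T> done)
          throws TimeoutException, InterruptedException {
        long deadline = System.nanoTime() + unit.toNanos(timeout);
        while (!done.test(state.get())) {
          if (System.nanoTime() >= deadline) throw new TimeoutException();
          Thread.sleep(50); // a real implementation blocks on watcher notifications instead
        }
      }

      public static void main(String[] args) throws Exception {
        waitFor(1, TimeUnit.SECONDS, () -> "active", s -> s.equals("active"));
        System.out.println("collection active");
      }
    }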
@@ -371,7 +371,7 @@ public class CoreAdminHandler extends RequestHandlerBase implements PermissionNa
    * Method to ensure shutting down of the ThreadPool Executor.
    */
   public void shutdown() {
-    if (parallelExecutor != null && !parallelExecutor.isShutdown())
+    if (parallelExecutor != null)
       ExecutorUtil.shutdownAndAwaitTermination(parallelExecutor);
   }
|
@ -642,7 +642,17 @@ public class MetricsHistoryHandler extends RequestHandlerBase implements Permiss
|
|||
public void close() {
|
||||
log.debug("Closing " + hashCode());
|
||||
if (collectService != null) {
|
||||
collectService.shutdownNow();
|
||||
boolean shutdown = false;
|
||||
while (!shutdown) {
|
||||
try {
|
||||
// Wait a while for existing tasks to terminate
|
||||
collectService.shutdownNow();
|
||||
shutdown = collectService.awaitTermination(5, TimeUnit.SECONDS);
|
||||
} catch (InterruptedException ie) {
|
||||
// Preserve interrupt status
|
||||
Thread.currentThread().interrupt();
|
||||
}
|
||||
}
|
||||
}
|
||||
if (factory != null) {
|
||||
factory.close();
|
||||
|
|
|
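The close() above loops shutdownNow/awaitTermination until the executor has really terminated, preserving interrupt status along the way. The same shape in isolation:

    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.TimeUnit;

    public class ShutdownLoopSketch {
      public static void main(String[] args) {
        ExecutorService collectService = Executors.newSingleThreadExecutor();
        boolean shutdown = false;
        while (!shutdown) {
          try {
            collectService.shutdownNow();  // interrupt any running collection task
            // wait a bounded slice, loop until termination is confirmed
            shutdown = collectService.awaitTermination(5, TimeUnit.SECONDS);
          } catch (InterruptedException ie) {
            Thread.currentThread().interrupt(); // preserve interrupt status, keep looping
          }
        }
        System.out.println("executor fully terminated");
      }
    }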
@ -18,13 +18,15 @@
|
|||
package org.apache.solr.handler.admin;
|
||||
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
|
||||
import org.apache.solr.cloud.CloudDescriptor;
|
||||
import org.apache.solr.cloud.ZkController.NotInClusterStateException;
|
||||
import org.apache.solr.cloud.ZkShardTerms;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.cloud.ClusterState;
|
||||
import org.apache.solr.common.cloud.DocCollection;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.cloud.Replica;
|
||||
import org.apache.solr.common.cloud.Slice;
|
||||
import org.apache.solr.common.cloud.ZkStateReader;
|
||||
|
@ -47,10 +49,7 @@ class PrepRecoveryOp implements CoreAdminHandler.CoreAdminOp {
|
|||
|
||||
final SolrParams params = it.req.getParams();
|
||||
|
||||
String cname = params.get(CoreAdminParams.CORE);
|
||||
if (cname == null) {
|
||||
cname = "";
|
||||
}
|
||||
String cname = params.get(CoreAdminParams.CORE, "");
|
||||
|
||||
String nodeName = params.get("nodeName");
|
||||
String coreNodeName = params.get("coreNodeName");
|
||||
|
@ -59,133 +58,110 @@ class PrepRecoveryOp implements CoreAdminHandler.CoreAdminOp {
|
|||
Boolean onlyIfLeader = params.getBool("onlyIfLeader");
|
||||
Boolean onlyIfLeaderActive = params.getBool("onlyIfLeaderActive");
|
||||
|
||||
|
||||
CoreContainer coreContainer = it.handler.coreContainer;
|
||||
// wait long enough for the leader conflict to work itself out plus a little extra
|
||||
int conflictWaitMs = coreContainer.getZkController().getLeaderConflictResolveWait();
|
||||
int maxTries = (int) Math.round(conflictWaitMs / 1000) + 3;
|
||||
log.info("Going to wait for coreNodeName: {}, state: {}, checkLive: {}, onlyIfLeader: {}, onlyIfLeaderActive: {}, maxTime: {} s",
|
||||
coreNodeName, waitForState, checkLive, onlyIfLeader, onlyIfLeaderActive, maxTries);
|
||||
|
||||
Replica.State state = null;
|
||||
boolean live = false;
|
||||
int retry = 0;
|
||||
while (true) {
|
||||
try (SolrCore core = coreContainer.getCore(cname)) {
|
||||
if (core == null && retry == Math.min(30, maxTries)) {
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "core not found:"
|
||||
+ cname);
|
||||
}
|
||||
if (core != null) {
|
||||
log.info(
|
||||
"Going to wait for coreNodeName: {}, state: {}, checkLive: {}, onlyIfLeader: {}, onlyIfLeaderActive: {}",
|
||||
coreNodeName, waitForState, checkLive, onlyIfLeader, onlyIfLeaderActive);
|
||||
|
||||
String collectionName;
|
||||
CloudDescriptor cloudDescriptor;
|
||||
try (SolrCore core = coreContainer.getCore(cname)) {
|
||||
if (core == null) throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "core not found:" + cname);
|
||||
collectionName = core.getCoreDescriptor().getCloudDescriptor().getCollectionName();
|
||||
cloudDescriptor = core.getCoreDescriptor()
|
||||
.getCloudDescriptor();
|
||||
}
|
||||
AtomicReference<String> errorMessage = new AtomicReference<>();
|
||||
try {
|
||||
coreContainer.getZkController().getZkStateReader().waitForState(collectionName, conflictWaitMs, TimeUnit.MILLISECONDS, (n, c) -> {
|
||||
if (c == null)
|
||||
return false;
|
||||
|
||||
try (SolrCore core = coreContainer.getCore(cname)) {
|
||||
if (core == null) throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "core not found:" + cname);
|
||||
if (onlyIfLeader != null && onlyIfLeader) {
|
||||
if (!core.getCoreDescriptor().getCloudDescriptor().isLeader()) {
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "We are not the leader");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// wait until we are sure the recovering node is ready
|
||||
// to accept updates
|
||||
CloudDescriptor cloudDescriptor = core.getCoreDescriptor()
|
||||
.getCloudDescriptor();
|
||||
String collectionName = cloudDescriptor.getCollectionName();
|
||||
// wait until we are sure the recovering node is ready
|
||||
// to accept updates
|
||||
Replica.State state = null;
|
||||
boolean live = false;
|
||||
Slice slice = c.getSlice(cloudDescriptor.getShardId());
|
||||
if (slice != null) {
|
||||
final Replica replica = slice.getReplicasMap().get(coreNodeName);
|
||||
if (replica != null) {
|
||||
state = replica.getState();
|
||||
live = n.contains(nodeName);
|
||||
|
||||
if (retry % 15 == 0) {
|
||||
if (retry > 0 && log.isInfoEnabled())
|
||||
log.info("After " + retry + " seconds, core " + cname + " (" +
|
||||
cloudDescriptor.getShardId() + " of " +
|
||||
cloudDescriptor.getCollectionName() + ") still does not have state: " +
|
||||
waitForState + "; forcing ClusterState update from ZooKeeper");
|
||||
final Replica.State localState = cloudDescriptor.getLastPublished();
|
||||
|
||||
// force a cluster state update
|
||||
coreContainer.getZkController().getZkStateReader().forceUpdateCollection(collectionName);
|
||||
}
|
||||
// TODO: This is funky but I've seen this in testing where the replica asks the
|
||||
// leader to be in recovery? Need to track down how that happens ... in the meantime,
|
||||
// this is a safeguard
|
||||
boolean leaderDoesNotNeedRecovery = (onlyIfLeader != null &&
|
||||
onlyIfLeader &&
|
||||
cname.equals(replica.getStr("core")) &&
|
||||
waitForState == Replica.State.RECOVERING &&
|
||||
localState == Replica.State.ACTIVE &&
|
||||
state == Replica.State.ACTIVE);
|
||||
|
||||
ClusterState clusterState = coreContainer.getZkController().getClusterState();
|
DocCollection collection = clusterState.getCollection(collectionName);
Slice slice = collection.getSlice(cloudDescriptor.getShardId());
if (slice != null) {
  final Replica replica = slice.getReplicasMap().get(coreNodeName);
  if (replica != null) {
    state = replica.getState();
    live = clusterState.liveNodesContain(nodeName);

    if (leaderDoesNotNeedRecovery) {
      log.warn(
          "Leader " + cname + " ignoring request to be in the recovering state because it is live and active.");
    }

    final Replica.State localState = cloudDescriptor.getLastPublished();

    ZkShardTerms shardTerms = coreContainer.getZkController().getShardTerms(collectionName, slice.getName());
    // if the replica is waiting for the leader to see the recovery state, the leader should refresh its terms
    if (waitForState == Replica.State.RECOVERING && shardTerms.registered(coreNodeName)
        && shardTerms.skipSendingUpdatesTo(coreNodeName)) {
      // The replica changed its term, then published itself as RECOVERING.
      // This core already sees the replica as RECOVERING,
      // so it is guaranteed that a live-fetch will be enough for this core to see the max term published.
      shardTerms.refreshTerms();
    }

    // TODO: This is funky but I've seen this in testing where the replica asks the
    // leader to be in recovery? Need to track down how that happens ... in the meantime,
    // this is a safeguard
    boolean leaderDoesNotNeedRecovery = (onlyIfLeader != null &&
        onlyIfLeader &&
        core.getName().equals(replica.getStr("core")) &&
        waitForState == Replica.State.RECOVERING &&
        localState == Replica.State.ACTIVE &&
        state == Replica.State.ACTIVE);

    boolean onlyIfActiveCheckResult = onlyIfLeaderActive != null && onlyIfLeaderActive
        && localState != Replica.State.ACTIVE;
    log.info(
        "In WaitForState(" + waitForState + "): collection=" + collectionName + ", shard=" + slice.getName() +
        ", thisCore=" + cname + ", leaderDoesNotNeedRecovery=" + leaderDoesNotNeedRecovery +
        ", isLeader? " + cloudDescriptor.isLeader() +
        ", live=" + live + ", checkLive=" + checkLive + ", currentState=" + state.toString()
        + ", localState=" + localState + ", nodeName=" + nodeName +
        ", coreNodeName=" + coreNodeName + ", onlyIfActiveCheckResult=" + onlyIfActiveCheckResult
        + ", nodeProps: " + replica);

    if (leaderDoesNotNeedRecovery) {
      log.warn("Leader " + core.getName() + " ignoring request to be in the recovering state because it is live and active.");
    }

    ZkShardTerms shardTerms = coreContainer.getZkController().getShardTerms(collectionName, slice.getName());
    // if the replica is waiting for the leader to see the recovery state, the leader should refresh its terms
    if (waitForState == Replica.State.RECOVERING && shardTerms.registered(coreNodeName) && shardTerms.skipSendingUpdatesTo(coreNodeName)) {
      // The replica changed its term, then published itself as RECOVERING.
      // This core already sees the replica as RECOVERING,
      // so it is guaranteed that a live-fetch will be enough for this core to see the max term published.
      shardTerms.refreshTerms();
    }

    boolean onlyIfActiveCheckResult = onlyIfLeaderActive != null && onlyIfLeaderActive && localState != Replica.State.ACTIVE;
    log.info("In WaitForState(" + waitForState + "): collection=" + collectionName + ", shard=" + slice.getName() +
        ", thisCore=" + core.getName() + ", leaderDoesNotNeedRecovery=" + leaderDoesNotNeedRecovery +
        ", isLeader? " + core.getCoreDescriptor().getCloudDescriptor().isLeader() +
        ", live=" + live + ", checkLive=" + checkLive + ", currentState=" + state.toString() + ", localState=" + localState + ", nodeName=" + nodeName +
        ", coreNodeName=" + coreNodeName + ", onlyIfActiveCheckResult=" + onlyIfActiveCheckResult + ", nodeProps: " + replica);

    if (!onlyIfActiveCheckResult && replica != null && (state == waitForState || leaderDoesNotNeedRecovery)) {
      if (checkLive == null) {
        break;
      } else if (checkLive && live) {
        break;
      } else if (!checkLive && !live) {
        break;
      }
    if (!onlyIfActiveCheckResult && replica != null && (state == waitForState || leaderDoesNotNeedRecovery)) {
      if (checkLive == null) {
        return true;
      } else if (checkLive && live) {
        return true;
      } else if (!checkLive && !live) {
        return true;
      }
    }
    }
  }
}

if (retry++ == maxTries) {
  String collection = null;
  String leaderInfo = null;
  String shardId = null;

  try {
    CloudDescriptor cloudDescriptor =
        core.getCoreDescriptor().getCloudDescriptor();
    collection = cloudDescriptor.getCollectionName();
    shardId = cloudDescriptor.getShardId();
    leaderInfo = coreContainer.getZkController().
        getZkStateReader().getLeaderUrl(collection, shardId, 5000);
  } catch (Exception exc) {
    leaderInfo = "Not available due to: " + exc;
  }

  throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
      "I was asked to wait on state " + waitForState + " for "
      + shardId + " in " + collection + " on " + nodeName
      + " but I still do not see the requested state. I see state: "
      + Objects.toString(state) + " live:" + live + " leader from ZK: " + leaderInfo);
}

if (coreContainer.isShutDown()) {
  throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
      "Solr is shutting down");
}
}
Thread.sleep(1000);

return false;
});
} catch (TimeoutException | InterruptedException e) {
  String error = errorMessage.get();
  if (error == null)
    error = "Timeout waiting for collection state.";
  throw new NotInClusterStateException(ErrorCode.SERVER_ERROR, error);
}

log.info("Waited coreNodeName: " + coreNodeName + ", state: " + waitForState
    + ", checkLive: " + checkLive + ", onlyIfLeader: " + onlyIfLeader + " for: " + retry + " seconds.");
}
}
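The sleep-and-retry loop above is exactly the polling that SOLR-12898 is replacing with ZkStateReader#waitFor-style predicates. As a hedged sketch of that direction (not the literal patch code), the same wait can be expressed as a predicate over live nodes and collection state; the collection and coreNodeName below are illustrative, and a ZkStateReader is assumed to be in scope:

// Sketch only: wait up to 30s for an illustrative replica to be ACTIVE and live,
// instead of sleeping and re-reading cluster state by hand.
zkStateReader.waitForState("myCollection", 30, TimeUnit.SECONDS,
    (liveNodes, docCollection) -> {
      if (docCollection == null) return false;
      Replica r = docCollection.getReplica("core_node1"); // hypothetical coreNodeName
      return r != null && r.getState() == Replica.State.ACTIVE
          && liveNodes.contains(r.getNodeName());
    });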
@@ -16,13 +16,16 @@
 */
package org.apache.solr.handler.component;

import java.lang.invoke.MethodHandles;
import java.util.concurrent.Callable;
import java.util.concurrent.Future;
import java.util.concurrent.ExecutorService;
import java.util.List;
import java.util.ArrayList;
import static org.apache.solr.common.params.CommonParams.DISTRIB;

import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;

import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.impl.HttpClientUtil;
import org.apache.solr.client.solrj.impl.HttpSolrClient;

@@ -34,28 +37,28 @@ import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.SolrjNamedThreadFactory;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.http.client.HttpClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.solr.common.params.CommonParams.DISTRIB;

public abstract class IterativeMergeStrategy implements MergeStrategy {

  protected ExecutorService executorService;
  protected static HttpClient httpClient;
  protected volatile ExecutorService executorService;

  protected volatile CloseableHttpClient httpClient;

  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  public void merge(ResponseBuilder rb, ShardRequest sreq) {
    rb._responseDocs = new SolrDocumentList(); // Null pointers will occur otherwise.
    rb.onePassDistributedQuery = true; // Turn off the second pass distributed.
    executorService = ExecutorUtil.newMDCAwareCachedThreadPool(new SolrjNamedThreadFactory("IterativeMergeStrategy"));
    httpClient = getHttpClient();
    try {
      process(rb, sreq);
    } catch (Exception e) {
      throw new RuntimeException(e);
    } finally {
      HttpClientUtil.close(httpClient);
      executorService.shutdownNow();
    }
  }

@@ -76,7 +79,7 @@ public abstract class IterativeMergeStrategy implements MergeStrategy {

  }

  public static class CallBack implements Callable<CallBack> {
  public class CallBack implements Callable<CallBack> {
    private HttpSolrClient solrClient;
    private QueryRequest req;
    private QueryResponse response;

@@ -85,7 +88,7 @@ public abstract class IterativeMergeStrategy implements MergeStrategy {
    public CallBack(ShardResponse originalShardResponse, QueryRequest req) {

      this.solrClient = new Builder(originalShardResponse.getShardAddress())
          .withHttpClient(getHttpClient())
          .withHttpClient(httpClient)
          .build();
      this.req = req;
      this.originalShardResponse = originalShardResponse;

@@ -122,16 +125,16 @@ public abstract class IterativeMergeStrategy implements MergeStrategy {

  protected abstract void process(ResponseBuilder rb, ShardRequest sreq) throws Exception;

  static synchronized HttpClient getHttpClient() {
  private CloseableHttpClient getHttpClient() {
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set(HttpClientUtil.PROP_MAX_CONNECTIONS, 128);
    params.set(HttpClientUtil.PROP_MAX_CONNECTIONS_PER_HOST, 32);
    CloseableHttpClient httpClient = HttpClientUtil.createClient(params);

    if(httpClient == null) {
      ModifiableSolrParams params = new ModifiableSolrParams();
      params.set(HttpClientUtil.PROP_MAX_CONNECTIONS, 128);
      params.set(HttpClientUtil.PROP_MAX_CONNECTIONS_PER_HOST, 32);
      httpClient = HttpClientUtil.createClient(params);
      return httpClient;
    } else {
      return httpClient;
    }
    return httpClient;
  }

}
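The IterativeMergeStrategy change above swaps a lazily initialized static HttpClient for a per-merge CloseableHttpClient that is closed in a finally block, so the pool no longer leaks across merges. A condensed sketch of that lifecycle, assuming only the SolrJ HttpClientUtil API already used in the diff:

// Build, use, always close - per merge rather than shared JVM-wide state.
ModifiableSolrParams params = new ModifiableSolrParams();
params.set(HttpClientUtil.PROP_MAX_CONNECTIONS, 128);
params.set(HttpClientUtil.PROP_MAX_CONNECTIONS_PER_HOST, 32);
CloseableHttpClient client = HttpClientUtil.createClient(params);
try {
  // ... run the iterative merge, building HttpSolrClients withHttpClient(client) ...
} finally {
  HttpClientUtil.close(client); // releases the connection pool deterministically
}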
@@ -38,7 +38,6 @@ import org.apache.solr.common.util.DataInputInputStream;
import org.apache.solr.common.util.FastInputStream;
import org.apache.solr.common.util.JavaBinCodec;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.handler.RequestHandlerUtils;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.update.AddUpdateCommand;

@@ -89,13 +88,6 @@ public class JavabinLoader extends ContentStreamLoader {
  @Override
  public void update(SolrInputDocument document, UpdateRequest updateRequest, Integer commitWithin, Boolean overwrite) {
    if (document == null) {
      // Perhaps commit from the parameters
      try {
        RequestHandlerUtils.handleCommit(req, processor, updateRequest.getParams(), false);
        RequestHandlerUtils.handleRollback(req, processor, updateRequest.getParams(), false);
      } catch (IOException e) {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "ERROR handling commit/rollback");
      }
      return;
    }
    if (addCmd == null) {
@@ -53,7 +53,7 @@ class SolrSchema extends AbstractSchema {
  @Override
  protected Map<String, Table> getTableMap() {
    String zk = this.properties.getProperty("zk");
    try(CloudSolrClient cloudSolrClient = new CloudSolrClient.Builder(Collections.singletonList(zk), Optional.empty()).build()) {
    try(CloudSolrClient cloudSolrClient = new CloudSolrClient.Builder(Collections.singletonList(zk), Optional.empty()).withSocketTimeout(30000).withConnectionTimeout(15000).build()) {
      cloudSolrClient.connect();
      ZkStateReader zkStateReader = cloudSolrClient.getZkStateReader();
      ClusterState clusterState = zkStateReader.getClusterState();

@@ -77,7 +77,7 @@ class SolrSchema extends AbstractSchema {

  private Map<String, LukeResponse.FieldInfo> getFieldInfo(String collection) {
    String zk = this.properties.getProperty("zk");
    try(CloudSolrClient cloudSolrClient = new CloudSolrClient.Builder(Collections.singletonList(zk), Optional.empty()).build()) {
    try(CloudSolrClient cloudSolrClient = new CloudSolrClient.Builder(Collections.singletonList(zk), Optional.empty()).withSocketTimeout(30000).withConnectionTimeout(15000).build()) {
      cloudSolrClient.connect();
      LukeRequest lukeRequest = new LukeRequest();
      lukeRequest.setNumTerms(0);
@@ -34,8 +34,6 @@
import java.util.concurrent.FutureTask;
import java.util.concurrent.RunnableFuture;
import java.util.concurrent.Semaphore;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.TimeUnit;
import java.util.function.Predicate;

import org.apache.lucene.index.LeafReader;

@@ -66,7 +64,6 @@ import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.GroupParams;
import org.apache.solr.common.params.RequiredSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.StrUtils;

@@ -93,7 +90,6 @@ import org.apache.solr.search.facet.FacetDebugInfo;
import org.apache.solr.search.facet.FacetRequest;
import org.apache.solr.search.grouping.GroupingSpecification;
import org.apache.solr.util.BoundedTreeSet;
import org.apache.solr.util.DefaultSolrThreadFactory;
import org.apache.solr.util.RTimer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@@ -170,6 +166,7 @@ public class SimpleFacets {
    this.docsOrig = docs;
    this.global = params;
    this.rb = rb;
    this.facetExecutor = req.getCore().getCoreContainer().getUpdateShardHandler().getUpdateExecutor();
  }

  public void setFacetDebugInfo(FacetDebugInfo fdebugParent) {

@@ -773,13 +770,7 @@ public class SimpleFacets {
    }
  };

  static final Executor facetExecutor = new ExecutorUtil.MDCAwareThreadPoolExecutor(
      0,
      Integer.MAX_VALUE,
      10, TimeUnit.SECONDS, // terminate idle threads after 10 sec
      new SynchronousQueue<Runnable>() // directly hand off tasks
      , new DefaultSolrThreadFactory("facetExecutor")
  );
  private final Executor facetExecutor;

  /**
   * Returns a list of value constraints and the associated facet counts
@@ -55,7 +55,7 @@ public class SolrRequestInfo {
    SolrRequestInfo prev = threadLocal.get();
    if (prev != null) {
      log.error("Previous SolrRequestInfo was not closed! req=" + prev.req.getOriginalParams().toString());
      log.error("prev == info : {}", prev.req == info.req);
      log.error("prev == info : {}", prev.req == info.req, new RuntimeException());
    }
    assert prev == null;
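The added RuntimeException argument is the usual SLF4J idiom for capturing a stack trace at the log site: when the final argument is a Throwable, its trace is printed with the message. A generic illustration (the names are illustrative, not from the patch):

// A throwaway exception records exactly where the leaked state was detected.
log.error("previous state was not cleaned up, value={}", staleValue,
    new RuntimeException("capture call site"));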
@@ -60,7 +60,7 @@ public class PKIAuthenticationPlugin extends AuthenticationPlugin implements Htt
  private final Map<String, PublicKey> keyCache = new ConcurrentHashMap<>();
  private final PublicKeyHandler publicKeyHandler;
  private final CoreContainer cores;
  private final int MAX_VALIDITY = Integer.parseInt(System.getProperty("pkiauth.ttl", "10000"));
  private final int MAX_VALIDITY = Integer.parseInt(System.getProperty("pkiauth.ttl", "15000"));
  private final String myNodeName;
  private final HttpHeaderClientInterceptor interceptor = new HttpHeaderClientInterceptor();
  private boolean interceptorRegistered = false;
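MAX_VALIDITY is read once, at plugin construction, from the pkiauth.ttl system property (milliseconds), so the raised 15s default can still be overridden at startup. An illustrative override, which must run before the CoreContainer creates the plugin:

System.setProperty("pkiauth.ttl", "20000"); // illustrative value, in ms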
@@ -885,9 +885,8 @@ public class HttpSolrCall {
  boolean byCoreName = false;

  if (slices == null) {
    activeSlices = new ArrayList<>();
    // look by core name
    byCoreName = true;
    activeSlices = new ArrayList<>();
    getSlicesForCollections(clusterState, activeSlices, true);
    if (activeSlices.isEmpty()) {
      getSlicesForCollections(clusterState, activeSlices, false);

@@ -930,7 +929,7 @@ public class HttpSolrCall {
  if (!activeReplicas || (liveNodes.contains(replica.getNodeName())
      && replica.getState() == Replica.State.ACTIVE)) {

    if (byCoreName && !collectionName.equals(replica.getStr(CORE_NAME_PROP))) {
    if (byCoreName && !origCorename.equals(replica.getStr(CORE_NAME_PROP))) {
      // if it's by core name, make sure they match
      continue;
    }
@@ -102,6 +102,7 @@ public class SolrDispatchFilter extends BaseSolrFilter {
  private final String metricTag = Integer.toHexString(hashCode());
  private SolrMetricManager metricManager;
  private String registryName;
  private volatile boolean closeOnDestroy = true;

  /**
   * Enum to define action that needs to be processed.

@@ -294,26 +295,43 @@ public class SolrDispatchFilter extends BaseSolrFilter {

  @Override
  public void destroy() {
    if (closeOnDestroy) {
      close();
    }
  }

  public void close() {
    CoreContainer cc = cores;
    cores = null;
    try {
      FileCleaningTracker fileCleaningTracker = SolrRequestParsers.fileCleaningTracker;
      if (fileCleaningTracker != null) {
        fileCleaningTracker.exitWhenFinished();
      }
    } catch (Exception e) {
      log.warn("Exception closing FileCleaningTracker", e);
    } finally {
      SolrRequestParsers.fileCleaningTracker = null;
    }

    if (metricManager != null) {
      metricManager.unregisterGauges(registryName, metricTag);
    }

    if (cores != null) {
      try {
        cores.shutdown();
        FileCleaningTracker fileCleaningTracker = SolrRequestParsers.fileCleaningTracker;
        if (fileCleaningTracker != null) {
          fileCleaningTracker.exitWhenFinished();
        }
      } catch (NullPointerException e) {
        // okay
      } catch (Exception e) {
        log.warn("Exception closing FileCleaningTracker", e);
      } finally {
        cores = null;
        SolrRequestParsers.fileCleaningTracker = null;
      }

      if (metricManager != null) {
        try {
          metricManager.unregisterGauges(registryName, metricTag);
        } catch (NullPointerException e) {
          // okay
        } catch (Exception e) {
          log.warn("Exception closing FileCleaningTracker", e);
        } finally {
          metricManager = null;
        }
      }
    } finally {
      if (cc != null) {
        httpClient = null;
        cc.shutdown();
      }
    }
  }

@@ -594,4 +612,8 @@ public class SolrDispatchFilter extends BaseSolrFilter {
    return response;
  }
}

  public void closeOnDestroy(boolean closeOnDestroy) {
    this.closeOnDestroy = closeOnDestroy;
  }
}
@@ -59,7 +59,7 @@ public final class CommitTracker implements Runnable {
  private long tLogFileSizeUpperBound;

  private final ScheduledExecutorService scheduler =
      Executors.newScheduledThreadPool(1, new DefaultSolrThreadFactory("commitScheduler"));
      Executors.newScheduledThreadPool(0, new DefaultSolrThreadFactory("commitScheduler"));
  private ScheduledFuture pending;

  // state
@@ -814,25 +814,23 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
  }


  public static boolean commitOnClose = true; // TODO: make this a real config option or move it to TestInjection
  public static volatile boolean commitOnClose = true; // TODO: make this a real config option or move it to TestInjection

  // IndexWriterCloser interface method - called from solrCoreState.decref(this)
  @Override
  public void closeWriter(IndexWriter writer) throws IOException {

    assert TestInjection.injectNonGracefullClose(core.getCoreContainer());


    boolean clearRequestInfo = false;
    solrCoreState.getCommitLock().lock();

    SolrQueryRequest req = new LocalSolrQueryRequest(core, new ModifiableSolrParams());
    SolrQueryResponse rsp = new SolrQueryResponse();
    if (SolrRequestInfo.getRequestInfo() == null) {
      clearRequestInfo = true;
      SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp)); // important for debugging
    }
    try {
      SolrQueryRequest req = new LocalSolrQueryRequest(core, new ModifiableSolrParams());
      SolrQueryResponse rsp = new SolrQueryResponse();
      if (SolrRequestInfo.getRequestInfo() == null) {
        clearRequestInfo = true;
        SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp)); // important for debugging
      }


      if (!commitOnClose) {
        if (writer != null) {
          writer.rollback();

@@ -845,58 +843,65 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
        return;
      }

      // do a commit before we quit?
      boolean tryToCommit = writer != null && ulog != null && ulog.hasUncommittedChanges() && ulog.getState() == UpdateLog.State.ACTIVE;
      // do a commit before we quit?
      boolean tryToCommit = writer != null && ulog != null && ulog.hasUncommittedChanges()
          && ulog.getState() == UpdateLog.State.ACTIVE;

      // be tactical with this lock! closing the updatelog can deadlock when it tries to commit
      solrCoreState.getCommitLock().lock();
      try {
        if (tryToCommit) {
          log.info("Committing on IndexWriter close.");
          CommitUpdateCommand cmd = new CommitUpdateCommand(req, false);
          cmd.openSearcher = false;
          cmd.waitSearcher = false;
          cmd.softCommit = false;
      try {
        if (tryToCommit) {
          log.info("Committing on IndexWriter close.");
          CommitUpdateCommand cmd = new CommitUpdateCommand(req, false);
          cmd.openSearcher = false;
          cmd.waitSearcher = false;
          cmd.softCommit = false;

          // TODO: keep other commit callbacks from being called?
          // this.commit(cmd); // too many test failures using this method... is it because of callbacks?
          // TODO: keep other commit callbacks from being called?
          // this.commit(cmd); // too many test failures using this method... is it because of callbacks?

          synchronized (solrCoreState.getUpdateLock()) {
            ulog.preCommit(cmd);
          synchronized (solrCoreState.getUpdateLock()) {
            ulog.preCommit(cmd);
          }

          // todo: refactor this shared code (or figure out why a real CommitUpdateCommand can't be used)
          SolrIndexWriter.setCommitData(writer, cmd.getVersion());
          writer.commit();

          synchronized (solrCoreState.getUpdateLock()) {
            ulog.postCommit(cmd);
          }
        }

        // todo: refactor this shared code (or figure out why a real CommitUpdateCommand can't be used)
        SolrIndexWriter.setCommitData(writer, cmd.getVersion());
        writer.commit();

        synchronized (solrCoreState.getUpdateLock()) {
          ulog.postCommit(cmd);
      } catch (Throwable th) {
        log.error("Error in final commit", th);
        if (th instanceof OutOfMemoryError) {
          throw (OutOfMemoryError) th;
        }
      }
      } catch (Throwable th) {
        log.error("Error in final commit", th);
        if (th instanceof OutOfMemoryError) {
          throw (OutOfMemoryError) th;
        }
      }

      // we went through the normal process to commit, so we don't have to artificially
      // cap any ulog files.
      try {
        if (ulog != null) ulog.close(false);
      } catch (Throwable th) {
        log.error("Error closing log files", th);
        if (th instanceof OutOfMemoryError) {
          throw (OutOfMemoryError) th;
        }
      }
    } finally {
      solrCoreState.getCommitLock().unlock();

      if (writer != null) {
        writer.close();
      }

    } finally {
      solrCoreState.getCommitLock().unlock();
      if (clearRequestInfo) SolrRequestInfo.clearRequestInfo();
    }
    // we went through the normal process to commit, so we don't have to artificially
    // cap any ulog files.
    try {
      if (ulog != null) ulog.close(false);
    } catch (Throwable th) {
      log.error("Error closing log files", th);
      if (th instanceof OutOfMemoryError) {
        throw (OutOfMemoryError) th;
      }
    }

    if (writer != null) {
      writer.close();
    }

  }

  @Override
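commitOnClose being volatile matters because tests flip it from another thread to simulate a non-graceful close. A typical (illustrative) test usage of this switch:

// Disable the final commit so close() looks like a crash, then restore the
// default so later tests see normal shutdown behavior.
DirectUpdateHandler2.commitOnClose = false;
try {
  // ... stop a core or node and assert on replay-from-tlog behavior ...
} finally {
  DirectUpdateHandler2.commitOnClose = true;
}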
@@ -26,6 +26,7 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.Sort;
import org.apache.solr.cloud.ActionThrottle;
import org.apache.solr.cloud.RecoveryStrategy;
import org.apache.solr.common.AlreadyClosedException;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.CoreDescriptor;
import org.apache.solr.core.DirectoryFactory;

@@ -172,7 +173,12 @@ public abstract class SolrCoreState {

  public abstract void setLastReplicateIndexSuccess(boolean success);

  public static class CoreIsClosedException extends IllegalStateException {
  public static class CoreIsClosedException extends AlreadyClosedException {

    public CoreIsClosedException() {
      super();
    }

    public CoreIsClosedException(String s) {
      super(s);
    }
@@ -183,7 +183,7 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer {
  }

  long id = -1;
  protected State state = State.ACTIVE;
  protected volatile State state = State.ACTIVE;

  protected TransactionLog bufferTlog;
  protected TransactionLog tlog;

@@ -1351,8 +1351,9 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer {
  }

  public void close(boolean committed, boolean deleteOnClose) {
    recoveryExecutor.shutdown(); // no new tasks

    synchronized (this) {
      recoveryExecutor.shutdown(); // no new tasks

      // Don't delete the old tlogs, we want to be able to replay from them and retrieve old versions

@@ -1373,11 +1374,12 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer {
        bufferTlog.forceClose();
      }

      try {
        ExecutorUtil.shutdownAndAwaitTermination(recoveryExecutor);
      } catch (Exception e) {
        SolrException.log(log, e);
      }
    }

    try {
      ExecutorUtil.shutdownAndAwaitTermination(recoveryExecutor);
    } catch (Exception e) {
      SolrException.log(log, e);
    }
  }
@@ -66,10 +66,14 @@ public class UpdateShardHandler implements SolrMetricProducer, SolrInfoBean {

  private final CloseableHttpClient updateOnlyClient;

  private final CloseableHttpClient recoveryOnlyClient;

  private final CloseableHttpClient defaultClient;

  private final InstrumentedPoolingHttpClientConnectionManager updateOnlyConnectionManager;

  private final InstrumentedPoolingHttpClientConnectionManager recoveryOnlyConnectionManager;

  private final InstrumentedPoolingHttpClientConnectionManager defaultConnectionManager;

  private final InstrumentedHttpRequestExecutor httpRequestExecutor;

@@ -83,10 +87,13 @@ public class UpdateShardHandler implements SolrMetricProducer, SolrInfoBean {

  public UpdateShardHandler(UpdateShardHandlerConfig cfg) {
    updateOnlyConnectionManager = new InstrumentedPoolingHttpClientConnectionManager(HttpClientUtil.getSchemaRegisteryProvider().getSchemaRegistry());
    recoveryOnlyConnectionManager = new InstrumentedPoolingHttpClientConnectionManager(HttpClientUtil.getSchemaRegisteryProvider().getSchemaRegistry());
    defaultConnectionManager = new InstrumentedPoolingHttpClientConnectionManager(HttpClientUtil.getSchemaRegisteryProvider().getSchemaRegistry());
    if (cfg != null ) {
      updateOnlyConnectionManager.setMaxTotal(cfg.getMaxUpdateConnections());
      updateOnlyConnectionManager.setDefaultMaxPerRoute(cfg.getMaxUpdateConnectionsPerHost());
      recoveryOnlyConnectionManager.setMaxTotal(cfg.getMaxUpdateConnections());
      recoveryOnlyConnectionManager.setDefaultMaxPerRoute(cfg.getMaxUpdateConnectionsPerHost());
      defaultConnectionManager.setMaxTotal(cfg.getMaxUpdateConnections());
      defaultConnectionManager.setDefaultMaxPerRoute(cfg.getMaxUpdateConnectionsPerHost());
    }

@@ -110,6 +117,7 @@ public class UpdateShardHandler implements SolrMetricProducer, SolrInfoBean {

    httpRequestExecutor = new InstrumentedHttpRequestExecutor(metricNameStrategy);
    updateOnlyClient = HttpClientUtil.createClient(clientParams, updateOnlyConnectionManager, false, httpRequestExecutor);
    recoveryOnlyClient = HttpClientUtil.createClient(clientParams, recoveryOnlyConnectionManager, false, httpRequestExecutor);
    defaultClient = HttpClientUtil.createClient(clientParams, defaultConnectionManager, false, httpRequestExecutor);

    // following is done only for logging complete configuration.

@@ -178,6 +186,11 @@ public class UpdateShardHandler implements SolrMetricProducer, SolrInfoBean {
    return updateOnlyClient;
  }

  // don't introduce a bug, this client is for recovery ops only!
  public HttpClient getRecoveryOnlyHttpClient() {
    return recoveryOnlyClient;
  }


  /**
   * This method returns an executor that is meant for non search related tasks.

@@ -191,6 +204,10 @@ public class UpdateShardHandler implements SolrMetricProducer, SolrInfoBean {
  public PoolingHttpClientConnectionManager getDefaultConnectionManager() {
    return defaultConnectionManager;
  }

  public PoolingHttpClientConnectionManager getRecoveryOnlyConnectionManager() {
    return recoveryOnlyConnectionManager;
  }

  /**
   *

@@ -206,12 +223,14 @@ public class UpdateShardHandler implements SolrMetricProducer, SolrInfoBean {
      ExecutorUtil.shutdownAndAwaitTermination(updateExecutor);
      ExecutorUtil.shutdownAndAwaitTermination(recoveryExecutor);
    } catch (Exception e) {
      SolrException.log(log, e);
      throw new RuntimeException(e);
    } finally {
      HttpClientUtil.close(updateOnlyClient);
      HttpClientUtil.close(recoveryOnlyClient);
      HttpClientUtil.close(defaultClient);
      updateOnlyConnectionManager.close();
      defaultConnectionManager.close();
      recoveryOnlyConnectionManager.close();
    }
  }
@@ -16,6 +16,9 @@
 */
package org.apache.solr.update.processor;

import static org.apache.solr.common.params.CommonParams.DISTRIB;
import static org.apache.solr.update.processor.DistributingUpdateProcessorFactory.DISTRIB_UPDATE_PARAM;

import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;

@@ -28,6 +31,9 @@ import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.ReentrantLock;


@@ -37,7 +43,6 @@ import org.apache.lucene.util.CharsRefBuilder;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.SolrRequest.METHOD;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.cloud.DistributedQueue;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.GenericSolrRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;

@@ -97,9 +102,6 @@ import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.solr.common.params.CommonParams.DISTRIB;
import static org.apache.solr.update.processor.DistributingUpdateProcessorFactory.DISTRIB_UPDATE_PARAM;

// NOT mt-safe... create a new processor for each add thread
// TODO: we really should not wait for distrib after local? unless a certain replication factor is asked for
public class DistributedUpdateProcessor extends UpdateRequestProcessor {
@@ -116,12 +118,12 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
  /**
   * A request forwarded to a leader of a different shard will be retried up to this many times by default.
   */
  static final int MAX_RETRIES_ON_FORWARD_DEAULT = 25;
  static final int MAX_RETRIES_ON_FORWARD_DEAULT = Integer.getInteger("solr.retries.on.forward", 25);

  /**
   * Requests from a leader to its followers will be retried up to this many times by default.
   */
  static final int MAX_RETRIES_TO_FOLLOWERS_DEFAULT = 3;
  static final int MAX_RETRIES_TO_FOLLOWERS_DEFAULT = Integer.getInteger("solr.retries.to.followers", 3);

  /**
   * Values this processor supports for the <code>DISTRIB_UPDATE_PARAM</code>.
@@ -433,6 +435,46 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
    }
    return false;
  }

  private List<Node> getReplicaNodesForLeader(String shardId, Replica leaderReplica) {
    ClusterState clusterState = zkController.getZkStateReader().getClusterState();
    String leaderCoreNodeName = leaderReplica.getName();
    List<Replica> replicas = clusterState.getCollection(collection)
        .getSlice(shardId)
        .getReplicas(EnumSet.of(Replica.Type.NRT, Replica.Type.TLOG));
    replicas.removeIf((replica) -> replica.getName().equals(leaderCoreNodeName));
    if (replicas.isEmpty()) {
      return null;
    }

    // check for test param that lets us miss replicas
    String[] skipList = req.getParams().getParams(TEST_DISTRIB_SKIP_SERVERS);
    Set<String> skipListSet = null;
    if (skipList != null) {
      skipListSet = new HashSet<>(skipList.length);
      skipListSet.addAll(Arrays.asList(skipList));
      log.info("test.distrib.skip.servers was found and contains:" + skipListSet);
    }

    List<Node> nodes = new ArrayList<>(replicas.size());
    skippedCoreNodeNames = new HashSet<>();
    ZkShardTerms zkShardTerms = zkController.getShardTerms(collection, shardId);
    for (Replica replica : replicas) {
      String coreNodeName = replica.getName();
      if (skipList != null && skipListSet.contains(replica.getCoreUrl())) {
        log.info("check url:" + replica.getCoreUrl() + " against:" + skipListSet + " result:true");
      } else if (zkShardTerms.registered(coreNodeName) && zkShardTerms.skipSendingUpdatesTo(coreNodeName)) {
        log.debug("skip url:{} cause its term is less than leader", replica.getCoreUrl());
        skippedCoreNodeNames.add(replica.getName());
      } else if (!clusterState.getLiveNodes().contains(replica.getNodeName())
          || replica.getState() == Replica.State.DOWN) {
        skippedCoreNodeNames.add(replica.getName());
      } else {
        nodes.add(new StdNode(new ZkCoreNodeProps(replica), collection, shardId));
      }
    }
    return nodes;
  }

  /** For {@link org.apache.solr.common.params.CollectionParams.CollectionAction#SPLITSHARD} */
  private List<Node> getSubShardLeaders(DocCollection coll, String shardId, String docId, SolrInputDocument doc) {
@@ -521,8 +563,7 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
        ZkStateReader.SHARD_ID_PROP, myShardId,
        "routeKey", routeKey + "!");
    SolrZkClient zkClient = zkController.getZkClient();
    DistributedQueue queue = Overseer.getStateUpdateQueue(zkClient);
    queue.offer(Utils.toJSON(map));
    zkController.getOverseer().offerStateUpdate(Utils.toJSON(map));
  } catch (KeeperException e) {
    log.warn("Exception while removing routing rule for route key: " + routeKey, e);
  } catch (Exception e) {
@@ -1865,38 +1906,42 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {

    updateCommand = cmd;
    List<Node> nodes = null;
    boolean singleLeader = false;
    Replica leaderReplica = null;
    if (zkEnabled) {
      zkCheck();
      try {
        leaderReplica = zkController.getZkStateReader().getLeaderRetry(collection, cloudDesc.getShardId());
      } catch (InterruptedException e) {
        Thread.interrupted();
        throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "Exception finding leader for shard " + cloudDesc.getShardId(), e);
      }
      isLeader = leaderReplica.getName().equals(cloudDesc.getCoreNodeName());

      nodes = getCollectionUrls(collection, EnumSet.of(Replica.Type.TLOG,Replica.Type.NRT));
      nodes = getCollectionUrls(collection, EnumSet.of(Replica.Type.TLOG,Replica.Type.NRT), true);
      if (nodes == null) {
        // This could happen if there are only pull replicas
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
            "Unable to distribute commit operation. No replicas available of types " + Replica.Type.TLOG + " or " + Replica.Type.NRT);
      }
      if (isLeader && nodes.size() == 1 && replicaType != Replica.Type.PULL) {
        singleLeader = true;
      }

      nodes.removeIf((node) -> node.getNodeProps().getNodeName().equals(zkController.getNodeName())
          && node.getNodeProps().getCoreName().equals(req.getCore().getName()));
    }

    if (!zkEnabled || req.getParams().getBool(COMMIT_END_POINT, false) || singleLeader) {
      CompletionService<Exception> completionService = new ExecutorCompletionService<>(req.getCore().getCoreContainer().getUpdateShardHandler().getUpdateExecutor());
      Set<Future<Exception>> pending = new HashSet<>();
    if (!zkEnabled || (!isLeader && req.getParams().get(COMMIT_END_POINT, "").equals("replicas"))) {
      if (replicaType == Replica.Type.TLOG) {
        try {
          Replica leaderReplica = zkController.getZkStateReader().getLeaderRetry(
              collection, cloudDesc.getShardId());
          isLeader = leaderReplica.getName().equals(cloudDesc.getCoreNodeName());
          if (isLeader) {
            long commitVersion = vinfo.getNewClock();
            cmd.setVersion(commitVersion);
            doLocalCommit(cmd);
          } else {
            assert TestInjection.waitForInSyncWithLeader(req.getCore(),
                zkController, collection, cloudDesc.getShardId()): "Core " + req.getCore() + " not in sync with leader";
          }
        } catch (InterruptedException e) {
          throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "Exception finding leader for shard " + cloudDesc.getShardId(), e);

        if (isLeader) {
          long commitVersion = vinfo.getNewClock();
          cmd.setVersion(commitVersion);
          doLocalCommit(cmd);
        } else {
          assert TestInjection.waitForInSyncWithLeader(req.getCore(),
              zkController, collection, cloudDesc.getShardId()) : "Core " + req.getCore() + " not in sync with leader";
        }

      } else if (replicaType == Replica.Type.PULL) {
        log.warn("Commit not supported on replicas of type " + Replica.Type.PULL);
      } else {
@@ -1905,21 +1950,51 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
          long commitVersion = vinfo.getNewClock();
          cmd.setVersion(commitVersion);
        }

        doLocalCommit(cmd);
      }
    } else {
      ModifiableSolrParams params = new ModifiableSolrParams(filterParams(req.getParams()));
      if (!req.getParams().getBool(COMMIT_END_POINT, false)) {
        params.set(COMMIT_END_POINT, true);

      List<Node> useNodes = null;
      if (req.getParams().get(COMMIT_END_POINT) == null) {
        useNodes = nodes;
        params.set(DISTRIB_UPDATE_PARAM, DistribPhase.TOLEADER.toString());
        params.set(COMMIT_END_POINT, "leaders");
        if (useNodes != null) {
          params.set(DISTRIB_FROM, ZkCoreNodeProps.getCoreUrl(
              zkController.getBaseUrl(), req.getCore().getName()));
          cmdDistrib.distribCommit(cmd, useNodes, params);
          cmdDistrib.blockAndDoRetries();
        }
      }

      if (isLeader) {
        params.set(DISTRIB_UPDATE_PARAM, DistribPhase.FROMLEADER.toString());
        params.set(DISTRIB_FROM, ZkCoreNodeProps.getCoreUrl(
            zkController.getBaseUrl(), req.getCore().getName()));
        if (nodes != null) {
          cmdDistrib.distribCommit(cmd, nodes, params);

        params.set(COMMIT_END_POINT, "replicas");

        useNodes = getReplicaNodesForLeader(cloudDesc.getShardId(), leaderReplica);

        if (useNodes != null) {
          params.set(DISTRIB_FROM, ZkCoreNodeProps.getCoreUrl(
              zkController.getBaseUrl(), req.getCore().getName()));

          cmdDistrib.distribCommit(cmd, useNodes, params);
        }
        // NRT replicas will always commit
        if (vinfo != null) {
          long commitVersion = vinfo.getNewClock();
          cmd.setVersion(commitVersion);
        }

        doLocalCommit(cmd);
        if (useNodes != null) {
          cmdDistrib.blockAndDoRetries();
        }
      }
    }

  }

  private void doLocalCommit(CommitUpdateCommand cmd) throws IOException {
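Read end to end, the reworked commit path (SOLR-12933) is a two-hop fan-out keyed off COMMIT_END_POINT: the entry node forwards to shard leaders ("leaders"), each leader commits locally and forwards to its own NRT/TLOG replicas ("replicas"), and replicas simply commit. From a client's point of view nothing changes; a hedged SolrJ illustration (the ZK address and collection name are illustrative):

try (CloudSolrClient client = new CloudSolrClient.Builder(
        Collections.singletonList("localhost:9983"), Optional.empty()).build()) {
  client.setDefaultCollection("myCollection");
  client.commit(); // fans out: entry node -> shard leaders -> replicas
}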
@@ -1951,7 +2026,7 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
    if (next != null && nodes == null) next.finish();
  }

  private List<Node> getCollectionUrls(String collection, EnumSet<Replica.Type> types) {
  private List<Node> getCollectionUrls(String collection, EnumSet<Replica.Type> types, boolean onlyLeaders) {
    ClusterState clusterState = zkController.getClusterState();
    final DocCollection docCollection = clusterState.getCollectionOrNull(collection);
    if (collection == null || docCollection.getSlicesMap() == null) {

@@ -1962,7 +2037,14 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
    final List<Node> urls = new ArrayList<>(slices.size());
    for (Map.Entry<String,Slice> sliceEntry : slices.entrySet()) {
      Slice replicas = slices.get(sliceEntry.getKey());

      if (onlyLeaders) {
        Replica replica = docCollection.getLeader(replicas.getName());
        if (replica != null) {
          ZkCoreNodeProps nodeProps = new ZkCoreNodeProps(replica);
          urls.add(new StdNode(nodeProps, collection, replicas.getName()));
        }
        continue;
      }
      Map<String,Replica> shardMap = replicas.getReplicasMap();

      for (Entry<String,Replica> entry : shardMap.entrySet()) {
@@ -2381,7 +2381,7 @@ public class SolrCLI {

  protected void deleteCollection(CommandLine cli) throws Exception {
    String zkHost = getZkHost(cli);
    try (CloudSolrClient cloudSolrClient = new CloudSolrClient.Builder(Collections.singletonList(zkHost), Optional.empty()).build()) {
    try (CloudSolrClient cloudSolrClient = new CloudSolrClient.Builder(Collections.singletonList(zkHost), Optional.empty()).withSocketTimeout(30000).withConnectionTimeout(15000).build()) {
      echoIfVerbose("Connecting to ZooKeeper at " + zkHost, cli);
      cloudSolrClient.connect();
      deleteCollection(cloudSolrClient, cli);
@@ -16,6 +16,9 @@
 */
package org.apache.solr.util;

import static org.apache.solr.handler.ReplicationHandler.CMD_DETAILS;
import static org.apache.solr.handler.ReplicationHandler.COMMAND;

import java.lang.invoke.MethodHandles;
import java.lang.reflect.Method;
import java.util.Collections;

@@ -24,6 +27,7 @@ import java.util.Random;
import java.util.Set;
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

@@ -50,9 +54,6 @@ import org.apache.solr.update.SolrIndexWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.solr.handler.ReplicationHandler.CMD_DETAILS;
import static org.apache.solr.handler.ReplicationHandler.COMMAND;


/**
 * Allows random faults to be injected in running code during test runs.
@@ -116,43 +117,50 @@ public class TestInjection {
    }
  }

  public static String nonGracefullClose = null;
  public volatile static String nonGracefullClose = null;

  public static String failReplicaRequests = null;
  public volatile static String failReplicaRequests = null;

  public static String failUpdateRequests = null;
  public volatile static String failUpdateRequests = null;

  public static String nonExistentCoreExceptionAfterUnload = null;
  public volatile static String nonExistentCoreExceptionAfterUnload = null;

  public static String updateLogReplayRandomPause = null;
  public volatile static String updateLogReplayRandomPause = null;

  public static String updateRandomPause = null;
  public volatile static String updateRandomPause = null;

  public static String prepRecoveryOpPauseForever = null;
  public volatile static String prepRecoveryOpPauseForever = null;

  public static String randomDelayInCoreCreation = null;
  public volatile static String randomDelayInCoreCreation = null;

  public static int randomDelayMaxInCoreCreationInSec = 10;
  public volatile static int randomDelayMaxInCoreCreationInSec = 10;

  public static String splitFailureBeforeReplicaCreation = null;
  public volatile static String splitFailureBeforeReplicaCreation = null;

  public static String splitFailureAfterReplicaCreation = null;
  public volatile static String splitFailureAfterReplicaCreation = null;

  public static CountDownLatch splitLatch = null;
  public volatile static CountDownLatch splitLatch = null;

  public static String waitForReplicasInSync = "true:60";
  public volatile static String waitForReplicasInSync = "true:60";

  public static String failIndexFingerprintRequests = null;
  public volatile static String failIndexFingerprintRequests = null;

  public static String wrongIndexFingerprint = null;
  public volatile static String wrongIndexFingerprint = null;

  private static Set<Timer> timers = Collections.synchronizedSet(new HashSet<Timer>());
  private volatile static Set<Timer> timers = Collections.synchronizedSet(new HashSet<Timer>());

  private static AtomicInteger countPrepRecoveryOpPauseForever = new AtomicInteger(0);
  private volatile static AtomicInteger countPrepRecoveryOpPauseForever = new AtomicInteger(0);

  public static Integer delayBeforeSlaveCommitRefresh=null;
  public volatile static Integer delayBeforeSlaveCommitRefresh=null;

  public static boolean uifOutOfMemoryError = false;
  public volatile static boolean uifOutOfMemoryError = false;

  private volatile static CountDownLatch notifyPauseForeverDone = new CountDownLatch(1);

  public static void notifyPauseForeverDone() {
    notifyPauseForeverDone.countDown();
    notifyPauseForeverDone = new CountDownLatch(1);
  }

  public static void reset() {
    nonGracefullClose = null;
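The CountDownLatch replaces the old Thread.sleep(Integer.MAX_VALUE) pause: a paused thread awaits the latch, and reset() releases and re-arms it. A self-contained sketch of the same pattern (plain java.util.concurrent, names illustrative):

// Park until released, interruptibly - instead of sleeping forever.
CountDownLatch gate = new CountDownLatch(1);
Thread paused = new Thread(() -> {
  try {
    gate.await();                  // blocks here until the latch is counted down
  } catch (InterruptedException e) {
    Thread.currentThread().interrupt();
  }
});
paused.start();
gate.countDown();                  // e.g. test teardown releases every waiter at once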
@@ -172,7 +180,8 @@ public class TestInjection {
    wrongIndexFingerprint = null;
    delayBeforeSlaveCommitRefresh = null;
    uifOutOfMemoryError = false;

    notifyPauseForeverDone();
    newSearcherHooks.clear();
    for (Timer timer : timers) {
      timer.cancel();
    }
@@ -371,19 +380,20 @@ public class TestInjection {
  }

  public static boolean injectPrepRecoveryOpPauseForever() {
    if (prepRecoveryOpPauseForever != null) {
    String val = prepRecoveryOpPauseForever;
    if (val != null) {
      Random rand = random();
      if (null == rand) return true;

      Pair<Boolean,Integer> pair = parseValue(prepRecoveryOpPauseForever);
      Pair<Boolean,Integer> pair = parseValue(val);
      boolean enabled = pair.first();
      int chanceIn100 = pair.second();
      // Prevent a continuous pause-forever
      if (enabled && rand.nextInt(100) >= (100 - chanceIn100) && countPrepRecoveryOpPauseForever.get() < 1) {
        countPrepRecoveryOpPauseForever.incrementAndGet();
        log.info("inject pause forever for prep recovery op");

        try {
          Thread.sleep(Integer.MAX_VALUE);
          notifyPauseForeverDone.await();
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
        }
@@ -481,9 +491,12 @@ public class TestInjection {
    return false;
  }

  private static Pair<Boolean,Integer> parseValue(String raw) {
  private static Pair<Boolean,Integer> parseValue(final String raw) {
    if (raw == null) return new Pair<>(false, 0);
    Matcher m = ENABLED_PERCENT.matcher(raw);
    if (!m.matches()) throw new RuntimeException("No match, probably bad syntax: " + raw);
    if (!m.matches()) {
      throw new RuntimeException("No match, probably bad syntax: " + raw);
    }
    String val = m.group(1);
    String percent = "100";
    if (m.groupCount() == 2) {
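parseValue accepts the "enabled:percent" strings used by every injection point in this class, where the percent is a chance in 100 and defaults to 100 when omitted. Illustrative settings:

TestInjection.failReplicaRequests = "true:20";   // fail ~20% of replica requests
TestInjection.waitForReplicasInSync = "true:60"; // the default visible in this diff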
@@ -511,4 +524,24 @@ public class TestInjection {
    return true;
  }

  static Set<Hook> newSearcherHooks = ConcurrentHashMap.newKeySet();

  public interface Hook {
    public void newSearcher(String collectionName);
    public void waitForSearcher(String collection, int cnt, int timeoutms, boolean failOnTimeout) throws InterruptedException;
  }

  public static boolean newSearcherHook(Hook hook) {
    newSearcherHooks.add(hook);
    return true;
  }

  public static boolean injectSearcherHooks(String collectionName) {
    for (Hook hook : newSearcherHooks) {
      hook.newSearcher(collectionName);
    }
    return true;
  }

}
@@ -61,8 +61,13 @@ public class TimeOut {
  public void waitFor(String messageOnTimeOut, Supplier<Boolean> supplier)
      throws InterruptedException, TimeoutException {
    while (!supplier.get() && !hasTimedOut()) {
      Thread.sleep(500);
      Thread.sleep(250);
    }
    if (hasTimedOut()) throw new TimeoutException(messageOnTimeOut);
  }

  @Override
  public String toString() {
    return "TimeOut [timeoutAt=" + timeoutAt + ", startTime=" + startTime + ", timeSource=" + timeSource + "]";
  }
}
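A typical waitFor call polls a condition until it is true or the TimeOut expires, now at 250ms granularity. A hedged usage sketch (the condition supplier is hypothetical; TimeSource.NANO_TIME matches how Solr tests usually construct a TimeOut):

TimeOut timeout = new TimeOut(10, TimeUnit.SECONDS, TimeSource.NANO_TIME);
timeout.waitFor("replica never became active",
    () -> isReplicaActive()); // hypothetical condition supplier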
@@ -35,6 +35,7 @@
  <int name="autoReplicaFailoverWaitAfterExpiration">${autoReplicaFailoverWaitAfterExpiration:10000}</int>
  <int name="autoReplicaFailoverWorkLoopDelay">${autoReplicaFailoverWorkLoopDelay:10000}</int>
  <int name="autoReplicaFailoverBadNodeExpiration">${autoReplicaFailoverBadNodeExpiration:60000}</int>
  <int name="createCollectionWaitTimeTillActive">${createCollectionWaitTimeTillActive:30}</int>
</solrcloud>

<metrics>
@@ -27,7 +27,7 @@

<shardHandlerFactory name="shardHandlerFactory" class="HttpShardHandlerFactory">
  <str name="urlScheme">${urlScheme:}</str>
  <int name="socketTimeout">${socketTimeout:90000}</int>
  <int name="socketTimeout">${socketTimeout:15000}</int>
  <int name="connTimeout">${connTimeout:15000}</int>
</shardHandlerFactory>

@@ -40,12 +40,12 @@
  <str name="host">127.0.0.1</str>
  <int name="hostPort">${hostPort:8983}</int>
  <str name="hostContext">${hostContext:solr}</str>
  <int name="zkClientTimeout">${solr.zkclienttimeout:30000}</int>
  <int name="zkClientTimeout">${solr.zkclienttimeout:60000}</int> <!-- This should be high by default - dc's are expensive -->
  <bool name="genericCoreNodeNames">${genericCoreNodeNames:true}</bool>
  <int name="leaderVoteWait">${leaderVoteWait:10000}</int>
  <int name="leaderConflictResolveWait">${leaderConflictResolveWait:180000}</int>
  <int name="distribUpdateConnTimeout">${distribUpdateConnTimeout:45000}</int>
  <int name="distribUpdateSoTimeout">${distribUpdateSoTimeout:340000}</int>
  <int name="leaderVoteWait">${leaderVoteWait:15000}</int> <!-- We are running tests - the default should be low, not like production -->
  <int name="leaderConflictResolveWait">${leaderConflictResolveWait:45000}</int>
  <int name="distribUpdateConnTimeout">${distribUpdateConnTimeout:5000}</int>
  <int name="distribUpdateSoTimeout">${distribUpdateSoTimeout:15000}</int> <!-- We are running tests - the default should be low, not like production -->
  <int name="autoReplicaFailoverWaitAfterExpiration">${autoReplicaFailoverWaitAfterExpiration:10000}</int>
  <int name="autoReplicaFailoverWorkLoopDelay">${autoReplicaFailoverWorkLoopDelay:10000}</int>
  <int name="autoReplicaFailoverBadNodeExpiration">${autoReplicaFailoverBadNodeExpiration:60000}</int>
@ -22,9 +22,14 @@ import java.util.ArrayList;
|
|||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.EnumSet;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.ExecutorCompletionService;
|
||||
import java.util.concurrent.Future;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase.Slow;
|
||||
|
@ -38,16 +43,15 @@ import org.apache.solr.client.solrj.response.FacetField;
|
|||
import org.apache.solr.client.solrj.response.FieldStatsInfo;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.client.solrj.response.RangeFacet;
|
||||
import org.apache.solr.cloud.ChaosMonkey;
|
||||
import org.apache.solr.common.EnumFieldValue;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.FacetParams.FacetRangeMethod;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.params.ShardParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.params.StatsParams;
|
||||
import org.apache.solr.common.params.FacetParams.FacetRangeMethod;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.handler.component.ShardResponse;
|
||||
import org.apache.solr.handler.component.StatsComponentTest.StatSetCombinations;
|
||||
|
@ -100,6 +104,11 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
|
|||
// we validate the connection before use on the restarted
|
||||
// server so that we don't use a bad one
|
||||
System.setProperty("validateAfterInactivity", "200");
|
||||
|
||||
System.setProperty("solr.httpclient.retries", "0");
|
||||
System.setProperty("distribUpdateSoTimeout", "5000");
|
||||
|
||||
|
||||
}
|
||||
|
||||
public TestDistributedSearch() {
|
||||
|
@ -109,6 +118,9 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
|
|||
|
||||
@Test
|
||||
public void test() throws Exception {
|
||||
|
||||
assertEquals(clients.size(), jettys.size());
|
||||
|
||||
QueryResponse rsp = null;
|
||||
int backupStress = stress; // make a copy so we can restore
|
||||
|
||||
|
@ -952,74 +964,81 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
|
|||
assertEquals("should have an entry for each shard ["+sinfo+"] "+shards, cnt, sinfo.size());
|
||||
|
||||
// test shards.tolerant=true
|
||||
for(int numDownServers = 0; numDownServers < jettys.size()-1; numDownServers++)
|
||||
{
|
||||
List<JettySolrRunner> upJettys = new ArrayList<>(jettys);
|
||||
List<SolrClient> upClients = new ArrayList<>(clients);
|
||||
List<JettySolrRunner> downJettys = new ArrayList<>();
|
||||
List<String> upShards = new ArrayList<>(Arrays.asList(shardsArr));
|
||||
for(int i=0; i<numDownServers; i++)
|
||||
{
|
||||
// shut down some of the jettys
|
||||
int indexToRemove = r.nextInt(upJettys.size());
|
||||
JettySolrRunner downJetty = upJettys.remove(indexToRemove);
|
||||
upClients.remove(indexToRemove);
|
||||
upShards.remove(indexToRemove);
|
||||
ChaosMonkey.stop(downJetty);
|
||||
downJettys.add(downJetty);
|
||||
}
|
||||
|
||||
queryPartialResults(upShards, upClients,
|
||||
"q","*:*",
|
||||
"facet","true",
|
||||
"facet.field",t1,
|
||||
"facet.field",t1,
|
||||
"facet.limit",5,
|
||||
ShardParams.SHARDS_INFO,"true",
|
||||
ShardParams.SHARDS_TOLERANT,"true");
|
||||
|
||||
queryPartialResults(upShards, upClients,
|
||||
"q", "*:*",
|
||||
"facet", "true",
|
||||
"facet.query", i1 + ":[1 TO 50]",
|
||||
"facet.query", i1 + ":[1 TO 50]",
|
||||
ShardParams.SHARDS_INFO, "true",
|
||||
ShardParams.SHARDS_TOLERANT, "true");
|
||||
|
||||
// test group query
|
||||
queryPartialResults(upShards, upClients,
|
||||
"q", "*:*",
|
||||
"rows", 100,
|
||||
"fl", "id," + i1,
|
||||
"group", "true",
|
||||
"group.query", t1 + ":kings OR " + t1 + ":eggs",
|
||||
"group.limit", 10,
|
||||
"sort", i1 + " asc, id asc",
|
||||
CommonParams.TIME_ALLOWED, 1,
|
||||
ShardParams.SHARDS_INFO, "true",
|
||||
ShardParams.SHARDS_TOLERANT, "true");
|
||||
|
||||
queryPartialResults(upShards, upClients,
|
||||
"q", "*:*",
|
||||
"stats", "true",
|
||||
"stats.field", i1,
|
||||
ShardParams.SHARDS_INFO, "true",
|
||||
ShardParams.SHARDS_TOLERANT, "true");
|
||||
|
||||
queryPartialResults(upShards, upClients,
|
||||
"q", "toyata",
|
||||
"spellcheck", "true",
|
||||
"spellcheck.q", "toyata",
|
||||
"qt", "/spellCheckCompRH_Direct",
|
||||
"shards.qt", "/spellCheckCompRH_Direct",
|
||||
ShardParams.SHARDS_INFO, "true",
|
||||
ShardParams.SHARDS_TOLERANT, "true");
|
||||
|
||||
// restart the jettys
|
||||
for (JettySolrRunner downJetty : downJettys) {
|
||||
ChaosMonkey.start(downJetty);
|
||||
|
||||
List<JettySolrRunner> upJettys = Collections.synchronizedList(new ArrayList<>(jettys));
|
||||
List<SolrClient> upClients = Collections.synchronizedList(new ArrayList<>(clients));
|
||||
List<JettySolrRunner> downJettys = Collections.synchronizedList(new ArrayList<>());
|
||||
List<String> upShards = Collections.synchronizedList(new ArrayList<>(Arrays.asList(shardsArr)));
|
||||
|
||||
int cap = Math.max(upJettys.size() - 1, 1);
|
||||
|
||||
int numDownServers = random().nextInt(cap);
|
||||
for (int i = 0; i < numDownServers; i++) {
|
||||
if (upJettys.size() == 1) {
|
||||
continue;
|
||||
}
|
||||
// shut down some of the jettys
|
||||
int indexToRemove = r.nextInt(upJettys.size() - 1);
|
||||
JettySolrRunner downJetty = upJettys.remove(indexToRemove);
|
||||
upClients.remove(indexToRemove);
|
||||
upShards.remove(indexToRemove);
|
||||
downJetty.stop();
|
||||
downJettys.add(downJetty);
|
||||
}
|
||||
|
||||
Thread.sleep(100);
|
||||
|
||||
queryPartialResults(upShards, upClients,
|
||||
"q", "*:*",
|
||||
"facet", "true",
|
||||
"facet.field", t1,
|
||||
"facet.field", t1,
|
||||
"facet.limit", 5,
|
||||
ShardParams.SHARDS_INFO, "true",
|
||||
ShardParams.SHARDS_TOLERANT, "true");
|
||||
|
||||
queryPartialResults(upShards, upClients,
|
||||
"q", "*:*",
|
||||
"facet", "true",
|
||||
"facet.query", i1 + ":[1 TO 50]",
|
||||
"facet.query", i1 + ":[1 TO 50]",
|
||||
ShardParams.SHARDS_INFO, "true",
|
||||
ShardParams.SHARDS_TOLERANT, "true");
|
||||
|
||||
// test group query
|
||||
queryPartialResults(upShards, upClients,
|
||||
"q", "*:*",
|
||||
"rows", 100,
|
||||
"fl", "id," + i1,
|
||||
"group", "true",
|
||||
"group.query", t1 + ":kings OR " + t1 + ":eggs",
|
||||
"group.limit", 10,
|
||||
"sort", i1 + " asc, id asc",
|
||||
CommonParams.TIME_ALLOWED, 10000,
|
||||
ShardParams.SHARDS_INFO, "true",
|
||||
ShardParams.SHARDS_TOLERANT, "true");
|
||||
|
||||
queryPartialResults(upShards, upClients,
|
||||
"q", "*:*",
|
||||
"stats", "true",
|
||||
"stats.field", i1,
|
||||
ShardParams.SHARDS_INFO, "true",
|
||||
ShardParams.SHARDS_TOLERANT, "true");
|
||||
|
||||
queryPartialResults(upShards, upClients,
|
||||
"q", "toyata",
|
||||
"spellcheck", "true",
|
||||
"spellcheck.q", "toyata",
|
||||
"qt", "/spellCheckCompRH_Direct",
|
||||
"shards.qt", "/spellCheckCompRH_Direct",
|
||||
ShardParams.SHARDS_INFO, "true",
|
||||
ShardParams.SHARDS_TOLERANT, "true");
|
||||
|
||||
// restart the jettys
|
||||
for (JettySolrRunner downJetty : downJettys) {
|
||||
downJetty.start();
|
||||
}
|
||||
|
||||
|
||||
// This index has the same number for every field
|
||||
|
||||
|
@@ -1125,17 +1144,22 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
    params.remove("distrib");
    setDistributedParams(params);

-    QueryResponse rsp = queryRandomUpServer(params,upClients);
+    if (upClients.size() == 0) {
+      return;
+    }
+    QueryResponse rsp = queryRandomUpServer(params, upClients);

    comparePartialResponses(rsp, controlRsp, upShards);

    if (stress > 0) {
      log.info("starting stress...");
-      Thread[] threads = new Thread[nThreads];
+      Set<Future<Object>> pending = new HashSet<>();;
+      ExecutorCompletionService<Object> cs = new ExecutorCompletionService<>(executor);
+      Callable[] threads = new Callable[nThreads];
      for (int i = 0; i < threads.length; i++) {
-        threads[i] = new Thread() {
+        threads[i] = new Callable() {
          @Override
-          public void run() {
+          public Object call() {
            for (int j = 0; j < stress; j++) {
              int which = r.nextInt(upClients.size());
              SolrClient client = upClients.get(which);
@@ -1148,21 +1172,32 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
                throw new RuntimeException(e);
              }
            }
+            return null;
          }
        };
-        threads[i].start();
+        pending.add(cs.submit(threads[i]));
      }

-      for (Thread thread : threads) {
-        thread.join();
+      while (pending.size() > 0) {
+        Future<Object> future = cs.take();
+        pending.remove(future);
+        future.get();
      }
    }
  }
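The stress loop above moves from raw Threads to Callables drained through an ExecutorCompletionService: future.get() rethrows any task exception, so a failing query now fails the test instead of dying silently on a background thread. A minimal self-contained sketch of the pattern, using only JDK types (the task body is a placeholder, not the test's query loop):

    import java.util.HashSet;
    import java.util.Set;
    import java.util.concurrent.ExecutorCompletionService;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.Future;

    public class CompletionServiceSketch {
      public static void main(String[] args) throws Exception {
        ExecutorService executor = Executors.newCachedThreadPool();
        ExecutorCompletionService<Object> cs = new ExecutorCompletionService<>(executor);
        Set<Future<Object>> pending = new HashSet<>();
        for (int i = 0; i < 4; i++) {
          pending.add(cs.submit(() -> {
            // placeholder for the test's per-thread query loop
            return null;
          }));
        }
        while (!pending.isEmpty()) {
          Future<Object> future = cs.take(); // blocks until any task completes
          pending.remove(future);
          future.get(); // rethrows the task's exception, if any
        }
        executor.shutdown();
      }
    }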
-  protected QueryResponse queryRandomUpServer(ModifiableSolrParams params, List<SolrClient> upClients) throws SolrServerException, IOException {
+  protected QueryResponse queryRandomUpServer(ModifiableSolrParams params, List<SolrClient> upClients)
+      throws SolrServerException, IOException {
    // query a random "up" server
-    int which = r.nextInt(upClients.size());
-    SolrClient client = upClients.get(which);
+    SolrClient client;
+    if (upClients.size() == 1) {
+      client = upClients.get(0);
+    } else {
+      int which = r.nextInt(upClients.size() - 1);
+      client = upClients.get(which);
+    }

    QueryResponse rsp = client.query(params);
    return rsp;
  }
@@ -1195,7 +1230,7 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
        assertTrue("Expected timeAllowedError or to find shardAddress in the up shard info: " + info.toString(), info.get("shardAddress") != null);
      }
    } else {
-      assertEquals("Expected to find the " + SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY + " header set if a shard is down",
+      assertEquals("Expected to find the " + SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY + " header set if a shard is down. Response: " + rsp,
          Boolean.TRUE, rsp.getHeader().get(SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY));
      assertTrue("Expected to find error in the down shard info: " + info.toString(), info.get("error") != null);
    }
@@ -16,14 +16,16 @@
 */
package org.apache.solr;

-import java.io.IOException;
-
+import org.apache.lucene.search.TimeLimitingCollector;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrInputDocument;
+import org.junit.AfterClass;
import org.junit.Test;

+import java.io.IOException;
+
/**
 * Tests that highlighting doesn't break on grouped documents
 * with duplicate unique key fields stored on multiple shards.

@@ -34,6 +36,12 @@ public class TestHighlightDedupGrouping extends BaseDistributedSearchTestCase {
  private static final String group_ti1 = "group_ti1";
  private static final String shard_i1 = "shard_i1";

+  @AfterClass
+  public static void afterClass() throws Exception {
+    TimeLimitingCollector.getGlobalTimerThread().stopTimer();
+    TimeLimitingCollector.getGlobalTimerThread().join();
+  }
+
  @Test
  @ShardsFixed(num = 2)
  public void test() throws Exception {
@@ -57,7 +57,7 @@ public class TestTolerantSearch extends SolrJettyTestBase {
  @BeforeClass
  public static void createThings() throws Exception {
    solrHome = createSolrHome();
-    createJetty(solrHome.getAbsolutePath());
+    createAndStartJetty(solrHome.getAbsolutePath());
    String url = jetty.getBaseUrl().toString();
    collection1 = getHttpSolrClient(url + "/collection1");
    collection2 = getHttpSolrClient(url + "/collection2");
@@ -16,6 +16,9 @@
 */
package org.apache.solr.cloud;

+import static org.apache.solr.client.solrj.response.RequestStatusState.COMPLETED;
+import static org.apache.solr.client.solrj.response.RequestStatusState.FAILED;
+
import java.lang.invoke.MethodHandles;
import java.util.Collection;
import java.util.EnumSet;

@@ -27,26 +30,21 @@ import org.apache.solr.client.solrj.response.RequestStatusState;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.util.LogLevel;
+import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

-import static org.apache.solr.client.solrj.response.RequestStatusState.COMPLETED;
-import static org.apache.solr.client.solrj.response.RequestStatusState.FAILED;
-
-/**
- *
- */
@LogLevel("org.apache.solr.cloud=DEBUG;org.apache.solr.cloud.Overseer=DEBUG;org.apache.solr.cloud.overseer=DEBUG;")
public class AddReplicaTest extends SolrCloudTestCase {

  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  @BeforeClass
  public static void setupCluster() throws Exception {
-    configureCluster(4)
+    configureCluster(3)
        .addConfig("conf1", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf"))
        .configure();
  }

@@ -59,13 +57,14 @@ public class AddReplicaTest extends SolrCloudTestCase {

  @Test
  public void testAddMultipleReplicas() throws Exception {
    cluster.waitForAllNodes(5);

    String collection = "testAddMultipleReplicas";
    CloudSolrClient cloudClient = cluster.getSolrClient();

    CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collection, "conf1", 1, 1);
    create.setMaxShardsPerNode(2);
    cloudClient.request(create);
+    cluster.waitForActiveCollection(collection, 1, 1);

    CollectionAdminRequest.AddReplica addReplica = CollectionAdminRequest.addReplicaToShard(collection, "shard1")
        .setNrtReplicas(1)

@@ -73,6 +72,9 @@ public class AddReplicaTest extends SolrCloudTestCase {
        .setPullReplicas(1);
    RequestStatusState status = addReplica.processAndWait(collection + "_xyz1", cloudClient, 120);
    assertEquals(COMPLETED, status);
+
+    cluster.waitForActiveCollection(collection, 1, 4);
+
    DocCollection docCollection = cloudClient.getZkStateReader().getClusterState().getCollectionOrNull(collection);
    assertNotNull(docCollection);
    assertEquals(4, docCollection.getReplicas().size());
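processAndWait here is the async collection-API idiom: submit the request under a caller-chosen request id, poll REQUESTSTATUS until a terminal state or the timeout, then block in waitForActiveCollection until the replicas are actually live in cluster state. A hedged sketch of the same flow (collection name, async id, and replica counts are illustrative, not taken from the test):

    // Inside a SolrCloudTestCase-style test with a running MiniSolrCloudCluster.
    CloudSolrClient client = cluster.getSolrClient();
    CollectionAdminRequest.AddReplica add =
        CollectionAdminRequest.addReplicaToShard("myColl", "shard1");
    // Submit asynchronously and poll status for up to 120 seconds.
    RequestStatusState state = add.processAndWait("myColl_add_1", client, 120);
    if (state != RequestStatusState.COMPLETED) {
      fail("addreplica did not complete: " + state);
    }
    // Only now assert on shape: wait for 1 shard / 2 active replicas in total.
    cluster.waitForActiveCollection("myColl", 1, 2);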
@@ -110,6 +112,7 @@ public class AddReplicaTest extends SolrCloudTestCase {
        .setCreateNodeSet(String.join(",", createNodeSet));
    status = addReplica.processAndWait(collection + "_xyz1", cloudClient, 120);
    assertEquals(COMPLETED, status);
+    waitForState("Timedout wait for collection to be created", collection, clusterShape(1, 9));
    docCollection = cloudClient.getZkStateReader().getClusterState().getCollectionOrNull(collection);
    assertNotNull(docCollection);
    // sanity check that everything is as before

@@ -120,9 +123,8 @@ public class AddReplicaTest extends SolrCloudTestCase {
  }

  @Test
-  //commented 2-Aug-2018 @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 09-Apr-2018
  public void test() throws Exception {
    cluster.waitForAllNodes(5);

    String collection = "addreplicatest_coll";

    CloudSolrClient cloudClient = cluster.getSolrClient();

@@ -130,6 +132,8 @@ public class AddReplicaTest extends SolrCloudTestCase {
    CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collection, "conf1", 2, 1);
    create.setMaxShardsPerNode(2);
    cloudClient.request(create);
+
+    cluster.waitForActiveCollection(collection, 2, 2);

    ClusterState clusterState = cloudClient.getZkStateReader().getClusterState();
    DocCollection coll = clusterState.getCollection(collection);

@@ -140,6 +144,7 @@ public class AddReplicaTest extends SolrCloudTestCase {
    CollectionAdminRequest.RequestStatus requestStatus = CollectionAdminRequest.requestStatus("000");
    CollectionAdminRequest.RequestStatusResponse rsp = requestStatus.process(cloudClient);
    assertNotSame(rsp.getRequestStatus(), COMPLETED);
+
    // wait for async request success
    boolean success = false;
    for (int i = 0; i < 200; i++) {

@@ -152,11 +157,10 @@ public class AddReplicaTest extends SolrCloudTestCase {
      Thread.sleep(500);
    }
    assertTrue(success);

    Collection<Replica> replicas2 = cloudClient.getZkStateReader().getClusterState().getCollection(collection).getSlice(sliceName).getReplicas();
    replicas2.removeAll(replicas);
    assertEquals(1, replicas2.size());
    Replica r = replicas2.iterator().next();
    assertNotSame(r.toString(), r.getState(), Replica.State.ACTIVE);

    // use waitForFinalState
    addReplica.setWaitForFinalState(true);
@@ -90,7 +90,11 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
  public void testProperties() throws Exception {
    CollectionAdminRequest.createCollection("collection1meta", "conf", 2, 1).process(cluster.getSolrClient());
    CollectionAdminRequest.createCollection("collection2meta", "conf", 1, 1).process(cluster.getSolrClient());
-    waitForState("Expected collection1 to be created with 2 shards and 1 replica", "collection1meta", clusterShape(2, 1));
+
+    cluster.waitForActiveCollection("collection1meta", 2, 2);
+    cluster.waitForActiveCollection("collection2meta", 1, 1);
+
+    waitForState("Expected collection1 to be created with 2 shards and 1 replica", "collection1meta", clusterShape(2, 2));
    waitForState("Expected collection2 to be created with 1 shard and 1 replica", "collection2meta", clusterShape(1, 1));
    ZkStateReader zkStateReader = cluster.getSolrClient().getZkStateReader();
    zkStateReader.createClusterStateWatchersAndUpdate();
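The hunk above is the pattern repeated through the rest of this file: create the collections, block in cluster.waitForActiveCollection(name, shards, totalReplicas), then assert the shape. Note the clusterShape second argument moving from 1 to 2; after this patch it appears to count active replicas across the whole collection rather than replicas per shard (collection1meta is 2 shards x 1 replica). The same wait written directly against ZkStateReader#waitForState, the SOLR-12898 mechanism (names and counts illustrative):

    // Wait up to 30s for "collection1meta" to report 2 live, active replicas in total.
    ZkStateReader reader = cluster.getSolrClient().getZkStateReader();
    reader.waitForState("collection1meta", 30, TimeUnit.SECONDS, (liveNodes, coll) -> {
      if (coll == null) return false;
      int active = 0;
      for (Replica r : coll.getReplicas()) {
        if (r.getState() == Replica.State.ACTIVE && liveNodes.contains(r.getNodeName())) {
          active++;
        }
      }
      return active == 2;
    });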
@@ -204,7 +208,7 @@ public class AliasIntegrationTest extends SolrCloudTestCase {

  @Test
  public void testModifyPropertiesV2() throws Exception {
-    final String aliasName = getTestName();
+    final String aliasName = getSaferTestName();
    ZkStateReader zkStateReader = createColectionsAndAlias(aliasName);
    final String baseUrl = cluster.getRandomJetty(random()).getBaseUrl().toString();
    //TODO fix Solr test infra so that this /____v2/ becomes /api/

@@ -226,7 +230,7 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
  @Test
  public void testModifyPropertiesV1() throws Exception {
    // note we don't use TZ in this test, thus it's UTC
-    final String aliasName = getTestName();
+    final String aliasName = getSaferTestName();
    ZkStateReader zkStateReader = createColectionsAndAlias(aliasName);
    final String baseUrl = cluster.getRandomJetty(random()).getBaseUrl().toString();
    HttpGet get = new HttpGet(baseUrl + "/admin/collections?action=ALIASPROP" +

@@ -241,7 +245,7 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
  @Test
  public void testModifyPropertiesCAR() throws Exception {
    // note we don't use TZ in this test, thus it's UTC
-    final String aliasName = getTestName();
+    final String aliasName = getSaferTestName();
    ZkStateReader zkStateReader = createColectionsAndAlias(aliasName);
    CollectionAdminRequest.SetAliasProperty setAliasProperty = CollectionAdminRequest.setAliasProperty(aliasName);
    setAliasProperty.addProperty("foo","baz");

@@ -278,7 +282,11 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
  private ZkStateReader createColectionsAndAlias(String aliasName) throws SolrServerException, IOException, KeeperException, InterruptedException {
    CollectionAdminRequest.createCollection("collection1meta", "conf", 2, 1).process(cluster.getSolrClient());
    CollectionAdminRequest.createCollection("collection2meta", "conf", 1, 1).process(cluster.getSolrClient());
-    waitForState("Expected collection1 to be created with 2 shards and 1 replica", "collection1meta", clusterShape(2, 1));
+
+    cluster.waitForActiveCollection("collection1meta", 2, 2);
+    cluster.waitForActiveCollection("collection2meta", 1, 1);
+
+    waitForState("Expected collection1 to be created with 2 shards and 1 replica", "collection1meta", clusterShape(2, 2));
    waitForState("Expected collection2 to be created with 1 shard and 1 replica", "collection2meta", clusterShape(1, 1));
    ZkStateReader zkStateReader = cluster.getSolrClient().getZkStateReader();
    zkStateReader.createClusterStateWatchersAndUpdate();

@@ -326,7 +334,11 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
  public void testDeleteAliasWithExistingCollectionName() throws Exception {
    CollectionAdminRequest.createCollection("collection_old", "conf", 2, 1).process(cluster.getSolrClient());
    CollectionAdminRequest.createCollection("collection_new", "conf", 1, 1).process(cluster.getSolrClient());
-    waitForState("Expected collection_old to be created with 2 shards and 1 replica", "collection_old", clusterShape(2, 1));
+
+    cluster.waitForActiveCollection("collection_old", 2, 2);
+    cluster.waitForActiveCollection("collection_new", 1, 1);
+
+    waitForState("Expected collection_old to be created with 2 shards and 1 replica", "collection_old", clusterShape(2, 2));
    waitForState("Expected collection_new to be created with 1 shard and 1 replica", "collection_new", clusterShape(1, 1));

    new UpdateRequest()

@@ -399,7 +411,11 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
  public void testDeleteOneOfTwoCollectionsAliased() throws Exception {
    CollectionAdminRequest.createCollection("collection_one", "conf", 2, 1).process(cluster.getSolrClient());
    CollectionAdminRequest.createCollection("collection_two", "conf", 1, 1).process(cluster.getSolrClient());
-    waitForState("Expected collection_one to be created with 2 shards and 1 replica", "collection_one", clusterShape(2, 1));
+
+    cluster.waitForActiveCollection("collection_one", 2, 2);
+    cluster.waitForActiveCollection("collection_two", 1, 1);
+
+    waitForState("Expected collection_one to be created with 2 shards and 1 replica", "collection_one", clusterShape(2, 2));
    waitForState("Expected collection_two to be created with 1 shard and 1 replica", "collection_two", clusterShape(1, 1));

    new UpdateRequest()

@@ -439,8 +455,9 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
    // was deleted (and, assuming that it only points to collection_old).
    try {
      cluster.getSolrClient().query("collection_one", new SolrQuery("*:*"));
-    } catch (SolrServerException se) {
-      assertTrue(se.getMessage().contains("No live SolrServers"));
+      fail("should have failed");
+    } catch (SolrServerException | SolrException se) {
+
    }

    // Clean up

@@ -464,7 +481,11 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
  public void test() throws Exception {
    CollectionAdminRequest.createCollection("collection1", "conf", 2, 1).process(cluster.getSolrClient());
    CollectionAdminRequest.createCollection("collection2", "conf", 1, 1).process(cluster.getSolrClient());
-    waitForState("Expected collection1 to be created with 2 shards and 1 replica", "collection1", clusterShape(2, 1));
+
+    cluster.waitForActiveCollection("collection1", 2, 2);
+    cluster.waitForActiveCollection("collection2", 1, 1);
+
+    waitForState("Expected collection1 to be created with 2 shards and 1 replica", "collection1", clusterShape(2, 2));
    waitForState("Expected collection2 to be created with 1 shard and 1 replica", "collection2", clusterShape(1, 1));

    new UpdateRequest()

@@ -495,6 +516,8 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
    // test alias pointing to two collections. collection2 first because it's not on every node
    CollectionAdminRequest.createAlias("testalias2", "collection2,collection1").process(cluster.getSolrClient());

+    Thread.sleep(100);
+
    searchSeveralWays("testalias2", new SolrQuery("*:*"), 5);

    ///////////////

@@ -618,7 +641,9 @@ public class AliasIntegrationTest extends SolrCloudTestCase {
  @Test
  public void testErrorChecks() throws Exception {
    CollectionAdminRequest.createCollection("testErrorChecks-collection", "conf", 2, 1).process(cluster.getSolrClient());
-    waitForState("Expected testErrorChecks-collection to be created with 2 shards and 1 replica", "testErrorChecks-collection", clusterShape(2, 1));
+
+    cluster.waitForActiveCollection("testErrorChecks-collection", 2, 2);
+    waitForState("Expected testErrorChecks-collection to be created with 2 shards and 1 replica", "testErrorChecks-collection", clusterShape(2, 2));

    ignoreException(".");
@@ -56,8 +56,6 @@ public class AssignBackwardCompatibilityTest extends SolrCloudTestCase {
  }

  @Test
-  //05-Jul-2018 @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 21-May-2018
-  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2-Aug-2018
  public void test() throws IOException, SolrServerException, KeeperException, InterruptedException {
    Set<String> coreNames = new HashSet<>();
    Set<String> coreNodeNames = new HashSet<>();

@@ -81,6 +79,7 @@ public class AssignBackwardCompatibilityTest extends SolrCloudTestCase {
        DocCollection dc = getCollectionState(COLLECTION);
        Replica replica = getRandomReplica(dc.getSlice("shard1"), (r) -> r.getState() == Replica.State.ACTIVE);
        CollectionAdminRequest.deleteReplica(COLLECTION, "shard1", replica.getName()).process(cluster.getSolrClient());
+        coreNames.remove(replica.getCoreName());
        numLiveReplicas--;
      } else {
        CollectionAdminResponse response = CollectionAdminRequest.addReplicaToShard(COLLECTION, "shard1")
@@ -40,7 +40,7 @@ public class AsyncCallRequestStatusResponseTest extends SolrCloudTestCase {
    String asyncId =
        CollectionAdminRequest.createCollection("asynccall", "conf", 2, 1).processAsync(cluster.getSolrClient());

-    waitForState("Expected collection 'asynccall' to have 2 shards and 1 replica", "asynccall", clusterShape(2, 1));
+    waitForState("Expected collection 'asynccall' to have 2 shards and 1 replica", "asynccall", clusterShape(2, 2));

    int tries = 0;
    while (true) {
@@ -67,7 +67,7 @@ public class BasicDistributedZk2Test extends AbstractFullDistribZkTestBase {

  @Override
  protected boolean useTlogReplicas() {
-    return onlyLeaderIndexes;
+    return false; // TODO: tlog replicas makes commits take way to long due to what is likely a bug and it's TestInjection use
  }

  @Test

@@ -351,7 +351,7 @@ public class BasicDistributedZk2Test extends AbstractFullDistribZkTestBase {
    // query("q","matchesnothing","fl","*,score", "debugQuery", "true");

    // this should trigger a recovery phase on deadShard
-    ChaosMonkey.start(deadShard.jetty);
+    deadShard.jetty.start();

    // make sure we have published we are recovering
    Thread.sleep(1500);

@@ -381,7 +381,7 @@ public class BasicDistributedZk2Test extends AbstractFullDistribZkTestBase {

    Thread.sleep(1500);

-    ChaosMonkey.start(deadShard.jetty);
+    deadShard.jetty.start();

    // make sure we have published we are recovering
    Thread.sleep(1500);
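The two hunks above show a change made throughout this commit: the ChaosMonkey.start/ChaosMonkey.stop static helpers give way to calling JettySolrRunner#start/#stop directly on the node. A sketch of the restart-and-recover shape in a MiniSolrCloudCluster test (the predicate is illustrative; this particular class uses the older AbstractFullDistribZkTestBase harness):

    JettySolrRunner jetty = cluster.getJettySolrRunner(0);
    jetty.stop();
    cluster.waitForJettyToStop(jetty); // test-infra helper used elsewhere in this patch
    jetty.start();
    // Wait for every replica of the collection to come back ACTIVE.
    cluster.getSolrClient().getZkStateReader().waitForState("collection1", 30, TimeUnit.SECONDS,
        (liveNodes, coll) -> coll != null
            && coll.getReplicas().stream().allMatch(r -> r.getState() == Replica.State.ACTIVE));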
@@ -28,12 +28,16 @@ import java.util.Map;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletionService;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.Future;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;

import org.apache.commons.lang.StringUtils;
import org.apache.lucene.util.IOUtils;

@@ -74,7 +78,9 @@ import org.apache.solr.common.params.UpdateParams;
import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.util.DefaultSolrThreadFactory;
import org.apache.solr.util.RTimer;
import org.apache.solr.util.TestInjection;
+import org.apache.solr.util.TestInjection.Hook;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@@ -86,7 +92,6 @@ import org.slf4j.LoggerFactory;
 */
@Slow
@SuppressSSL(bugUrl = "https://issues.apache.org/jira/browse/SOLR-5776")
-// DO NOT ENABLE @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2018-06-18
public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {

  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

@@ -94,6 +99,7 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
  private static final String DEFAULT_COLLECTION = "collection1";

  private final boolean onlyLeaderIndexes = random().nextBoolean();

  String t1="a_t";
  String i1="a_i1";
  String tlong = "other_tl1";

@@ -108,13 +114,37 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {

  private AtomicInteger nodeCounter = new AtomicInteger();

  ThreadPoolExecutor executor = new ExecutorUtil.MDCAwareThreadPoolExecutor(0,
      Integer.MAX_VALUE, 5, TimeUnit.SECONDS, new SynchronousQueue<Runnable>(),
      new DefaultSolrThreadFactory("testExecutor"));

  CompletionService<Object> completionService;
  Set<Future<Object>> pending;

+  private static Hook newSearcherHook = new Hook() {
+    volatile CountDownLatch latch;
+    AtomicReference<String> collection = new AtomicReference<>();
+
+    @Override
+    public void newSearcher(String collectionName) {
+      String c = collection.get();
+      if (c != null && c.equals(collectionName)) {
+        log.info("Hook detected newSearcher");
+        try {
+          latch.countDown();
+        } catch (NullPointerException e) {
+
+        }
+      }
+    }
+
+    public void waitForSearcher(String collection, int cnt, int timeoutms, boolean failOnTimeout) throws InterruptedException {
+      latch = new CountDownLatch(cnt);
+      this.collection.set(collection);
+      boolean timeout = !latch.await(timeoutms, TimeUnit.MILLISECONDS);
+      if (timeout && failOnTimeout) {
+        fail("timed out waiting for new searcher event " + latch.getCount());
+      }
+    }
+
+  };
+
  public BasicDistributedZkTest() {
    // we need DVs on point fields to compute stats & facets
    if (Boolean.getBoolean(NUMERIC_POINTS_SYSPROP)) System.setProperty(NUMERIC_DOCVALUES_SYSPROP,"true");

@@ -124,10 +154,15 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
    pending = new HashSet<>();

  }

+  @BeforeClass
+  public static void beforeBDZKTClass() {
+    TestInjection.newSearcherHook(newSearcherHook);
+  }
+
  @Override
  protected boolean useTlogReplicas() {
-    return onlyLeaderIndexes;
+    return false; // TODO: tlog replicas makes commits take way to long due to what is likely a bug and it's TestInjection use
  }

  @Override

@@ -149,8 +184,6 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {

  @Test
  @ShardsFixed(num = 4)
-  //DO NOT ENABLE @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 12-Jun-2018
-  @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 14-Oct-2018
  public void test() throws Exception {
    // setLoggingLevel(null);
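The newSearcherHook defined above is registered with TestInjection in beforeBDZKTClass, letting the commitWithin checks below block on a real searcher-open event rather than polling doc counts. Stripped of the Solr plumbing, the synchronization is just an armable CountDownLatch; a self-contained sketch of the idea (class and method names invented for illustration):

    import java.util.concurrent.CountDownLatch;
    import java.util.concurrent.TimeUnit;
    import java.util.concurrent.atomic.AtomicReference;

    // Stand-in for TestInjection.Hook: arm() sets the expectation, the system
    // under test calls onEvent(), and await() blocks the test with a timeout.
    public class EventLatch {
      private volatile CountDownLatch latch = new CountDownLatch(0);
      private final AtomicReference<String> expected = new AtomicReference<>();

      public void arm(String name, int count) {
        latch = new CountDownLatch(count);
        expected.set(name);
      }

      public void onEvent(String name) {
        if (name.equals(expected.get())) {
          latch.countDown();
        }
      }

      public boolean await(long timeoutMs) throws InterruptedException {
        return latch.await(timeoutMs, TimeUnit.MILLISECONDS);
      }
    }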
@@ -345,23 +378,33 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
    params.set("commitWithin", 10);
    add(cloudClient, params , getDoc("id", 300), getDoc("id", 301));

-    waitForDocCount(before + 2, 30000, "add commitWithin did not work");
+    newSearcherHook.waitForSearcher(DEFAULT_COLLECTION, 2, 20000, false);
+
+    ClusterState clusterState = getCommonCloudSolrClient().getZkStateReader().getClusterState();
+    DocCollection dColl = clusterState.getCollection(DEFAULT_COLLECTION);
+
+    assertSliceCounts("should have found 2 docs, 300 and 301", before + 2, dColl);

    // try deleteById commitWithin
    UpdateRequest deleteByIdReq = new UpdateRequest();
    deleteByIdReq.deleteById("300");
    deleteByIdReq.setCommitWithin(10);
    deleteByIdReq.process(cloudClient);

-    waitForDocCount(before + 1, 30000, "deleteById commitWithin did not work");
+    newSearcherHook.waitForSearcher(DEFAULT_COLLECTION, 2, 20000, false);
+
+    assertSliceCounts("deleteById commitWithin did not work", before + 1, dColl);

    // try deleteByQuery commitWithin
    UpdateRequest deleteByQueryReq = new UpdateRequest();
    deleteByQueryReq.deleteByQuery("id:301");
    deleteByQueryReq.setCommitWithin(10);
    deleteByQueryReq.process(cloudClient);

-    waitForDocCount(before, 30000, "deleteByQuery commitWithin did not work");
+    newSearcherHook.waitForSearcher(DEFAULT_COLLECTION, 2, 20000, false);
+
+    assertSliceCounts("deleteByQuery commitWithin did not work", before, dColl);
+

    // TODO: This test currently fails because debug info is obtained only
    // on shards with matches.
@@ -384,24 +427,41 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
    testStopAndStartCoresInOneInstance();
  }

-  // Insure that total docs found is the expected number.
+  private void assertSliceCounts(String msg, long expected, DocCollection dColl) throws Exception {
+    long found = checkSlicesSameCounts(dColl);
+
+    if (found != expected) {
+      // we get one do over in a bad race
+      Thread.sleep(1000);
+      found = checkSlicesSameCounts(dColl);
+    }
+
+    assertEquals(msg, expected, checkSlicesSameCounts(dColl));
+  }
+
+  // Ensure that total docs found is the expected number.
  private void waitForDocCount(long expectedNumFound, long waitMillis, String failureMessage)
      throws Exception {
-    RTimer timer = new RTimer();
-    long timeout = (long)timer.getTime() + waitMillis;
-
-    ClusterState clusterState = getCommonCloudSolrClient().getZkStateReader().getClusterState();
-    DocCollection dColl = clusterState.getCollection(DEFAULT_COLLECTION);
-    long docTotal = -1; // Could use this for 0 hits too!
-
-    while (docTotal != expectedNumFound && timeout > (long) timer.getTime()) {
-      docTotal = checkSlicesSameCounts(dColl);
-      if (docTotal != expectedNumFound) {
-        Thread.sleep(100);
-      }
+    AtomicLong total = new AtomicLong(-1);
+    try {
+      getCommonCloudSolrClient().getZkStateReader().waitForState(DEFAULT_COLLECTION, waitMillis, TimeUnit.MILLISECONDS, (n, c) -> {
+        long docTotal;
+        try {
+          docTotal = checkSlicesSameCounts(c);
+        } catch (SolrServerException | IOException e) {
+          throw new RuntimeException(e);
+        }
+        total.set(docTotal);
+        if (docTotal == expectedNumFound) {
+          return true;
+        }
+        return false;
+      });
+    } catch (TimeoutException | InterruptedException e) {
+
    }
-    // We could fail here if we broke out of the above because we exceeded the time allowed.
-    assertEquals(failureMessage, expectedNumFound, docTotal);
+    assertEquals(failureMessage, expectedNumFound, total.get());

    // This should be redundant, but it caught a test error after all.
    for (SolrClient client : clients) {
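This hunk is the heart of the SOLR-12898 change in this file: the RTimer polling loop becomes a ZkStateReader#waitForState predicate, with an AtomicLong carrying the last observed count out of the lambda so the closing assert can report expected versus found. The TimeoutException is swallowed on purpose; the assertEquals that follows produces the more useful failure. A distilled sketch of the idiom (the per-collection metric here is a stand-in, not the test's per-slice doc count):

    static void waitForMetric(ZkStateReader reader, String collection, long expected, long waitMillis)
        throws Exception {
      AtomicLong observed = new AtomicLong(-1);
      try {
        reader.waitForState(collection, waitMillis, TimeUnit.MILLISECONDS, (liveNodes, coll) -> {
          // stand-in metric; the test computes a per-slice document total instead
          long value = coll == null ? -1 : coll.getReplicas().size();
          observed.set(value);
          return value == expected;
        });
      } catch (TimeoutException | InterruptedException e) {
        // deliberately swallowed: the assert below gives the informative message
      }
      assertEquals("state never converged", expected, observed.get());
    }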
@@ -557,11 +617,10 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
      }
    }

-    ChaosMonkey.stop(cloudJettys.get(0).jetty);
+    cloudJettys.get(0).jetty.stop();
    printLayout();

-    Thread.sleep(5000);
-    ChaosMonkey.start(cloudJettys.get(0).jetty);
+    cloudJettys.get(0).jetty.start();
    cloudClient.getZkStateReader().forceUpdateCollection("multiunload2");
    try {
      cloudClient.getZkStateReader().getLeaderRetry("multiunload2", "shard1", 30000);

@@ -803,6 +862,8 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
      for (String coreName : resp.getCollectionCoresStatus().keySet()) {
        collectionClients.add(createNewSolrClient(coreName, jettys.get(0).getBaseUrl().toString()));
      }
+
+
    }

    SolrClient client1 = collectionClients.get(0);

@@ -863,15 +924,36 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {
    unloadCmd.setCoreName(props.getCoreName());

    String leader = props.getCoreUrl();

-    unloadClient.request(unloadCmd);
-
-    int tries = 50;
-    while (leader.equals(zkStateReader.getLeaderUrl(oneInstanceCollection2, "shard1", 10000))) {
-      Thread.sleep(100);
-      if (tries-- == 0) {
-        fail("Leader never changed");
+    testExecutor.execute(new Runnable() {
+
+      @Override
+      public void run() {
+        try {
+          unloadClient.request(unloadCmd);
+        } catch (SolrServerException e) {
+          throw new RuntimeException(e);
+        } catch (IOException e) {
+          throw new RuntimeException(e);
+        }
+      }
+    });
+
+    try {
+      getCommonCloudSolrClient().getZkStateReader().waitForState(oneInstanceCollection2, 20000, TimeUnit.MILLISECONDS, (n, c) -> {
+
+        try {
+          if (leader.equals(zkStateReader.getLeaderUrl(oneInstanceCollection2, "shard1", 10000))) {
+            return false;
+          }
+        } catch (InterruptedException e) {
+          throw new RuntimeException(e);
+        }
+        return true;
+      });
+    } catch (TimeoutException | InterruptedException e) {
+      fail("Leader never changed");
    }
  }
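In the unload hunk above, the blocking admin request moves onto testExecutor so the test thread can sit in waitForState until the shard leader moves off the old core URL; the old version burned a fixed 50 x 100ms polling budget, while waitForState wakes on ZooKeeper watch events up to its 20s limit. A compact sketch of running a request in the background while waiting on cluster state (client, command, and helper are hypothetical):

    ExecutorService background = Executors.newSingleThreadExecutor();
    Future<?> unload = background.submit(() -> {
      try {
        unloadClient.request(unloadCmd); // hypothetical blocking admin call
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
    });
    reader.waitForState("oneInstanceCollection2", 20, TimeUnit.SECONDS,
        (liveNodes, coll) -> !oldLeaderUrl.equals(leaderUrlOf(coll))); // leaderUrlOf: hypothetical
    unload.get(); // surface any failure from the background unload
    background.shutdown();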
@@ -1036,10 +1118,10 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase {

    long collection2Docs = otherCollectionClients.get("collection2").get(0)
        .query(new SolrQuery("*:*")).getResults().getNumFound();
-    System.out.println("found2: "+ collection2Docs);
+
    long collection3Docs = otherCollectionClients.get("collection3").get(0)
        .query(new SolrQuery("*:*")).getResults().getNumFound();
-    System.out.println("found3: "+ collection3Docs);
+

    SolrQuery query = new SolrQuery("*:*");
    query.set("collection", "collection2,collection3");
@@ -115,7 +115,7 @@ public class BasicZkTest extends AbstractZkTestCase {

    // try a reconnect from disconnect
    zkServer = new ZkTestServer(zkDir, zkPort);
-    zkServer.run();
+    zkServer.run(false);

    Thread.sleep(300);
@@ -23,7 +23,6 @@ import java.util.Set;

import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.SolrTestCaseJ4.SuppressObjectReleaseTracker;
import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.CloudSolrClient;

@@ -35,8 +34,6 @@ import org.junit.Test;

@Slow
@SuppressSSL(bugUrl = "https://issues.apache.org/jira/browse/SOLR-5776")
-//@ThreadLeakLingering(linger = 60000)
@SuppressObjectReleaseTracker(bugUrl="Testing purposes")
public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase {
  private static final int FAIL_TOLERANCE = 100;

@@ -48,6 +45,9 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
  public static void beforeSuperClass() {
    schemaString = "schema15.xml";      // we need a string id
    System.setProperty("solr.autoCommit.maxTime", "15000");
+    System.clearProperty("solr.httpclient.retries");
+    System.clearProperty("solr.retries.on.forward");
+    System.clearProperty("solr.retries.to.followers");
    setErrorHook();
  }

@@ -57,10 +57,22 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
    clearErrorHook();
  }

+  @Override
+  protected void destroyServers() throws Exception {
+
+    super.destroyServers();
+  }
+
  protected static final String[] fieldNames = new String[]{"f_i", "f_f", "f_d", "f_l", "f_dt"};
  protected static final RandVal[] randVals = new RandVal[]{rint, rfloat, rdouble, rlong, rdate};

  private int clientSoTimeout = 60000;

+  private volatile FullThrottleStoppableIndexingThread ftIndexThread;
+
+  private final boolean runFullThrottle;
+
  public String[] getFieldNames() {
    return fieldNames;

@@ -78,6 +90,16 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
    useFactory("solr.StandardDirectoryFactory");
  }

+  @Override
+  public void distribTearDown() throws Exception {
+    try {
+      ftIndexThread.safeStop();
+    } catch (NullPointerException e) {
+      // okay
+    }
+    super.distribTearDown();
+  }
+
  public ChaosMonkeyNothingIsSafeTest() {
    super();
    sliceCount = Integer.parseInt(System.getProperty("solr.tests.cloud.cm.slicecount", "-1"));

@@ -94,11 +116,15 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
    fixShardCount(numShards);

+    // TODO: we only do this sometimes so that we can sometimes compare against control,
+    // it's currently hard to know what requests failed when using ConcurrentSolrUpdateServer
+    runFullThrottle = random().nextBoolean();
+
  }

  @Override
  protected boolean useTlogReplicas() {
-    return onlyLeaderIndexes;
+    return false; // TODO: tlog replicas makes commits take way to long due to what is likely a bug and it's TestInjection use
  }

  @Override

@@ -119,9 +145,9 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
    // None of the operations used here are particularly costly, so this should work.
    // Using this low timeout will also help us catch index stalling.
    clientSoTimeout = 5000;
-    cloudClient = createCloudClient(DEFAULT_COLLECTION);
    boolean testSuccessful = false;
-    try {
+    try (CloudSolrClient ourCloudClient = createCloudClient(DEFAULT_COLLECTION)) {
      handle.clear();
      handle.put("timestamp", SKIPVAL);
      ZkStateReader zkStateReader = cloudClient.getZkStateReader();

@@ -155,13 +181,9 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
        searchThread.start();
      }

-      // TODO: we only do this sometimes so that we can sometimes compare against control,
-      // it's currently hard to know what requests failed when using ConcurrentSolrUpdateServer
-      boolean runFullThrottle = random().nextBoolean();
      if (runFullThrottle) {
-        FullThrottleStoppableIndexingThread ftIndexThread =
-            new FullThrottleStoppableIndexingThread(controlClient, cloudClient, clients, "ft1", true, this.clientSoTimeout);
-        threads.add(ftIndexThread);
+        ftIndexThread =
+            new FullThrottleStoppableIndexingThread(cloudClient.getHttpClient(),controlClient, cloudClient, clients, "ft1", true, this.clientSoTimeout);
        ftIndexThread.start();
      }

@@ -189,6 +211,11 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
      // ideally this should go into chaosMonkey
      restartZk(1000 * (5 + random().nextInt(4)));

+      if (runFullThrottle) {
+        ftIndexThread.safeStop();
+      }
+
      for (StoppableThread indexThread : threads) {
        indexThread.safeStop();
      }

@@ -219,7 +246,6 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
      zkStateReader.updateLiveNodes();
      assertTrue(zkStateReader.getClusterState().getLiveNodes().size() > 0);

-
      // we expect full throttle fails, but cloud client should not easily fail
      for (StoppableThread indexThread : threads) {
        if (indexThread instanceof StoppableIndexingThread && !(indexThread instanceof FullThrottleStoppableIndexingThread)) {

@@ -230,6 +256,10 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase
      }

+      waitForThingsToLevelOut(20);
+
+      commit();
+
      Set<String> addFails = getAddFails(indexTreads);
      Set<String> deleteFails = getDeleteFails(indexTreads);
      // full throttle thread can

@@ -253,7 +283,7 @@ public class ChaosMonkeyNothingIsSafeTest extends AbstractFullDistribZkTestBase

      // sometimes we restart zookeeper as well
      if (random().nextBoolean()) {
-        restartZk(1000 * (5 + random().nextInt(4)));
+        // restartZk(1000 * (5 + random().nextInt(4)));
      }

      try (CloudSolrClient client = createCloudClient("collection1", 30000)) {
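Both "nothing is safe" ChaosMonkey tests now keep the full-throttle indexer in a volatile field and stop it defensively in distribTearDown, catching NullPointerException for runs where runFullThrottle was false and the thread was never created. An explicit null check expresses the same guard more directly; a sketch of that alternative (method names mirror the test, but this is not the committed code):

    @Override
    public void distribTearDown() throws Exception {
      FullThrottleStoppableIndexingThread t = ftIndexThread;
      if (t != null) {
        t.safeStop(); // assumed safe to call on an already-stopped thread, as in the committed variant
      }
      super.distribTearDown();
    }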
@@ -25,7 +25,6 @@ import java.util.Set;
import java.util.concurrent.TimeUnit;

import org.apache.lucene.util.LuceneTestCase.Slow;
-import org.apache.solr.SolrTestCaseJ4.SuppressObjectReleaseTracker;
import org.apache.solr.SolrTestCaseJ4.SuppressSSL;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.CloudSolrClient;

@@ -43,12 +42,8 @@ import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
-
@Slow
@SuppressSSL(bugUrl = "https://issues.apache.org/jira/browse/SOLR-5776")
-@ThreadLeakLingering(linger = 60000)
-@SuppressObjectReleaseTracker(bugUrl="Testing purposes")
public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDistribZkTestBase {
  private static final int FAIL_TOLERANCE = 100;

@@ -71,6 +66,9 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
    if (usually()) {
      System.setProperty("solr.autoCommit.maxTime", "15000");
    }
+    System.clearProperty("solr.httpclient.retries");
+    System.clearProperty("solr.retries.on.forward");
+    System.clearProperty("solr.retries.to.followers");
    TestInjection.waitForReplicasInSync = null;
    setErrorHook();
  }

@@ -85,7 +83,11 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
  protected static final String[] fieldNames = new String[]{"f_i", "f_f", "f_d", "f_l", "f_dt"};
  protected static final RandVal[] randVals = new RandVal[]{rint, rfloat, rdouble, rlong, rdate};

-  private int clientSoTimeout = 60000;
+  private int clientSoTimeout;
+
+  private volatile FullThrottleStoppableIndexingThread ftIndexThread;
+
+  private final boolean runFullThrottle;

  public String[] getFieldNames() {
    return fieldNames;

@@ -103,6 +105,16 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
    useFactory("solr.StandardDirectoryFactory");
  }

+  @Override
+  public void distribTearDown() throws Exception {
+    try {
+      ftIndexThread.safeStop();
+    } catch (NullPointerException e) {
+      // okay
+    }
+    super.distribTearDown();
+  }
+
  public ChaosMonkeyNothingIsSafeWithPullReplicasTest() {
    super();
    numPullReplicas = random().nextInt(TEST_NIGHTLY ? 2 : 1) + 1;

@@ -116,12 +128,12 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
    fixShardCount(numNodes);
    log.info("Starting ChaosMonkey test with {} shards and {} nodes", sliceCount, numNodes);

+    runFullThrottle = random().nextBoolean();
  }

  @Override
  protected boolean useTlogReplicas() {
-    return useTlogReplicas;
+    return false; // TODO: tlog replicas makes commits take way to long due to what is likely a bug and it's TestInjection use
  }

  @Override

@@ -140,8 +152,8 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
  public void test() throws Exception {
    // None of the operations used here are particularly costly, so this should work.
    // Using this low timeout will also help us catch index stalling.
-    clientSoTimeout = 5000;
-    cloudClient = createCloudClient(DEFAULT_COLLECTION);
+    clientSoTimeout = 8000;

    DocCollection docCollection = cloudClient.getZkStateReader().getClusterState().getCollection(DEFAULT_COLLECTION);
    assertEquals(this.sliceCount, docCollection.getSlices().size());
    Slice s = docCollection.getSlice("shard1");

@@ -162,9 +174,7 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
    } // make sure we again have leaders for each shard

    waitForRecoveriesToFinish(false);

-    // we cannot do delete by query
-    // as it's not supported for recovery
    del("*:*");

    List<StoppableThread> threads = new ArrayList<>();

@@ -172,7 +182,7 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
    int threadCount = TEST_NIGHTLY ? 3 : 1;
    int i = 0;
    for (i = 0; i < threadCount; i++) {
-      StoppableIndexingThread indexThread = new StoppableIndexingThread(controlClient, cloudClient, Integer.toString(i), true);
+      StoppableIndexingThread indexThread = new StoppableIndexingThread(controlClient, cloudClient, Integer.toString(i), true, 35, 1, true);
      threads.add(indexThread);
      indexTreads.add(indexThread);
      indexThread.start();

@@ -192,13 +202,9 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
      commitThread.start();
    }

-    // TODO: we only do this sometimes so that we can sometimes compare against control,
-    // it's currently hard to know what requests failed when using ConcurrentSolrUpdateServer
-    boolean runFullThrottle = random().nextBoolean();
    if (runFullThrottle) {
-      FullThrottleStoppableIndexingThread ftIndexThread =
-          new FullThrottleStoppableIndexingThread(controlClient, cloudClient, clients, "ft1", true, this.clientSoTimeout);
-      threads.add(ftIndexThread);
+      ftIndexThread =
+          new FullThrottleStoppableIndexingThread(cloudClient.getHttpClient(), controlClient, cloudClient, clients, "ft1", true, this.clientSoTimeout);
      ftIndexThread.start();
    }

@@ -213,7 +219,7 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
      runTimes = new int[] {5000, 6000, 10000, 15000, 25000, 30000,
          30000, 45000, 90000, 120000};
    } else {
-      runTimes = new int[] {5000, 7000, 15000};
+      runTimes = new int[] {5000, 7000, 10000};
    }
    runLength = runTimes[random().nextInt(runTimes.length - 1)];
  }

@@ -225,6 +231,10 @@ public class ChaosMonkeyNothingIsSafeWithPullReplicasTest extends AbstractFullDi
    // ideally this should go into chaosMonkey
    restartZk(1000 * (5 + random().nextInt(4)));

+    if (runFullThrottle) {
+      ftIndexThread.safeStop();
+    }
+
    for (StoppableThread indexThread : threads) {
      indexThread.safeStop();
    }
@@ -38,6 +38,9 @@ public class ChaosMonkeySafeLeaderTest extends AbstractFullDistribZkTestBase {
  public static void beforeSuperClass() {
    schemaString = "schema15.xml";      // we need a string id
    System.setProperty("solr.autoCommit.maxTime", "15000");
+    System.clearProperty("solr.httpclient.retries");
+    System.clearProperty("solr.retries.on.forward");
+    System.clearProperty("solr.retries.to.followers");
    setErrorHook();
  }

@@ -81,7 +84,6 @@ public class ChaosMonkeySafeLeaderTest extends AbstractFullDistribZkTestBase {
  }

  @Test
-  // 29-June-2018 @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
  public void test() throws Exception {

    handle.clear();

@@ -170,7 +172,7 @@ public class ChaosMonkeySafeLeaderTest extends AbstractFullDistribZkTestBase {
    if (random().nextBoolean()) {
      zkServer.shutdown();
      zkServer = new ZkTestServer(zkServer.getZkDir(), zkServer.getPort());
-      zkServer.run();
+      zkServer.run(false);
    }

    try (CloudSolrClient client = createCloudClient("collection1")) {
@@ -23,7 +23,6 @@ import java.util.List;
import java.util.concurrent.TimeUnit;

import org.apache.lucene.util.LuceneTestCase.Slow;
-import org.apache.solr.SolrTestCaseJ4.SuppressObjectReleaseTracker;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CloudSolrClient;

@@ -42,7 +41,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@Slow
-@SuppressObjectReleaseTracker(bugUrl="Testing purposes")
public class ChaosMonkeySafeLeaderWithPullReplicasTest extends AbstractFullDistribZkTestBase {
  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

@@ -60,7 +58,7 @@ public class ChaosMonkeySafeLeaderWithPullReplicasTest extends AbstractFullDistr

  @Override
  protected boolean useTlogReplicas() {
-    return useTlogReplicas;
+    return false; // TODO: tlog replicas makes commits take way to long due to what is likely a bug and it's TestInjection use
  }

  @BeforeClass

@@ -69,6 +67,9 @@ public class ChaosMonkeySafeLeaderWithPullReplicasTest extends AbstractFullDistr
    if (usually()) {
      System.setProperty("solr.autoCommit.maxTime", "15000");
    }
+    System.clearProperty("solr.httpclient.retries");
+    System.clearProperty("solr.retries.on.forward");
+    System.clearProperty("solr.retries.to.followers");
    TestInjection.waitForReplicasInSync = null;
    setErrorHook();
  }

@@ -99,8 +100,8 @@ public class ChaosMonkeySafeLeaderWithPullReplicasTest extends AbstractFullDistr

  public ChaosMonkeySafeLeaderWithPullReplicasTest() {
    super();
-    numPullReplicas = random().nextInt(TEST_NIGHTLY ? 3 : 2) + 1;;
-    numRealtimeOrTlogReplicas = random().nextInt(TEST_NIGHTLY ? 3 : 2) + 1;;
+    numPullReplicas = random().nextInt(TEST_NIGHTLY ? 3 : 2) + 1;
+    numRealtimeOrTlogReplicas = random().nextInt(TEST_NIGHTLY ? 3 : 2) + 1;
    sliceCount = Integer.parseInt(System.getProperty("solr.tests.cloud.cm.slicecount", "-1"));
    if (sliceCount == -1) {
      sliceCount = random().nextInt(TEST_NIGHTLY ? 3 : 2) + 1;

@@ -219,7 +220,7 @@ public class ChaosMonkeySafeLeaderWithPullReplicasTest extends AbstractFullDistr
    if (random().nextBoolean()) {
      zkServer.shutdown();
      zkServer = new ZkTestServer(zkServer.getZkDir(), zkServer.getPort());
-      zkServer.run();
+      zkServer.run(false);
    }

    try (CloudSolrClient client = createCloudClient("collection1")) {
@@ -36,10 +36,12 @@ import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.core.CloudConfig;
+import org.apache.solr.handler.component.HttpShardHandler;
import org.apache.solr.handler.component.HttpShardHandlerFactory;
import org.apache.solr.update.UpdateShardHandler;
import org.apache.solr.update.UpdateShardHandlerConfig;
import org.apache.zookeeper.KeeperException;
+import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import org.slf4j.Logger;

@@ -56,6 +58,13 @@ public class ChaosMonkeyShardSplitTest extends ShardSplitTest {

  static final int TIMEOUT = 10000;
  private AtomicInteger killCounter = new AtomicInteger();

+  @BeforeClass
+  public static void beforeSuperClass() {
+    System.clearProperty("solr.httpclient.retries");
+    System.clearProperty("solr.retries.on.forward");
+    System.clearProperty("solr.retries.to.followers");
+  }
+
  @Test
  public void test() throws Exception {

@@ -100,7 +109,7 @@ public class ChaosMonkeyShardSplitTest extends ShardSplitTest {

      // kill the leader
      CloudJettyRunner leaderJetty = shardToLeaderJetty.get("shard1");
-      chaosMonkey.killJetty(leaderJetty);
+      leaderJetty.jetty.stop();

      Thread.sleep(2000);

@@ -122,7 +131,7 @@ public class ChaosMonkeyShardSplitTest extends ShardSplitTest {
      }

      // bring back dead node
-      ChaosMonkey.start(deadJetty.jetty); // he is not the leader anymore
+      deadJetty.jetty.start(); // he is not the leader anymore

      waitTillRecovered();

@@ -251,7 +260,7 @@ public class ChaosMonkeyShardSplitTest extends ShardSplitTest {
    LeaderElector overseerElector = new LeaderElector(zkClient);
    UpdateShardHandler updateShardHandler = new UpdateShardHandler(UpdateShardHandlerConfig.DEFAULT);
    // TODO: close Overseer
-    Overseer overseer = new Overseer(new HttpShardHandlerFactory().getShardHandler(), updateShardHandler, "/admin/cores",
+    Overseer overseer = new Overseer((HttpShardHandler) new HttpShardHandlerFactory().getShardHandler(), updateShardHandler, "/admin/cores",
        reader, null, new CloudConfig.CloudConfigBuilder("127.0.0.1", 8983, "solr").build());
    overseer.close();
    ElectionContext ec = new OverseerElectionContext(zkClient, overseer,
@@ -96,13 +96,13 @@ public class CleanupOldIndexTest extends SolrCloudTestCase {
    assertTrue(oldIndexDir2.isDirectory());

    // bring shard replica down
-    ChaosMonkey.stop(jetty);
+    jetty.stop();

    // wait a moment - lets allow some docs to be indexed so replication time is non 0
    Thread.sleep(waitTimes[random().nextInt(waitTimes.length - 1)]);

    // bring shard replica up
-    ChaosMonkey.start(jetty);
+    jetty.start();

    // make sure replication can start
    Thread.sleep(3000);
@@ -136,12 +136,12 @@ public class CloudTestUtils {
      boolean requireLeaders) {
    return (liveNodes, collectionState) -> {
      if (collectionState == null) {
-        log.trace("-- null collection");
+        log.info("-- null collection");
        return false;
      }
      Collection<Slice> slices = withInactive ? collectionState.getSlices() : collectionState.getActiveSlices();
      if (slices.size() != expectedShards) {
-        log.trace("-- wrong number of slices, expected={}, found={}: {}", expectedShards, collectionState.getSlices().size(), collectionState.getSlices());
+        log.info("-- wrong number of slices, expected={}, found={}: {}", expectedShards, collectionState.getSlices().size(), collectionState.getSlices());
        return false;
      }
      Set<String> leaderless = new HashSet<>();

@@ -160,14 +160,14 @@ public class CloudTestUtils {
          activeReplicas++;
        }
        if (activeReplicas != expectedReplicas) {
-          log.trace("-- wrong number of active replicas in slice {}, expected={}, found={}", slice.getName(), expectedReplicas, activeReplicas);
+          log.info("-- wrong number of active replicas in slice {}, expected={}, found={}", slice.getName(), expectedReplicas, activeReplicas);
          return false;
        }
      }
      if (leaderless.isEmpty()) {
        return true;
      } else {
-        log.trace("-- shards without leaders: {}", leaderless);
+        log.info("-- shards without leaders: {}", leaderless);
        return false;
      }
    };
@@ -22,6 +22,7 @@ import java.util.Map;
import java.util.Set;

import org.apache.lucene.util.LuceneTestCase.Slow;
+import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;

@@ -44,7 +45,6 @@ public class ClusterStateUpdateTest extends SolrCloudTestCase {
    configureCluster(3)
        .addConfig("conf", configset("cloud-minimal"))
        .configure();
-
  }

  @BeforeClass

@@ -112,7 +112,7 @@ public class ClusterStateUpdateTest extends SolrCloudTestCase {
    assertEquals(3, liveNodes.size());

    // shut down node 2
-    cluster.stopJettySolrRunner(2);
+    JettySolrRunner j = cluster.stopJettySolrRunner(2);

    // slight pause (15s timeout) for watch to trigger
    for(int i = 0; i < (5 * 15); i++) {

@@ -121,6 +121,8 @@ public class ClusterStateUpdateTest extends SolrCloudTestCase {
      }
      Thread.sleep(200);
    }

+    cluster.waitForJettyToStop(j);
+
    assertEquals(2, zkController2.getClusterState().getLiveNodes().size());
Some files were not shown because too many files have changed in this diff.