mirror of
https://github.com/apache/lucene.git
synced 2025-02-22 10:15:27 +00:00
SOLR-11126: New Node-level health check handler at /admin/info/healthcheck and /node/health paths that checks if the node is live, connected to zookeeper and not shutdown
This commit is contained in:
parent
46592e981f
commit
2bd6f246b0
@ -185,6 +185,9 @@ New Features
|
||||
|
||||
* SOLR-7896: Add a login page to Admin UI, with initial support for Basic Auth (janhoy)
|
||||
|
||||
* SOLR-11126: New Node-level health check handler at /admin/info/health and /node/health paths that
|
||||
checks if the node is live, connected to zookeeper and not shutdown. (Anshum Gupta, Amrit Sarkar, shalin)
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
||||
|
@ -21,7 +21,6 @@ import static org.apache.solr.common.params.CommonParams.AUTHC_PATH;
|
||||
import static org.apache.solr.common.params.CommonParams.AUTHZ_PATH;
|
||||
import static org.apache.solr.common.params.CommonParams.AUTOSCALING_HISTORY_PATH;
|
||||
import static org.apache.solr.common.params.CommonParams.COLLECTIONS_HANDLER_PATH;
|
||||
import static org.apache.solr.common.params.CommonParams.HEALTH_CHECK_HANDLER_PATH;
|
||||
import static org.apache.solr.common.params.CommonParams.CONFIGSETS_HANDLER_PATH;
|
||||
import static org.apache.solr.common.params.CommonParams.CORES_HANDLER_PATH;
|
||||
import static org.apache.solr.common.params.CommonParams.INFO_HANDLER_PATH;
|
||||
@ -576,7 +575,6 @@ public class CoreContainer {
|
||||
createHandler(ZK_PATH, ZookeeperInfoHandler.class.getName(), ZookeeperInfoHandler.class);
|
||||
createHandler(ZK_STATUS_PATH, ZookeeperStatusHandler.class.getName(), ZookeeperStatusHandler.class);
|
||||
collectionsHandler = createHandler(COLLECTIONS_HANDLER_PATH, cfg.getCollectionsHandlerClass(), CollectionsHandler.class);
|
||||
healthCheckHandler = createHandler(HEALTH_CHECK_HANDLER_PATH, cfg.getHealthCheckHandlerClass(), HealthCheckHandler.class);
|
||||
infoHandler = createHandler(INFO_HANDLER_PATH, cfg.getInfoHandlerClass(), InfoHandler.class);
|
||||
coreAdminHandler = createHandler(CORES_HANDLER_PATH, cfg.getCoreAdminHandlerClass(), CoreAdminHandler.class);
|
||||
configSetsHandler = createHandler(CONFIGSETS_HANDLER_PATH, cfg.getConfigSetsHandlerClass(), ConfigSetsHandler.class);
|
||||
|
@ -18,7 +18,6 @@
|
||||
package org.apache.solr.handler.admin;
|
||||
|
||||
import java.lang.invoke.MethodHandles;
|
||||
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.cloud.ClusterState;
|
||||
import org.apache.solr.common.cloud.ZkStateReader;
|
||||
@ -30,6 +29,7 @@ import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.FAILURE;
|
||||
import static org.apache.solr.common.params.CommonParams.OK;
|
||||
import static org.apache.solr.common.params.CommonParams.STATUS;
|
||||
@ -38,8 +38,9 @@ import static org.apache.solr.common.params.CommonParams.STATUS;
|
||||
* Health Check Handler for reporting the health of a specific node.
|
||||
*
|
||||
* This checks if the node is:
|
||||
* 1. Connected to zookeeper
|
||||
* 2. listed in 'live_nodes'.
|
||||
* 1. Cores container is active.
|
||||
* 1. Connected to zookeeper.
|
||||
* 2. Listed in 'live_nodes' in zookeeper.
|
||||
*/
|
||||
public class HealthCheckHandler extends RequestHandlerBase {
|
||||
|
||||
@ -47,6 +48,8 @@ public class HealthCheckHandler extends RequestHandlerBase {
|
||||
|
||||
CoreContainer coreContainer;
|
||||
|
||||
public HealthCheckHandler() {}
|
||||
|
||||
public HealthCheckHandler(final CoreContainer coreContainer) {
|
||||
super();
|
||||
this.coreContainer = coreContainer;
|
||||
@ -54,7 +57,6 @@ public class HealthCheckHandler extends RequestHandlerBase {
|
||||
|
||||
@Override
|
||||
final public void init(NamedList args) {
|
||||
|
||||
}
|
||||
|
||||
public CoreContainer getCoreContainer() {
|
||||
@ -67,8 +69,9 @@ public class HealthCheckHandler extends RequestHandlerBase {
|
||||
log.debug("Invoked HealthCheckHandler on [{}]", coreContainer.getZkController().getNodeName());
|
||||
CoreContainer cores = getCoreContainer();
|
||||
|
||||
if(cores == null) {
|
||||
rsp.setException(new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Core container not initialized"));
|
||||
// Core container must be non-null and must not be shut down (redundant check)
|
||||
if(cores == null || cores.isShutDown()) {
|
||||
rsp.setException(new SolrException(SolrException.ErrorCode.SERVER_ERROR, "CoreContainer is either not initialized or shutting down"));
|
||||
return;
|
||||
}
|
||||
if(!cores.isZooKeeperAware()) {
|
||||
@ -94,8 +97,6 @@ public class HealthCheckHandler extends RequestHandlerBase {
|
||||
}
|
||||
|
||||
rsp.setHttpCaching(false);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -107,4 +108,9 @@ public class HealthCheckHandler extends RequestHandlerBase {
|
||||
public Category getCategory() {
|
||||
return Category.ADMIN;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Boolean registerV2() {
|
||||
return Boolean.TRUE;
|
||||
}
|
||||
}
|
||||
|
@ -50,6 +50,8 @@ public class InfoHandler extends RequestHandlerBase {
|
||||
handlers.put("properties", new PropertiesRequestHandler());
|
||||
handlers.put("logging", new LoggingHandler(coreContainer));
|
||||
handlers.put("system", new SystemInfoHandler(coreContainer));
|
||||
handlers.put("health", new HealthCheckHandler(coreContainer));
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -91,6 +91,10 @@
|
||||
"class": "solr.LoggingHandler",
|
||||
"useParams":"_ADMIN_LOGGING"
|
||||
},
|
||||
"/admin/health": {
|
||||
"class": "solr.HealthCheckHandler",
|
||||
"useParams":"_ADMIN_HEALTH"
|
||||
},
|
||||
"/admin/file": {
|
||||
"class": "solr.ShowFileRequestHandler",
|
||||
"useParams":"_ADMIN_FILE"
|
||||
|
@ -24,13 +24,16 @@ import org.apache.solr.client.solrj.SolrResponse;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
|
||||
import org.apache.solr.client.solrj.impl.HttpSolrClient;
|
||||
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
|
||||
import org.apache.solr.client.solrj.request.GenericSolrRequest;
|
||||
import org.apache.solr.client.solrj.request.HealthCheckRequest;
|
||||
import org.apache.solr.client.solrj.request.V2Request;
|
||||
import org.apache.solr.client.solrj.response.CollectionAdminResponse;
|
||||
import org.apache.solr.client.solrj.response.HealthCheckResponse;
|
||||
import org.apache.solr.client.solrj.response.V2Response;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
@ -53,42 +56,78 @@ public class HealthCheckHandlerTest extends SolrCloudTestCase {
|
||||
// as compared with testHealthCheckHandlerWithCloudClient
|
||||
// (Not sure if that's actually a good thing -- but it's how the existing test worked)
|
||||
assertEquals(CommonParams.OK,
|
||||
req.process(cluster.getSolrClient()).getResponse().get(CommonParams.STATUS));
|
||||
|
||||
req.process(cluster.getSolrClient()).getResponse().get(CommonParams.STATUS));
|
||||
|
||||
// positive check that our existing "healthy" node works with direct http client
|
||||
try (HttpSolrClient httpSolrClient = getHttpSolrClient(cluster.getJettySolrRunner(0).getBaseUrl().toString())) {
|
||||
SolrResponse response = req.process(httpSolrClient);
|
||||
assertEquals(CommonParams.OK, response.getResponse().get(CommonParams.STATUS));
|
||||
}
|
||||
|
||||
// successfully create a dummy collection
|
||||
try (HttpSolrClient httpSolrClient = getHttpSolrClient(cluster.getJettySolrRunner(0).getBaseUrl().toString())) {
|
||||
CollectionAdminResponse collectionAdminResponse = CollectionAdminRequest.createCollection("test", "_default", 1, 1)
|
||||
.withProperty("solr.directoryFactory", "solr.StandardDirectoryFactory")
|
||||
.process(httpSolrClient);
|
||||
assertEquals(0, collectionAdminResponse.getStatus());
|
||||
SolrResponse response = req.process(httpSolrClient);
|
||||
assertEquals(CommonParams.OK, response.getResponse().get(CommonParams.STATUS));
|
||||
} finally {
|
||||
cluster.deleteAllCollections();
|
||||
cluster.deleteAllConfigSets();
|
||||
}
|
||||
|
||||
// add a new node for the purpose of negative testing
|
||||
JettySolrRunner newJetty = cluster.startJettySolrRunner();
|
||||
try (HttpSolrClient httpSolrClient = getHttpSolrClient(newJetty.getBaseUrl().toString())) {
|
||||
|
||||
|
||||
// positive check that our (new) "healthy" node works with direct http client
|
||||
assertEquals(CommonParams.OK, req.process(httpSolrClient).getResponse().get(CommonParams.STATUS));
|
||||
|
||||
|
||||
// now "break" our (new) node
|
||||
newJetty.getCoreContainer().getZkController().getZkClient().close();
|
||||
|
||||
|
||||
// negative check of our (new) "broken" node that we deliberately put into an unhealthy state
|
||||
HttpSolrClient.RemoteSolrException e = expectThrows(HttpSolrClient.RemoteSolrException.class, () ->
|
||||
{
|
||||
req.process(httpSolrClient);
|
||||
});
|
||||
{
|
||||
req.process(httpSolrClient);
|
||||
});
|
||||
assertTrue(e.getMessage(), e.getMessage().contains("Host Unavailable"));
|
||||
assertEquals(SolrException.ErrorCode.SERVICE_UNAVAILABLE.code, e.code());
|
||||
} finally {
|
||||
newJetty.stop();
|
||||
}
|
||||
|
||||
// add a new node for the purpose of negative testing
|
||||
// negative check that if core container is not available at the node
|
||||
newJetty = cluster.startJettySolrRunner();
|
||||
try (HttpSolrClient httpSolrClient = getHttpSolrClient(newJetty.getBaseUrl().toString())) {
|
||||
|
||||
// positive check that our (new) "healthy" node works with direct http client
|
||||
assertEquals(CommonParams.OK, req.process(httpSolrClient).getResponse().get(CommonParams.STATUS));
|
||||
|
||||
// shutdown the core container of new node
|
||||
newJetty.getCoreContainer().shutdown();
|
||||
|
||||
// the API should no longer be reachable
|
||||
SolrException thrown = expectThrows(SolrException.class, () -> {
|
||||
req.process(httpSolrClient).getResponse().get(CommonParams.STATUS);
|
||||
fail("API shouldn't be available, and fail at above request");
|
||||
});
|
||||
assertEquals("Exception code should be 404", 404, thrown.code());
|
||||
assertTrue("Should have seen an exception containing the an error", thrown.getMessage().contains(
|
||||
"Error processing the request. CoreContainer is either not initialized or shutting down."));
|
||||
} finally {
|
||||
newJetty.stop();
|
||||
}
|
||||
|
||||
// (redundant) positive check that our (previously) existing "healthy" node (still) works
|
||||
// after getting negative results from our broken node
|
||||
// after getting negative results from our broken node and failed core container
|
||||
try (HttpSolrClient httpSolrClient = getHttpSolrClient(cluster.getJettySolrRunner(0).getBaseUrl().toString())) {
|
||||
|
||||
assertEquals(CommonParams.OK, req.process(httpSolrClient).getResponse().get(CommonParams.STATUS));
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
@ -101,11 +140,40 @@ public class HealthCheckHandlerTest extends SolrCloudTestCase {
|
||||
}
|
||||
}
|
||||
|
||||
@Test (expected = AssertionError.class)
|
||||
@Test(expected = AssertionError.class)
|
||||
public void testHealthCheckHandlerWithCloudClient() throws IOException, SolrServerException {
|
||||
// negative check of a HealthCheckRequest using cloud solr client
|
||||
HealthCheckRequest req = new HealthCheckRequest();
|
||||
req.process(cluster.getSolrClient());
|
||||
}
|
||||
|
||||
}
|
||||
@Test
|
||||
public void testHealthCheckV2Api() throws Exception {
|
||||
V2Response res = new V2Request.Builder("/node/health").build().process(cluster.getSolrClient());
|
||||
assertEquals(0, res.getStatus());
|
||||
assertEquals(CommonParams.OK, res.getResponse().get(CommonParams.STATUS));
|
||||
|
||||
// add a new node for the purpose of negative testing
|
||||
JettySolrRunner newJetty = cluster.startJettySolrRunner();
|
||||
try (HttpSolrClient httpSolrClient = getHttpSolrClient(newJetty.getBaseUrl().toString())) {
|
||||
|
||||
// positive check that our (new) "healthy" node works with direct http client
|
||||
assertEquals(CommonParams.OK, new V2Request.Builder("/node/health").build().process(httpSolrClient).
|
||||
getResponse().get(CommonParams.STATUS));
|
||||
|
||||
// now "break" our (new) node
|
||||
newJetty.getCoreContainer().getZkController().getZkClient().close();
|
||||
|
||||
// negative check of our (new) "broken" node that we deliberately put into an unhealthy state
|
||||
HttpSolrClient.RemoteSolrException e = expectThrows(HttpSolrClient.RemoteSolrException.class, () ->
|
||||
{
|
||||
new V2Request.Builder("/node/health").build().process(httpSolrClient);
|
||||
});
|
||||
assertTrue(e.getMessage(), e.getMessage().contains("Host Unavailable"));
|
||||
assertEquals(SolrException.ErrorCode.SERVICE_UNAVAILABLE.code, e.code());
|
||||
} finally {
|
||||
newJetty.stop();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -87,6 +87,7 @@ public class SolrCoreTest extends SolrTestCaseJ4 {
|
||||
|
||||
int ihCount = 0;
|
||||
{
|
||||
++ihCount; assertEquals(pathToClassMap.get("/admin/health"), "solr.HealthCheckHandler");
|
||||
++ihCount; assertEquals(pathToClassMap.get("/admin/file"), "solr.ShowFileRequestHandler");
|
||||
++ihCount; assertEquals(pathToClassMap.get("/admin/logging"), "solr.LoggingHandler");
|
||||
++ihCount; assertEquals(pathToClassMap.get("/admin/luke"), "solr.LukeRequestHandler");
|
||||
|
@ -128,6 +128,18 @@ Threads:: Return info on all JVM threads.
|
||||
v2: `api/node/threads` |{solr-javadocs}/solr-core/org/apache/solr/handler/admin/ThreadDumpHandler.html[ThreadDumpHandler] |`_ADMIN_THREADS`
|
||||
|===
|
||||
|
||||
Health:: Report the health of the node (_available only in SolrCloud mode_).
|
||||
+
|
||||
[cols="3*.",frame=none,grid=cols,options="header"]
|
||||
|===
|
||||
|API Endpoints |Class & Javadocs |Paramset
|
||||
|v1: `solr/admin/info/health`
|
||||
|
||||
v2: `api/node/health` |{solr-javadocs}/solr-core/org/apache/solr/handler/admin/HealthCheckHandler.html[HealthCheckHandler] |`_ADMIN_HEALTH`
|
||||
|===
|
||||
|
||||
This endpoint can also take the collection or core name in the path (`solr/<collection>/admin/health` or `solr/<core>/admin/health`).
|
||||
|
||||
=== Analysis Handlers
|
||||
|
||||
[horizontal]
|
||||
|
@ -176,8 +176,8 @@ public interface CommonParams {
|
||||
String OMIT_HEADER = "omitHeader";
|
||||
String CORES_HANDLER_PATH = "/admin/cores";
|
||||
String COLLECTIONS_HANDLER_PATH = "/admin/collections";
|
||||
String HEALTH_CHECK_HANDLER_PATH = "/admin/health";
|
||||
String INFO_HANDLER_PATH = "/admin/info";
|
||||
String HEALTH_CHECK_HANDLER_PATH = INFO_HANDLER_PATH + "/health";
|
||||
String CONFIGSETS_HANDLER_PATH = "/admin/configs";
|
||||
String AUTHZ_PATH = "/admin/authorization";
|
||||
String AUTHC_PATH = "/admin/authentication";
|
||||
|
@ -1,11 +1,12 @@
|
||||
{
|
||||
"description": "Provides information about system properties, threads, logging settings, and system details for a node.",
|
||||
"description": "Provides information about system properties, threads, logging settings, system details and health (available in SolrCloud mode) for a node.",
|
||||
"methods": ["GET"],
|
||||
"url": {
|
||||
"paths": [
|
||||
"/node/properties",
|
||||
"/node/threads",
|
||||
"/node/logging",
|
||||
"/node/system"]
|
||||
"/node/system",
|
||||
"/node/health"]
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user