ARTEMIS-1367 Add LOG to the Critical Analyzer policies

This commit is contained in:
Francesco Nigro 2017-08-22 17:37:01 +02:00
parent c54a26da3c
commit 249745506e
13 changed files with 152 additions and 71 deletions

View File

@ -66,7 +66,7 @@ ${ping-config.settings}${journal-buffer.settings}${connector-config.settings}
<critical-analyzer-check-period>60000</critical-analyzer-check-period>
<critical-analyzer-halt>true</critical-analyzer-halt>
<critical-analyzer-policy>HALT</critical-analyzer-policy>
${global-max-section}
<acceptors>

View File

@ -481,7 +481,7 @@ public final class ActiveMQDefaultConfiguration {
public static final long DEFAULT_ANALYZE_CRITICAL_TIMEOUT = 120000;
public static final boolean DEFAULT_ANALYZE_CRITICAL_HALT = false;
public static final CriticalAnalyzerPolicy DEFAULT_ANALYZE_CRITICAL_POLICY = CriticalAnalyzerPolicy.LOG;
/**
* If true then the ActiveMQ Artemis Server will make use of any Protocol Managers that are available on the classpath. If false then only the core protocol will be available, unless in Embedded mode where users can inject their own Protocol Managers.
@ -1303,8 +1303,8 @@ public final class ActiveMQDefaultConfiguration {
return timeout / 2;
}
public static boolean getCriticalAnalyzerHalt() {
return DEFAULT_ANALYZE_CRITICAL_HALT;
public static CriticalAnalyzerPolicy getCriticalAnalyzerPolicy() {
return DEFAULT_ANALYZE_CRITICAL_POLICY;
}

View File

@ -0,0 +1,22 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.activemq.artemis.api.config;
/**
 * Policy applied by the broker when the critical analyzer reports that a
 * component is not responsive.
 * <p>
 * The constant names are the values accepted by the
 * {@code critical-analyzer-policy} configuration element.
 */
public enum CriticalAnalyzerPolicy {
   /** Halt the virtual machine running the broker. */
   HALT,

   /** Stop the broker server. */
   SHUTDOWN,

   /** Only log a warning that the component is not responsive. */
   LOG
}

View File

@ -23,6 +23,7 @@ import java.util.Map;
import java.util.Properties;
import java.util.Set;
import org.apache.activemq.artemis.api.config.CriticalAnalyzerPolicy;
import org.apache.activemq.artemis.api.core.BroadcastGroupConfiguration;
import org.apache.activemq.artemis.api.core.DiscoveryGroupConfiguration;
import org.apache.activemq.artemis.api.core.SimpleString;
@ -87,9 +88,9 @@ public interface Configuration {
Configuration setCriticalAnalyzerCheckPeriod(long checkPeriod);
boolean isCriticalAnalyzerHalt();
CriticalAnalyzerPolicy getCriticalAnalyzerPolicy();
Configuration setCriticalAnalyzerHalt(boolean halt);
Configuration setCriticalAnalyzerPolicy(CriticalAnalyzerPolicy policy);
/**

View File

@ -41,6 +41,7 @@ import java.util.Set;
import java.util.concurrent.CopyOnWriteArrayList;
import org.apache.activemq.artemis.api.config.ActiveMQDefaultConfiguration;
import org.apache.activemq.artemis.api.config.CriticalAnalyzerPolicy;
import org.apache.activemq.artemis.api.core.BroadcastGroupConfiguration;
import org.apache.activemq.artemis.api.core.DiscoveryGroupConfiguration;
import org.apache.activemq.artemis.api.core.SimpleString;
@ -296,7 +297,7 @@ public class ConfigurationImpl implements Configuration, Serializable {
private boolean criticalAnalyzer = ActiveMQDefaultConfiguration.getCriticalAnalyzer();
private boolean criticalAnalyzerHalt = ActiveMQDefaultConfiguration.getCriticalAnalyzerHalt();
private CriticalAnalyzerPolicy criticalAnalyzerPolicy = ActiveMQDefaultConfiguration.getCriticalAnalyzerPolicy();
private long criticalAnalyzerTimeout = ActiveMQDefaultConfiguration.getCriticalAnalyzerTimeout();
@ -2109,13 +2110,13 @@ public class ConfigurationImpl implements Configuration, Serializable {
}
@Override
public boolean isCriticalAnalyzerHalt() {
return criticalAnalyzerHalt;
public CriticalAnalyzerPolicy getCriticalAnalyzerPolicy() {
return criticalAnalyzerPolicy;
}
@Override
public Configuration setCriticalAnalyzerHalt(boolean halt) {
this.criticalAnalyzerHalt = halt;
public Configuration setCriticalAnalyzerPolicy(CriticalAnalyzerPolicy policy) {
this.criticalAnalyzerPolicy = policy;
return this;
}

View File

@ -31,6 +31,7 @@ import java.util.Set;
import org.apache.activemq.artemis.ArtemisConstants;
import org.apache.activemq.artemis.api.config.ActiveMQDefaultConfiguration;
import org.apache.activemq.artemis.api.config.CriticalAnalyzerPolicy;
import org.apache.activemq.artemis.api.core.BroadcastEndpointFactory;
import org.apache.activemq.artemis.api.core.BroadcastGroupConfiguration;
import org.apache.activemq.artemis.api.core.DiscoveryGroupConfiguration;
@ -614,7 +615,7 @@ public final class FileConfigurationParser extends XMLConfigurationUtil {
config.setCriticalAnalyzerCheckPeriod(getLong(e, "critical-analyzer-check-period", config.getCriticalAnalyzerCheckPeriod(), Validators.GE_ZERO));
config.setCriticalAnalyzerHalt(getBoolean(e, "critical-analyzer-halt", config.isCriticalAnalyzerHalt()));
config.setCriticalAnalyzerPolicy(CriticalAnalyzerPolicy.valueOf(getString(e, "critical-analyzer-policy", config.getCriticalAnalyzerPolicy().name(), Validators.NOT_NULL_OR_EMPTY)));
parseAddressSettings(e, config);

View File

@ -1639,6 +1639,10 @@ public interface ActiveMQServerLogger extends BasicLogger {
@Message(id = 224080, value = "The server process will now be stopped, as component {0} is not responsive", format = Message.Format.MESSAGE_FORMAT)
void criticalSystemShutdown(Object component);
@LogMessage(level = Logger.Level.WARN)
@Message(id = 224081, value = "The component {0} is not responsive", format = Message.Format.MESSAGE_FORMAT)
void criticalSystemLog(Object component);
@LogMessage(level = Logger.Level.INFO)
@Message(id = 224076, value = "UnDeploying address {0}", format = Message.Format.MESSAGE_FORMAT)
void undeployAddress(SimpleString addressName);

View File

@ -49,6 +49,7 @@ import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import org.apache.activemq.artemis.api.config.ActiveMQDefaultConfiguration;
import org.apache.activemq.artemis.api.config.CriticalAnalyzerPolicy;
import org.apache.activemq.artemis.api.core.ActiveMQDeleteAddressException;
import org.apache.activemq.artemis.api.core.ActiveMQException;
import org.apache.activemq.artemis.api.core.Pair;
@ -171,9 +172,9 @@ import org.apache.activemq.artemis.utils.TimeUtils;
import org.apache.activemq.artemis.utils.VersionLoader;
import org.apache.activemq.artemis.utils.actors.OrderedExecutorFactory;
import org.apache.activemq.artemis.utils.collections.ConcurrentHashSet;
import org.apache.activemq.artemis.utils.critical.CriticalAction;
import org.apache.activemq.artemis.utils.critical.CriticalAnalyzer;
import org.apache.activemq.artemis.utils.critical.CriticalAnalyzerImpl;
import org.apache.activemq.artemis.utils.critical.CriticalComponent;
import org.apache.activemq.artemis.utils.critical.EmptyCriticalAnalyzer;
import org.jboss.logging.Logger;
@ -511,58 +512,95 @@ public class ActiveMQServerImpl implements ActiveMQServer {
this.getCriticalAnalyzer().start();
}
this.getCriticalAnalyzer().addAction((CriticalComponent c) -> {
CriticalAction criticalAction = null;
final CriticalAnalyzerPolicy criticalAnalyzerPolicy = configuration.getCriticalAnalyzerPolicy();
switch (criticalAnalyzerPolicy) {
if (configuration.isCriticalAnalyzerHalt()) {
ActiveMQServerLogger.LOGGER.criticalSystemHalt(c);
} else {
ActiveMQServerLogger.LOGGER.criticalSystemShutdown(c);
}
case HALT:
criticalAction = criticalComponent -> {
threadDump();
ActiveMQServerLogger.LOGGER.criticalSystemHalt(criticalComponent);
// on the case of a critical failure, -1 cannot simply means forever.
// in case graceful is -1, we will set it to 30 seconds
long timeout = configuration.getGracefulShutdownTimeout() < 0 ? 30000 : configuration.getGracefulShutdownTimeout();
threadDump();
Thread notificationSender = new Thread() {
@Override
public void run() {
try {
callBrokerPlugins(hasBrokerPlugins() ? plugin -> plugin.criticalFailure(c) : null);
} catch (Throwable e) {
logger.warn(e.getMessage(), e);
}
}
};
// on the case of a critical failure, -1 cannot simply means forever.
// in case graceful is -1, we will set it to 30 seconds
long timeout = configuration.getGracefulShutdownTimeout() < 0 ? 30000 : configuration.getGracefulShutdownTimeout();
// I'm using a different thread here as we need to manage timeouts
notificationSender.start();
try {
notificationSender.join(timeout);
} catch (InterruptedException ignored) {
}
if (configuration.isCriticalAnalyzerHalt()) {
Runtime.getRuntime().halt(70); // Linux systems will have /usr/include/sysexits.h showing 70 as internal software error
} else {
// you can't stop from the check thread,
// nor can use an executor
Thread stopThread = new Thread() {
@Override
public void run() {
try {
ActiveMQServerImpl.this.stop();
} catch (Throwable e) {
logger.warn(e.getMessage(), e);
Thread notificationSender = new Thread() {
@Override
public void run() {
try {
callBrokerPlugins(hasBrokerPlugins() ? plugin -> plugin.criticalFailure(criticalComponent) : null);
} catch (Throwable e) {
logger.warn(e.getMessage(), e);
}
}
}
};
stopThread.start();
};
}
});
// I'm using a different thread here as we need to manage timeouts
notificationSender.start();
try {
notificationSender.join(timeout);
} catch (InterruptedException ignored) {
}
Runtime.getRuntime().halt(70); // Linux systems will have /usr/include/sysexits.h showing 70 as internal software error
};
break;
case SHUTDOWN:
criticalAction = criticalComponent -> {
ActiveMQServerLogger.LOGGER.criticalSystemShutdown(criticalComponent);
threadDump();
// on the case of a critical failure, -1 cannot simply means forever.
// in case graceful is -1, we will set it to 30 seconds
long timeout = configuration.getGracefulShutdownTimeout() < 0 ? 30000 : configuration.getGracefulShutdownTimeout();
Thread notificationSender = new Thread() {
@Override
public void run() {
try {
callBrokerPlugins(hasBrokerPlugins() ? plugin -> plugin.criticalFailure(criticalComponent) : null);
} catch (Throwable e) {
logger.warn(e.getMessage(), e);
}
}
};
// I'm using a different thread here as we need to manage timeouts
notificationSender.start();
try {
notificationSender.join(timeout);
} catch (InterruptedException ignored) {
}
// you can't stop from the check thread,
// nor can use an executor
Thread stopThread = new Thread() {
@Override
public void run() {
try {
ActiveMQServerImpl.this.stop();
} catch (Throwable e) {
logger.warn(e.getMessage(), e);
}
}
};
stopThread.start();
};
break;
case LOG:
criticalAction = ActiveMQServerLogger.LOGGER::criticalSystemLog;
break;
}
this.getCriticalAnalyzer().addAction(criticalAction);
configuration.parseSystemProperties();

View File

@ -771,7 +771,7 @@
<xsd:element name="critical-analyzer" type="xsd:boolean" default="true" maxOccurs="1" minOccurs="0">
<xsd:annotation>
<xsd:documentation>
should analyze response time on critical paths and decide for broker shutdown or halt.
should analyze response time on critical paths and decide for broker log, shutdown or halt.
</xsd:documentation>
</xsd:annotation>
</xsd:element>
@ -792,12 +792,19 @@
</xsd:annotation>
</xsd:element>
<xsd:element name="critical-analyzer-halt" type="xsd:boolean" default="false" maxOccurs="1" minOccurs="0">
<xsd:element name="critical-analyzer-policy" default="LOG" maxOccurs="1" minOccurs="0">
<xsd:annotation>
<xsd:documentation>
Should the server be shutdown or halted upon critical analysis failure.
Whether the server should only log, be shut down or be halted upon critical analysis failure.
</xsd:documentation>
</xsd:annotation>
<xsd:simpleType>
<xsd:restriction base="xsd:string">
<xsd:enumeration value="LOG"/>
<xsd:enumeration value="HALT"/>
<xsd:enumeration value="SHUTDOWN"/>
</xsd:restriction>
</xsd:simpleType>
</xsd:element>
<xsd:element name="security-settings" maxOccurs="1" minOccurs="0">

View File

@ -29,6 +29,7 @@ import java.util.Map;
import java.util.Set;
import org.apache.activemq.artemis.api.config.ActiveMQDefaultConfiguration;
import org.apache.activemq.artemis.api.config.CriticalAnalyzerPolicy;
import org.apache.activemq.artemis.api.core.BroadcastGroupConfiguration;
import org.apache.activemq.artemis.api.core.DiscoveryGroupConfiguration;
import org.apache.activemq.artemis.api.core.RoutingType;
@ -385,7 +386,7 @@ public class FileConfigurationTest extends ConfigurationImplTest {
assertEquals(333, conf.getCriticalAnalyzerCheckPeriod());
assertEquals(777, conf.getCriticalAnalyzerTimeout());
assertEquals(false, conf.isCriticalAnalyzer());
assertEquals(true, conf.isCriticalAnalyzerHalt());
assertEquals(CriticalAnalyzerPolicy.HALT, conf.getCriticalAnalyzerPolicy());
assertEquals(false, conf.isJournalDatasync());
}

View File

@ -57,7 +57,7 @@
<global-max-size>1234567</global-max-size>
<max-disk-usage>37</max-disk-usage>
<disk-scan-period>123</disk-scan-period>
<critical-analyzer-halt>true</critical-analyzer-halt>
<critical-analyzer-policy>HALT</critical-analyzer-policy>
<critical-analyzer-check-period>333</critical-analyzer-check-period>
<critical-analyzer-timeout>777</critical-analyzer-timeout>
<critical-analyzer>false</critical-analyzer>

View File

@ -121,7 +121,7 @@ system-property-prefix | Prefix for replacing configuration settings using Bean
[critical-analyzer](critical-analysis.md) | Enable or disable the critical analysis (default true)
[critical-analyzer-timeout](critical-analysis.md) | Timeout used to do the critical analysis (default 120000 milliseconds)
[critical-analyzer-check-period](critical-analysis.md) | Time used to check the response times (default half of critical-analyzer-timeout)
[critical-analyzer-halt](critical-analysis.md) | Should the VM be halted upon failures (default false)
[critical-analyzer-policy](critical-analysis.md) | Whether the server should log, be halted or be shut down upon failures (default `LOG`)
#address-setting type

View File

@ -26,28 +26,34 @@ Name | Description
critical-analyzer | Enable or disable the critical analysis (default true)
critical-analyzer-timeout | Timeout used to do the critical analysis (default 120000 milliseconds)
critical-analyzer-check-period | Time used to check the response times (default half of critical-analyzer-timeout)
critical-analyzer-halt | Should the VM be halted upon failures (default false)
critical-analyzer-policy | Whether the server should log, be halted or be shut down upon failures (default `LOG`)
The default for critical-analyzer-halt is false, however the generated broker.xml will have it set to true. That is because we cannot halt the VM if you are embedding ActiveMQ Artemis into an application server or on a multi tenant environment.
The default for critical-analyzer-policy is `LOG`; however, the generated broker.xml will have it set to `HALT`. The default is `LOG` because we cannot halt the VM if you are embedding ActiveMQ Artemis into an application server or running it in a multi-tenant environment.
The broker on the distribution will then have it set to true, but if you use it in any other way the default will be false.
The broker in the distribution will therefore have it set to `HALT`, but if you use ActiveMQ Artemis in any other way the default will be `LOG`.
## What would you expect
- You will see some logs
If you have critical-analyzer-halt=true
If you have critical-analyzer-policy=HALT
```
[Artemis Critical Analyzer] 18:10:00,831 ERROR [org.apache.activemq.artemis.core.server] AMQ224079: The process for the virtual machine will be killed, as component org.apache.activemq.artemis.tests.integration.critical.CriticalSimpleTest$2@5af97850 is not responsive
```
Or if you have critical-analyzer-halt=false
While if you have critical-analyzer-policy=SHUTDOWN
```
[Artemis Critical Analyzer] 18:07:53,475 ERROR [org.apache.activemq.artemis.core.server] AMQ224080: The server process will now be stopped, as component org.apache.activemq.artemis.tests.integration.critical.CriticalSimpleTest$2@5af97850 is not responsive
```
Or if you have critical-analyzer-policy=LOG
```
[Artemis Critical Analyzer] 18:11:52,145 WARN [org.apache.activemq.artemis.core.server] AMQ224081: The component org.apache.activemq.artemis.tests.integration.critical.CriticalSimpleTest$2@5af97850 is not responsive
```
If the policy is `HALT` or `SHUTDOWN`, you will also see a simple thread dump of the server
```
@ -77,9 +83,9 @@ AMQ119003: End Thread dump
```
- The Server will be halted if configured to halt
- The Server will be halted if configured to `HALT`
- The system will be stopped if no halt is used:
- The system will be stopped if `SHUTDOWN` is used:
* Notice that if the system is not behaving well, there are no guarantees the stop will work.