Restart running monitors if no heartbeat (#3952)

This commit is contained in:
Louis Lam 2023-11-01 09:36:12 +08:00 committed by GitHub
parent 9f170a68d7
commit c43223a16d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 106 additions and 1 deletions

View File

@ -3,7 +3,7 @@ const dayjs = require("dayjs");
const axios = require("axios"); const axios = require("axios");
const { Prometheus } = require("../prometheus"); const { Prometheus } = require("../prometheus");
const { log, UP, DOWN, PENDING, MAINTENANCE, flipStatus, TimeLogger, MAX_INTERVAL_SECOND, MIN_INTERVAL_SECOND, const { log, UP, DOWN, PENDING, MAINTENANCE, flipStatus, TimeLogger, MAX_INTERVAL_SECOND, MIN_INTERVAL_SECOND,
SQL_DATETIME_FORMAT SQL_DATETIME_FORMAT, isDev, sleep, getRandomInt
} = require("../../src/util"); } = require("../../src/util");
const { tcping, ping, dnsResolve, checkCertificate, checkStatusCode, getTotalClientInRoom, setting, mssqlQuery, postgresQuery, mysqlQuery, mqttAsync, setSetting, httpNtlm, radius, grpcQuery, const { tcping, ping, dnsResolve, checkCertificate, checkStatusCode, getTotalClientInRoom, setting, mssqlQuery, postgresQuery, mysqlQuery, mqttAsync, setSetting, httpNtlm, radius, grpcQuery,
redisPingAsync, mongodbPing, kafkaProducerAsync, getOidcTokenClientCredentials, rootCertificatesFingerprints redisPingAsync, mongodbPing, kafkaProducerAsync, getOidcTokenClientCredentials, rootCertificatesFingerprints
@ -328,6 +328,16 @@ class Monitor extends BeanModel {
} }
} }
// Evil
if (isDev) {
if (process.env.EVIL_RANDOM_MONITOR_SLEEP === "SURE") {
if (getRandomInt(0, 100) === 0) {
log.debug("evil", `[${this.name}] Evil mode: Random sleep: ` + beatInterval * 10000);
await sleep(beatInterval * 10000);
}
}
}
// Expose here for prometheus update // Expose here for prometheus update
// undefined if not https // undefined if not https
let tlsInfo = undefined; let tlsInfo = undefined;
@ -995,6 +1005,7 @@ class Monitor extends BeanModel {
if (! this.isStop) { if (! this.isStop) {
log.debug("monitor", `[${this.name}] SetTimeout for next check.`); log.debug("monitor", `[${this.name}] SetTimeout for next check.`);
this.heartbeatInterval = setTimeout(safeBeat, beatInterval * 1000); this.heartbeatInterval = setTimeout(safeBeat, beatInterval * 1000);
this.lastScheduleBeatTime = dayjs();
} else { } else {
log.info("monitor", `[${this.name}] isStop = true, no next check.`); log.info("monitor", `[${this.name}] isStop = true, no next check.`);
} }
@ -1004,7 +1015,9 @@ class Monitor extends BeanModel {
/** Get a heartbeat and handle errors */ /** Get a heartbeat and handle errors */
const safeBeat = async () => { const safeBeat = async () => {
try { try {
this.lastStartBeatTime = dayjs();
await beat(); await beat();
this.lastEndBeatTime = dayjs();
} catch (e) { } catch (e) {
console.trace(e); console.trace(e);
UptimeKumaServer.errorLog(e, false); UptimeKumaServer.errorLog(e, false);
@ -1013,6 +1026,9 @@ class Monitor extends BeanModel {
if (! this.isStop) { if (! this.isStop) {
log.info("monitor", "Try to restart the monitor"); log.info("monitor", "Try to restart the monitor");
this.heartbeatInterval = setTimeout(safeBeat, this.interval * 1000); this.heartbeatInterval = setTimeout(safeBeat, this.interval * 1000);
this.lastScheduleBeatTime = dayjs();
} else {
log.info("monitor", "isStop = true, no next check.");
} }
} }
}; };

View File

@ -12,6 +12,7 @@ const { Settings } = require("./settings");
const dayjs = require("dayjs"); const dayjs = require("dayjs");
const childProcess = require("child_process"); const childProcess = require("child_process");
const path = require("path"); const path = require("path");
const axios = require("axios");
// DO NOT IMPORT HERE IF THE MODULES USED `UptimeKumaServer.getInstance()`, put at the bottom of this file instead. // DO NOT IMPORT HERE IF THE MODULES USED `UptimeKumaServer.getInstance()`, put at the bottom of this file instead.
/** /**
@ -62,6 +63,8 @@ class UptimeKumaServer {
*/ */
jwtSecret = null; jwtSecret = null;
checkMonitorsInterval = null;
static getInstance(args) { static getInstance(args) {
if (UptimeKumaServer.instance == null) { if (UptimeKumaServer.instance == null) {
UptimeKumaServer.instance = new UptimeKumaServer(args); UptimeKumaServer.instance = new UptimeKumaServer(args);
@ -75,6 +78,9 @@ class UptimeKumaServer {
const sslCert = args["ssl-cert"] || process.env.UPTIME_KUMA_SSL_CERT || process.env.SSL_CERT || undefined; const sslCert = args["ssl-cert"] || process.env.UPTIME_KUMA_SSL_CERT || process.env.SSL_CERT || undefined;
const sslKeyPassphrase = args["ssl-key-passphrase"] || process.env.UPTIME_KUMA_SSL_KEY_PASSPHRASE || process.env.SSL_KEY_PASSPHRASE || undefined; const sslKeyPassphrase = args["ssl-key-passphrase"] || process.env.UPTIME_KUMA_SSL_KEY_PASSPHRASE || process.env.SSL_KEY_PASSPHRASE || undefined;
// Set default axios timeout to 5 minutes instead of infinity
axios.defaults.timeout = 300 * 1000;
log.info("server", "Creating express and socket.io instance"); log.info("server", "Creating express and socket.io instance");
this.app = express(); this.app = express();
if (sslKey && sslCert) { if (sslKey && sslCert) {
@ -346,6 +352,10 @@ class UptimeKumaServer {
if (enable || enable === null) { if (enable || enable === null) {
this.startNSCDServices(); this.startNSCDServices();
} }
this.checkMonitorsInterval = setInterval(() => {
this.checkMonitors();
}, 60 * 1000);
} }
/** /**
@ -358,6 +368,8 @@ class UptimeKumaServer {
if (enable || enable === null) { if (enable || enable === null) {
this.stopNSCDServices(); this.stopNSCDServices();
} }
clearInterval(this.checkMonitorsInterval);
} }
/** /**
@ -388,6 +400,83 @@ class UptimeKumaServer {
} }
} }
} }
/**
* Start the specified monitor
* @param {number} monitorID ID of monitor to start
* @returns {Promise<void>}
*/
async startMonitor(monitorID) {
log.info("manage", `Resume Monitor: ${monitorID} by server`);
await R.exec("UPDATE monitor SET active = 1 WHERE id = ?", [
monitorID,
]);
let monitor = await R.findOne("monitor", " id = ? ", [
monitorID,
]);
if (monitor.id in this.monitorList) {
this.monitorList[monitor.id].stop();
}
this.monitorList[monitor.id] = monitor;
monitor.start(this.io);
}
/**
* Restart a given monitor
* @param {number} monitorID ID of monitor to start
* @returns {Promise<void>}
*/
async restartMonitor(monitorID) {
return await this.startMonitor(monitorID);
}
/**
* Check if monitors are running properly
*/
async checkMonitors() {
log.debug("monitor_checker", "Checking monitors");
for (let monitorID in this.monitorList) {
let monitor = this.monitorList[monitorID];
// Not for push monitor
if (monitor.type === "push") {
continue;
}
if (!monitor.active) {
continue;
}
// Check the lastStartBeatTime, if it is too long, then restart
if (monitor.lastScheduleBeatTime ) {
let diff = dayjs().diff(monitor.lastStartBeatTime, "second");
if (diff > monitor.interval * 1.5) {
log.error("monitor_checker", `Monitor Interval: ${monitor.interval} Monitor ` + monitorID + " lastStartBeatTime diff: " + diff);
log.error("monitor_checker", "Unexpected error: Monitor " + monitorID + " is struck for unknown reason");
log.error("monitor_checker", "Last start beat time: " + R.isoDateTime(monitor.lastStartBeatTime));
log.error("monitor_checker", "Last end beat time: " + R.isoDateTime(monitor.lastEndBeatTime));
log.error("monitor_checker", "Last ScheduleBeatTime: " + R.isoDateTime(monitor.lastScheduleBeatTime));
// Restart
log.error("monitor_checker", `Restarting monitor ${monitorID} automatically now`);
this.restartMonitor(monitorID);
} else {
//log.debug("monitor_checker", "Monitor " + monitorID + " is running normally");
}
} else {
//log.debug("monitor_checker", "Monitor " + monitorID + " is not started yet, skipp");
}
}
log.debug("monitor_checker", "Checking monitors end");
}
} }
module.exports = { module.exports = {