
/**
* @license
* Copyright Google LLC All Rights Reserved.
*
* Use of this source code is governed by an MIT-style license that can be
* found in the LICENSE file at https://angular.io/license
*/
/// <reference types="node" />
import * as cluster from 'cluster';
import {Logger} from '../../../../src/ngtsc/logging';
import {parseCommandLineOptions} from '../../command_line_options';
import {getSharedSetup} from '../../ngcc_options';
import {CreateCompileFn} from '../api';
import {getCreateCompileFn} from '../create_compile_function';
import {stringifyTask} from '../tasks/utils';
import {MessageToWorker} from './api';
import {ClusterWorkerPackageJsonUpdater} from './package_json_updater';
import {sendMessageToMaster} from './utils';
// Cluster worker entry point
if (require.main === module) {
  (async () => {
    process.title = 'ngcc (worker)';

    try {
      const {
        logger,
        pathMappings,
        enableI18nLegacyMessageIdFormat,
        fileSystem,
        tsConfig,
        getFileWriter,
      } = getSharedSetup(parseCommandLineOptions(process.argv.slice(2)));

      // NOTE: To avoid file corruption, each `ngcc` invocation creates only _one_ instance of
      // `PackageJsonUpdater` that actually writes to disk (across all processes).
      // In cluster workers we use a `PackageJsonUpdater` that delegates to the cluster master.
      const pkgJsonUpdater = new ClusterWorkerPackageJsonUpdater();
      const fileWriter = getFileWriter(pkgJsonUpdater);

      // The function for creating the `compile()` function.
      const createCompileFn = getCreateCompileFn(
          fileSystem, logger, fileWriter, enableI18nLegacyMessageIdFormat, tsConfig, pathMappings);

      await startWorker(logger, createCompileFn);
      process.exitCode = 0;
    } catch (e) {
      console.error(e.stack || e.message);
      process.exit(1);
    }
  })();
}
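
/**
 * Run the worker: listen for `process-task` messages from the cluster master, process each task
 * with a `compile()` function created via `createCompileFn`, and report progress back to the
 * master via `transformed-files`, `task-completed` and `error` messages.
 *
 * For illustration only, the master's side of this protocol looks roughly like the following
 * sketch (not ngcc's actual master implementation):
 *
 * ```ts
 * const worker = cluster.fork();                // spawn a process running this file
 * worker.send({type: 'process-task', task});    // hand the worker a task (`task` is a placeholder)
 * worker.on('message', msg => handleMsg(msg));  // `handleMsg` is a placeholder for the master's logic
 * ```
 */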
export async function startWorker(logger: Logger, createCompileFn: CreateCompileFn): Promise<void> {
  if (cluster.isMaster) {
    throw new Error('Tried to run cluster worker on the master process.');
  }
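
  // The two callbacks passed to `createCompileFn()` keep the cluster master informed: the first
  // reports which files are about to be written to disk (so that, if this worker crashes
  // mid-write, the master can revert any partial changes and re-queue the task), and the second
  // reports the outcome of each completed task.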
  const compile = createCompileFn(
      transformedFiles => sendMessageToMaster({
        type: 'transformed-files',
        files: transformedFiles.map(f => f.path),
      }),
      (_task, outcome, message) => sendMessageToMaster({type: 'task-completed', outcome, message}));

  // Listen for `ProcessTaskMessage`s and process tasks.
  cluster.worker.on('message', async (msg: MessageToWorker) => {
    try {
      switch (msg.type) {
        case 'process-task':
          logger.debug(
              `[Worker #${cluster.worker.id}] Processing task: ${stringifyTask(msg.task)}`);
          return await compile(msg.task);
        default:
          throw new Error(
              `[Worker #${cluster.worker.id}] Invalid message received: ${JSON.stringify(msg)}`);
      }
    } catch (err) {
      switch (err && err.code) {
        case 'ENOMEM':
          // Not being able to allocate enough memory is not necessarily a problem with processing
          // the current task. It could just mean that there are too many tasks being processed
          // simultaneously.
          //
          // Exit with an error and let the cluster master decide how to handle this.
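          // (The master will not try to re-spawn a crashed worker, so the size of the worker
          // pool gradually adapts to the resources available on the system.)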
          logger.warn(`[Worker #${cluster.worker.id}] ${err.stack || err.message}`);
          return process.exit(1);
        default:
          await sendMessageToMaster({
            type: 'error',
            error: (err instanceof Error) ? (err.stack || err.message) : err,
          });
      }
    }
  });

  // Return a promise that never resolves, so that `startWorker()` does not complete on its own
  // and the worker keeps processing tasks until its process is terminated.
  return new Promise(() => undefined);
}