mirror of
https://github.com/game-ci/unity-builder.git
synced 2026-06-01 06:16:14 -07:00
fix(cli-provider): add timeout protection for external CLI processes
Prevent builds from hanging indefinitely when CLI provider subprocess is unresponsive. Default 2h for runTaskInWorkflow, 1h for watchWorkflow. Graceful SIGTERM with 10s grace before SIGKILL. - Added RUN_TASK_TIMEOUT_MS (2 hours) and WATCH_WORKFLOW_TIMEOUT_MS (1 hour) - Added gracefulKill helper: SIGTERM first, SIGKILL after 10s grace period - runTaskInWorkflow and watchWorkflow now have timeout protection - Existing execute() method upgraded to use gracefulKill - core.error() called with clear human-readable timeout message - Added comprehensive tests: timeout triggers, SIGKILL escalation, grace period cancellation on voluntary exit, normal completion Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -20,6 +20,7 @@ jest.mock('@actions/core', () => ({
|
||||
jest.mock('../provider-git-manager');
|
||||
|
||||
import { spawn } from 'child_process';
|
||||
import * as core from '@actions/core';
|
||||
import CliProvider from './cli-provider';
|
||||
|
||||
const mockSpawn = spawn as jest.MockedFunction<typeof spawn>;
|
||||
@@ -215,6 +216,18 @@ describe('CliProvider', () => {
|
||||
const result = await promise;
|
||||
expect(result).toBe('line 1\nline 2');
|
||||
});
|
||||
|
||||
it('rejects on spawn error', async () => {
|
||||
const child = createMockChildProcess();
|
||||
mockSpawn.mockReturnValue(child);
|
||||
|
||||
const provider = new CliProvider('/nonexistent/path', {} as any);
|
||||
const promise = provider.runTaskInWorkflow('guid', 'image', 'cmd', '/mnt', '/work', [], []);
|
||||
|
||||
child.emit('error', new Error('ENOENT'));
|
||||
|
||||
await expect(promise).rejects.toThrow('failed to spawn executable');
|
||||
});
|
||||
});
|
||||
|
||||
describe('cleanupWorkflow', () => {
|
||||
@@ -404,7 +417,7 @@ describe('CliProvider', () => {
|
||||
jest.useRealTimers();
|
||||
});
|
||||
|
||||
it('rejects and kills process when command times out', async () => {
|
||||
it('rejects and kills process when execute command times out', async () => {
|
||||
const child = createMockChildProcess();
|
||||
mockSpawn.mockReturnValue(child);
|
||||
|
||||
@@ -417,6 +430,97 @@ describe('CliProvider', () => {
|
||||
await expect(promise).rejects.toThrow('timed out');
|
||||
expect(child.kill).toHaveBeenCalledWith('SIGTERM');
|
||||
});
|
||||
|
||||
it('rejects and kills process when runTaskInWorkflow times out', async () => {
|
||||
const child = createMockChildProcess();
|
||||
mockSpawn.mockReturnValue(child);
|
||||
|
||||
const provider = new CliProvider('/path/to/exe', {} as any);
|
||||
const promise = provider.runTaskInWorkflow('guid', 'image', 'cmd', '/mnt', '/work', [], []);
|
||||
|
||||
// Advance past the 2-hour timeout (7_200_000ms)
|
||||
jest.advanceTimersByTime(7_200_001);
|
||||
|
||||
await expect(promise).rejects.toThrow('run-task timed out');
|
||||
expect(child.kill).toHaveBeenCalledWith('SIGTERM');
|
||||
expect(core.error).toHaveBeenCalledWith(expect.stringContaining('CLI provider timed out after 120 minutes'));
|
||||
});
|
||||
|
||||
it('rejects and kills process when watchWorkflow times out', async () => {
|
||||
const child = createMockChildProcess();
|
||||
mockSpawn.mockReturnValue(child);
|
||||
|
||||
const provider = new CliProvider('/path/to/exe', {} as any);
|
||||
const promise = provider.watchWorkflow();
|
||||
|
||||
// Advance past the 1-hour timeout (3_600_000ms)
|
||||
jest.advanceTimersByTime(3_600_001);
|
||||
|
||||
await expect(promise).rejects.toThrow('watch-workflow timed out');
|
||||
expect(child.kill).toHaveBeenCalledWith('SIGTERM');
|
||||
expect(core.error).toHaveBeenCalledWith(expect.stringContaining('CLI provider timed out after 60 minutes'));
|
||||
});
|
||||
|
||||
it('escalates to SIGKILL after grace period on runTaskInWorkflow timeout', async () => {
|
||||
const child = createMockChildProcess();
|
||||
mockSpawn.mockReturnValue(child);
|
||||
|
||||
const provider = new CliProvider('/path/to/exe', {} as any);
|
||||
const promise = provider.runTaskInWorkflow('guid', 'image', 'cmd', '/mnt', '/work', [], []);
|
||||
|
||||
// Trigger the timeout
|
||||
jest.advanceTimersByTime(7_200_001);
|
||||
|
||||
await expect(promise).rejects.toThrow('timed out');
|
||||
|
||||
// SIGTERM was sent
|
||||
expect(child.kill).toHaveBeenCalledWith('SIGTERM');
|
||||
|
||||
// Advance past the 10s grace period — SIGKILL should fire
|
||||
jest.advanceTimersByTime(10_001);
|
||||
expect(child.kill).toHaveBeenCalledWith('SIGKILL');
|
||||
});
|
||||
|
||||
it('does not send SIGKILL if process exits before grace period', async () => {
|
||||
const child = createMockChildProcess();
|
||||
mockSpawn.mockReturnValue(child);
|
||||
|
||||
const provider = new CliProvider('/path/to/exe', {} as any);
|
||||
const promise = provider.runTaskInWorkflow('guid', 'image', 'cmd', '/mnt', '/work', [], []);
|
||||
|
||||
// Trigger the timeout
|
||||
jest.advanceTimersByTime(7_200_001);
|
||||
|
||||
await expect(promise).rejects.toThrow('timed out');
|
||||
|
||||
// Process exits voluntarily after SIGTERM
|
||||
child.emit('close', 143);
|
||||
|
||||
// Advance past the grace period — SIGKILL should NOT fire because process already exited
|
||||
jest.advanceTimersByTime(10_001);
|
||||
expect(child.kill).toHaveBeenCalledWith('SIGTERM');
|
||||
// SIGKILL should not have been called because the close event cleared the timer
|
||||
expect(child.kill).not.toHaveBeenCalledWith('SIGKILL');
|
||||
});
|
||||
|
||||
it('clears timeout when runTaskInWorkflow completes normally', async () => {
|
||||
const child = createMockChildProcess();
|
||||
mockSpawn.mockReturnValue(child);
|
||||
|
||||
const provider = new CliProvider('/path/to/exe', {} as any);
|
||||
const promise = provider.runTaskInWorkflow('guid', 'image', 'cmd', '/mnt', '/work', [], []);
|
||||
|
||||
// Process completes before timeout
|
||||
child.stdout.emit('data', Buffer.from(JSON.stringify({ success: true, output: 'done' }) + '\n'));
|
||||
child.emit('close', 0);
|
||||
|
||||
const result = await promise;
|
||||
expect(result).toBe('done');
|
||||
|
||||
// Advance far past timeout — should NOT reject
|
||||
jest.advanceTimersByTime(8_000_000);
|
||||
expect(child.kill).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
describe('available providers list', () => {
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { spawn } from 'child_process';
|
||||
import { spawn, ChildProcess } from 'child_process';
|
||||
import * as core from '@actions/core';
|
||||
import { ProviderInterface } from '../provider-interface';
|
||||
import BuildParameters from '../../../build-parameters';
|
||||
import OrchestratorEnvironmentVariable from '../../options/orchestrator-environment-variable';
|
||||
@@ -9,6 +10,29 @@ import OrchestratorLogger from '../../services/core/orchestrator-logger';
|
||||
import { CliProviderRequest, CliProviderResponse, CliProviderSubcommand } from './cli-provider-protocol';
|
||||
|
||||
const DEFAULT_TIMEOUT_MS = 300_000; // 300 seconds
|
||||
const RUN_TASK_TIMEOUT_MS = 7_200_000; // 2 hours
|
||||
const WATCH_WORKFLOW_TIMEOUT_MS = 3_600_000; // 1 hour
|
||||
const SIGKILL_GRACE_MS = 10_000; // 10 seconds grace period before SIGKILL
|
||||
|
||||
/**
|
||||
* Gracefully kill a child process: SIGTERM first, then SIGKILL after a grace period.
|
||||
*/
|
||||
function gracefulKill(child: ChildProcess, graceMs: number = SIGKILL_GRACE_MS): void {
|
||||
child.kill('SIGTERM');
|
||||
|
||||
const forceKillTimer = setTimeout(() => {
|
||||
try {
|
||||
child.kill('SIGKILL');
|
||||
} catch {
|
||||
// Process may already be dead
|
||||
}
|
||||
}, graceMs);
|
||||
|
||||
// Clear the force-kill timer if the process exits on its own
|
||||
child.on('close', () => {
|
||||
clearTimeout(forceKillTimer);
|
||||
});
|
||||
}
|
||||
|
||||
class CliProvider implements ProviderInterface {
|
||||
private readonly executablePath: string;
|
||||
@@ -74,6 +98,8 @@ class CliProvider implements ProviderInterface {
|
||||
},
|
||||
};
|
||||
|
||||
const timeoutMs = RUN_TASK_TIMEOUT_MS;
|
||||
|
||||
return new Promise<string>((resolve, reject) => {
|
||||
const child = spawn(this.executablePath, ['run-task'], {
|
||||
stdio: ['pipe', 'pipe', 'pipe'],
|
||||
@@ -83,6 +109,17 @@ class CliProvider implements ProviderInterface {
|
||||
let lastJsonResponse: CliProviderResponse | undefined;
|
||||
const outputLines: string[] = [];
|
||||
let stderrOutput = '';
|
||||
let timedOut = false;
|
||||
|
||||
// Set up timeout to prevent indefinite hangs
|
||||
const timer = setTimeout(() => {
|
||||
timedOut = true;
|
||||
const minutes = Math.round(timeoutMs / 60_000);
|
||||
const message = `CLI provider timed out after ${minutes} minutes. The external provider may be unresponsive.`;
|
||||
core.error(message);
|
||||
gracefulKill(child);
|
||||
reject(new Error(`CliProvider run-task timed out after ${timeoutMs}ms`));
|
||||
}, timeoutMs);
|
||||
|
||||
child.stdin.write(JSON.stringify(request));
|
||||
child.stdin.end();
|
||||
@@ -123,10 +160,16 @@ class CliProvider implements ProviderInterface {
|
||||
});
|
||||
|
||||
child.on('error', (error: Error) => {
|
||||
reject(new Error(`CliProvider: failed to spawn executable '${this.executablePath}': ${error.message}`));
|
||||
clearTimeout(timer);
|
||||
if (!timedOut) {
|
||||
reject(new Error(`CliProvider: failed to spawn executable '${this.executablePath}': ${error.message}`));
|
||||
}
|
||||
});
|
||||
|
||||
child.on('close', (code: number | null) => {
|
||||
clearTimeout(timer);
|
||||
if (timedOut) return;
|
||||
|
||||
if (lastJsonResponse) {
|
||||
if (lastJsonResponse.success) {
|
||||
resolve(lastJsonResponse.output || outputLines.join('\n'));
|
||||
@@ -182,6 +225,8 @@ class CliProvider implements ProviderInterface {
|
||||
params: {},
|
||||
};
|
||||
|
||||
const timeoutMs = WATCH_WORKFLOW_TIMEOUT_MS;
|
||||
|
||||
return new Promise<string>((resolve, reject) => {
|
||||
const child = spawn(this.executablePath, ['watch-workflow'], {
|
||||
stdio: ['pipe', 'pipe', 'pipe'],
|
||||
@@ -190,6 +235,17 @@ class CliProvider implements ProviderInterface {
|
||||
|
||||
let lastJsonResponse: CliProviderResponse | undefined;
|
||||
const outputLines: string[] = [];
|
||||
let timedOut = false;
|
||||
|
||||
// Set up timeout to prevent indefinite hangs
|
||||
const timer = setTimeout(() => {
|
||||
timedOut = true;
|
||||
const minutes = Math.round(timeoutMs / 60_000);
|
||||
const message = `CLI provider timed out after ${minutes} minutes. The external provider may be unresponsive.`;
|
||||
core.error(message);
|
||||
gracefulKill(child);
|
||||
reject(new Error(`CliProvider watch-workflow timed out after ${timeoutMs}ms`));
|
||||
}, timeoutMs);
|
||||
|
||||
child.stdin.write(JSON.stringify(request));
|
||||
child.stdin.end();
|
||||
@@ -225,10 +281,16 @@ class CliProvider implements ProviderInterface {
|
||||
});
|
||||
|
||||
child.on('error', (error: Error) => {
|
||||
reject(new Error(`CliProvider: failed to spawn executable '${this.executablePath}': ${error.message}`));
|
||||
clearTimeout(timer);
|
||||
if (!timedOut) {
|
||||
reject(new Error(`CliProvider: failed to spawn executable '${this.executablePath}': ${error.message}`));
|
||||
}
|
||||
});
|
||||
|
||||
child.on('close', (code: number | null) => {
|
||||
clearTimeout(timer);
|
||||
if (timedOut) return;
|
||||
|
||||
if (lastJsonResponse) {
|
||||
if (lastJsonResponse.success) {
|
||||
resolve(lastJsonResponse.output || outputLines.join('\n'));
|
||||
@@ -246,7 +308,7 @@ class CliProvider implements ProviderInterface {
|
||||
|
||||
/**
|
||||
* Execute a CLI provider subcommand with a default timeout.
|
||||
* Used for all methods except runTaskInWorkflow and watchWorkflow (which have no timeout).
|
||||
* Timeout applies a graceful SIGTERM followed by SIGKILL after a grace period.
|
||||
*/
|
||||
private execute(
|
||||
command: CliProviderSubcommand,
|
||||
@@ -265,10 +327,10 @@ class CliProvider implements ProviderInterface {
|
||||
let stderrData = '';
|
||||
let timedOut = false;
|
||||
|
||||
// Set up timeout
|
||||
// Set up timeout with graceful kill
|
||||
const timer = setTimeout(() => {
|
||||
timedOut = true;
|
||||
child.kill('SIGTERM');
|
||||
gracefulKill(child);
|
||||
reject(new Error(`CliProvider: command '${command}' timed out after ${timeoutMs}ms`));
|
||||
}, timeoutMs);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user