diff --git a/src/model/orchestrator/providers/cli/cli-provider.test.ts b/src/model/orchestrator/providers/cli/cli-provider.test.ts index a734a9a6..ee000bb0 100644 --- a/src/model/orchestrator/providers/cli/cli-provider.test.ts +++ b/src/model/orchestrator/providers/cli/cli-provider.test.ts @@ -20,6 +20,7 @@ jest.mock('@actions/core', () => ({ jest.mock('../provider-git-manager'); import { spawn } from 'child_process'; +import * as core from '@actions/core'; import CliProvider from './cli-provider'; const mockSpawn = spawn as jest.MockedFunction; @@ -215,6 +216,18 @@ describe('CliProvider', () => { const result = await promise; expect(result).toBe('line 1\nline 2'); }); + + it('rejects on spawn error', async () => { + const child = createMockChildProcess(); + mockSpawn.mockReturnValue(child); + + const provider = new CliProvider('/nonexistent/path', {} as any); + const promise = provider.runTaskInWorkflow('guid', 'image', 'cmd', '/mnt', '/work', [], []); + + child.emit('error', new Error('ENOENT')); + + await expect(promise).rejects.toThrow('failed to spawn executable'); + }); }); describe('cleanupWorkflow', () => { @@ -404,7 +417,7 @@ describe('CliProvider', () => { jest.useRealTimers(); }); - it('rejects and kills process when command times out', async () => { + it('rejects and kills process when execute command times out', async () => { const child = createMockChildProcess(); mockSpawn.mockReturnValue(child); @@ -417,6 +430,97 @@ describe('CliProvider', () => { await expect(promise).rejects.toThrow('timed out'); expect(child.kill).toHaveBeenCalledWith('SIGTERM'); }); + + it('rejects and kills process when runTaskInWorkflow times out', async () => { + const child = createMockChildProcess(); + mockSpawn.mockReturnValue(child); + + const provider = new CliProvider('/path/to/exe', {} as any); + const promise = provider.runTaskInWorkflow('guid', 'image', 'cmd', '/mnt', '/work', [], []); + + // Advance past the 2-hour timeout (7_200_000ms) + jest.advanceTimersByTime(7_200_001); + + await expect(promise).rejects.toThrow('run-task timed out'); + expect(child.kill).toHaveBeenCalledWith('SIGTERM'); + expect(core.error).toHaveBeenCalledWith(expect.stringContaining('CLI provider timed out after 120 minutes')); + }); + + it('rejects and kills process when watchWorkflow times out', async () => { + const child = createMockChildProcess(); + mockSpawn.mockReturnValue(child); + + const provider = new CliProvider('/path/to/exe', {} as any); + const promise = provider.watchWorkflow(); + + // Advance past the 1-hour timeout (3_600_000ms) + jest.advanceTimersByTime(3_600_001); + + await expect(promise).rejects.toThrow('watch-workflow timed out'); + expect(child.kill).toHaveBeenCalledWith('SIGTERM'); + expect(core.error).toHaveBeenCalledWith(expect.stringContaining('CLI provider timed out after 60 minutes')); + }); + + it('escalates to SIGKILL after grace period on runTaskInWorkflow timeout', async () => { + const child = createMockChildProcess(); + mockSpawn.mockReturnValue(child); + + const provider = new CliProvider('/path/to/exe', {} as any); + const promise = provider.runTaskInWorkflow('guid', 'image', 'cmd', '/mnt', '/work', [], []); + + // Trigger the timeout + jest.advanceTimersByTime(7_200_001); + + await expect(promise).rejects.toThrow('timed out'); + + // SIGTERM was sent + expect(child.kill).toHaveBeenCalledWith('SIGTERM'); + + // Advance past the 10s grace period — SIGKILL should fire + jest.advanceTimersByTime(10_001); + expect(child.kill).toHaveBeenCalledWith('SIGKILL'); + }); + + it('does not send SIGKILL if process exits before grace period', async () => { + const child = createMockChildProcess(); + mockSpawn.mockReturnValue(child); + + const provider = new CliProvider('/path/to/exe', {} as any); + const promise = provider.runTaskInWorkflow('guid', 'image', 'cmd', '/mnt', '/work', [], []); + + // Trigger the timeout + jest.advanceTimersByTime(7_200_001); + + await expect(promise).rejects.toThrow('timed out'); + + // Process exits voluntarily after SIGTERM + child.emit('close', 143); + + // Advance past the grace period — SIGKILL should NOT fire because process already exited + jest.advanceTimersByTime(10_001); + expect(child.kill).toHaveBeenCalledWith('SIGTERM'); + // SIGKILL should not have been called because the close event cleared the timer + expect(child.kill).not.toHaveBeenCalledWith('SIGKILL'); + }); + + it('clears timeout when runTaskInWorkflow completes normally', async () => { + const child = createMockChildProcess(); + mockSpawn.mockReturnValue(child); + + const provider = new CliProvider('/path/to/exe', {} as any); + const promise = provider.runTaskInWorkflow('guid', 'image', 'cmd', '/mnt', '/work', [], []); + + // Process completes before timeout + child.stdout.emit('data', Buffer.from(JSON.stringify({ success: true, output: 'done' }) + '\n')); + child.emit('close', 0); + + const result = await promise; + expect(result).toBe('done'); + + // Advance far past timeout — should NOT reject + jest.advanceTimersByTime(8_000_000); + expect(child.kill).not.toHaveBeenCalled(); + }); }); describe('available providers list', () => { diff --git a/src/model/orchestrator/providers/cli/cli-provider.ts b/src/model/orchestrator/providers/cli/cli-provider.ts index ab2e7ff1..9cde4110 100644 --- a/src/model/orchestrator/providers/cli/cli-provider.ts +++ b/src/model/orchestrator/providers/cli/cli-provider.ts @@ -1,4 +1,5 @@ -import { spawn } from 'child_process'; +import { spawn, ChildProcess } from 'child_process'; +import * as core from '@actions/core'; import { ProviderInterface } from '../provider-interface'; import BuildParameters from '../../../build-parameters'; import OrchestratorEnvironmentVariable from '../../options/orchestrator-environment-variable'; @@ -9,6 +10,29 @@ import OrchestratorLogger from '../../services/core/orchestrator-logger'; import { CliProviderRequest, CliProviderResponse, CliProviderSubcommand } from './cli-provider-protocol'; const DEFAULT_TIMEOUT_MS = 300_000; // 300 seconds +const RUN_TASK_TIMEOUT_MS = 7_200_000; // 2 hours +const WATCH_WORKFLOW_TIMEOUT_MS = 3_600_000; // 1 hour +const SIGKILL_GRACE_MS = 10_000; // 10 seconds grace period before SIGKILL + +/** + * Gracefully kill a child process: SIGTERM first, then SIGKILL after a grace period. + */ +function gracefulKill(child: ChildProcess, graceMs: number = SIGKILL_GRACE_MS): void { + child.kill('SIGTERM'); + + const forceKillTimer = setTimeout(() => { + try { + child.kill('SIGKILL'); + } catch { + // Process may already be dead + } + }, graceMs); + + // Clear the force-kill timer if the process exits on its own + child.on('close', () => { + clearTimeout(forceKillTimer); + }); +} class CliProvider implements ProviderInterface { private readonly executablePath: string; @@ -74,6 +98,8 @@ class CliProvider implements ProviderInterface { }, }; + const timeoutMs = RUN_TASK_TIMEOUT_MS; + return new Promise((resolve, reject) => { const child = spawn(this.executablePath, ['run-task'], { stdio: ['pipe', 'pipe', 'pipe'], @@ -83,6 +109,17 @@ class CliProvider implements ProviderInterface { let lastJsonResponse: CliProviderResponse | undefined; const outputLines: string[] = []; let stderrOutput = ''; + let timedOut = false; + + // Set up timeout to prevent indefinite hangs + const timer = setTimeout(() => { + timedOut = true; + const minutes = Math.round(timeoutMs / 60_000); + const message = `CLI provider timed out after ${minutes} minutes. The external provider may be unresponsive.`; + core.error(message); + gracefulKill(child); + reject(new Error(`CliProvider run-task timed out after ${timeoutMs}ms`)); + }, timeoutMs); child.stdin.write(JSON.stringify(request)); child.stdin.end(); @@ -123,10 +160,16 @@ class CliProvider implements ProviderInterface { }); child.on('error', (error: Error) => { - reject(new Error(`CliProvider: failed to spawn executable '${this.executablePath}': ${error.message}`)); + clearTimeout(timer); + if (!timedOut) { + reject(new Error(`CliProvider: failed to spawn executable '${this.executablePath}': ${error.message}`)); + } }); child.on('close', (code: number | null) => { + clearTimeout(timer); + if (timedOut) return; + if (lastJsonResponse) { if (lastJsonResponse.success) { resolve(lastJsonResponse.output || outputLines.join('\n')); @@ -182,6 +225,8 @@ class CliProvider implements ProviderInterface { params: {}, }; + const timeoutMs = WATCH_WORKFLOW_TIMEOUT_MS; + return new Promise((resolve, reject) => { const child = spawn(this.executablePath, ['watch-workflow'], { stdio: ['pipe', 'pipe', 'pipe'], @@ -190,6 +235,17 @@ class CliProvider implements ProviderInterface { let lastJsonResponse: CliProviderResponse | undefined; const outputLines: string[] = []; + let timedOut = false; + + // Set up timeout to prevent indefinite hangs + const timer = setTimeout(() => { + timedOut = true; + const minutes = Math.round(timeoutMs / 60_000); + const message = `CLI provider timed out after ${minutes} minutes. The external provider may be unresponsive.`; + core.error(message); + gracefulKill(child); + reject(new Error(`CliProvider watch-workflow timed out after ${timeoutMs}ms`)); + }, timeoutMs); child.stdin.write(JSON.stringify(request)); child.stdin.end(); @@ -225,10 +281,16 @@ class CliProvider implements ProviderInterface { }); child.on('error', (error: Error) => { - reject(new Error(`CliProvider: failed to spawn executable '${this.executablePath}': ${error.message}`)); + clearTimeout(timer); + if (!timedOut) { + reject(new Error(`CliProvider: failed to spawn executable '${this.executablePath}': ${error.message}`)); + } }); child.on('close', (code: number | null) => { + clearTimeout(timer); + if (timedOut) return; + if (lastJsonResponse) { if (lastJsonResponse.success) { resolve(lastJsonResponse.output || outputLines.join('\n')); @@ -246,7 +308,7 @@ class CliProvider implements ProviderInterface { /** * Execute a CLI provider subcommand with a default timeout. - * Used for all methods except runTaskInWorkflow and watchWorkflow (which have no timeout). + * Timeout applies a graceful SIGTERM followed by SIGKILL after a grace period. */ private execute( command: CliProviderSubcommand, @@ -265,10 +327,10 @@ class CliProvider implements ProviderInterface { let stderrData = ''; let timedOut = false; - // Set up timeout + // Set up timeout with graceful kill const timer = setTimeout(() => { timedOut = true; - child.kill('SIGTERM'); + gracefulKill(child); reject(new Error(`CliProvider: command '${command}' timed out after ${timeoutMs}ms`)); }, timeoutMs);