Compare commits

..

7 Commits

Author SHA1 Message Date
frostebite
dcb0894d0e fix: replace orchestrator-develop branch references with main
The orchestrator-develop branch no longer exists. Update all fallback
clone commands and test fixtures to use main instead.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-09 20:08:44 +00:00
frostebite
90b9b0c7b0 ci: set macOS builds to continue-on-error
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 23:33:48 +00:00
frostebite
e9c247f04f style: fix prettier formatting
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 14:10:24 +00:00
frostebite
cff759721a fix(load-balancing): add pagination limits and rate-limit detection
Cap pagination at 100 pages (10,000 runners max), detect GitHub API
rate limiting (403/429) with reset time reporting, add 30-second total
timeout for pagination loop. Log clear diagnostic when no runners found
suggesting possible causes (token permissions, runner registration).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 13:00:17 +00:00
frostebite
7e9d0bf53e test(orchestrator): add runner availability service tests
Covers: no token skip, no runners fallback, busy/offline runners,
label filtering (case-insensitive), minAvailable threshold,
fail-open on API error, mixed runner states.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 07:58:58 +00:00
frostebite
8194790728 feat(orchestrator): add retry-on-fallback and provider init timeout
Adds retryOnFallback (retry failed builds on alternate provider) and
providerInitTimeout (swap provider if init takes too long). Refactors
run() into run()/runWithProvider() to support retry loop.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 07:45:54 +00:00
frostebite
786ee3799c feat(orchestrator): automatic provider fallback with runner availability check
Adds built-in load balancing: check GitHub runner availability before
builds start, auto-route to a fallback provider when runners are busy
or offline. Eliminates the need for a separate check-runner job.

New inputs: fallbackProviderStrategy, runnerCheckEnabled,
runnerCheckLabels, runnerCheckMinAvailable.

Outputs providerFallbackUsed and providerFallbackReason for workflow
visibility.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 07:39:23 +00:00
18 changed files with 973 additions and 2583 deletions

View File

@@ -182,8 +182,8 @@ inputs:
required: false
default: ''
description:
'[Orchestrator] Run a custom job instead of the standard build automation for orchestrator (in yaml format with
the keys image, secrets (name, value object array), command line string)'
'[Orchestrator] Run a custom job instead of the standard build automation for orchestrator (in yaml format with the
keys image, secrets (name, value object array), command line string)'
awsStackName:
default: 'game-ci'
required: false
@@ -194,6 +194,42 @@ inputs:
description:
'[Orchestrator] Either local, k8s or aws can be used to run builds on a remote cluster. Additional parameters must
be configured.'
fallbackProviderStrategy:
default: ''
required: false
description:
'[Orchestrator] Fallback provider when the primary is unavailable. Used with runnerCheckEnabled for automatic
failover, or as a catch-all if the primary provider fails to initialize.'
runnerCheckEnabled:
default: 'false'
required: false
description:
'[Orchestrator] Check GitHub Actions runner availability before starting a build. When no suitable runners are
available and fallbackProviderStrategy is set, automatically routes to the fallback provider.'
runnerCheckLabels:
default: ''
required: false
description:
'[Orchestrator] Comma-separated runner labels to filter when checking availability (e.g. self-hosted,linux).
When empty, checks all runners in the repository.'
runnerCheckMinAvailable:
default: '1'
required: false
description:
'[Orchestrator] Minimum number of idle runners required for the primary provider. If fewer are available,
routes to fallbackProviderStrategy.'
retryOnFallback:
default: 'false'
required: false
description:
'[Orchestrator] When true and fallbackProviderStrategy is set, automatically retry the build on the fallback
provider if the primary provider fails. Useful for long builds where transient cloud failures are common.'
providerInitTimeout:
default: '0'
required: false
description:
'[Orchestrator] Maximum seconds to wait for the primary provider to initialize (setupWorkflow). If exceeded
and fallbackProviderStrategy is set, switches to the fallback. Set to 0 to disable (default).'
resourceTracking:
default: 'false'
required: false
@@ -280,35 +316,6 @@ inputs:
'[Orchestrator] Specifies the repo for the unity builder. Useful if you forked the repo for testing, features, or
fixes.'
hotRunnerEnabled:
description: '[HotRunner] Use persistent hot runner for builds (requires pre-registered runners)'
required: false
default: 'false'
hotRunnerTransport:
description: '[HotRunner] Transport protocol for hot runner communication: websocket, grpc, named-pipe'
required: false
default: 'websocket'
hotRunnerHost:
description: '[HotRunner] Hot runner host address'
required: false
default: 'localhost'
hotRunnerPort:
description: '[HotRunner] Hot runner port number'
required: false
default: '9090'
hotRunnerHealthInterval:
description: '[HotRunner] Health check interval in seconds'
required: false
default: '30'
hotRunnerMaxIdle:
description: '[HotRunner] Maximum idle time in seconds before recycling runner'
required: false
default: '3600'
hotRunnerFallbackToCold:
description: '[HotRunner] Fall back to cold build if no hot runner available'
required: false
default: 'true'
outputs:
volume:
description: 'The Persistent Volume (PV) where the build artifacts have been stored by Kubernetes'

1063
dist/index.js generated vendored

File diff suppressed because it is too large Load Diff

2
dist/index.js.map generated vendored

File diff suppressed because one or more lines are too long

View File

@@ -3,8 +3,6 @@ import { Action, BuildParameters, Cache, Orchestrator, Docker, ImageTag, Output
import { Cli } from './model/cli/cli';
import MacBuilder from './model/mac-builder';
import PlatformSetup from './model/platform-setup';
import { HotRunnerService } from './model/orchestrator/services/hot-runner';
import { HotRunnerConfig } from './model/orchestrator/services/hot-runner/hot-runner-types';
async function runMain() {
try {
@@ -23,46 +21,17 @@ async function runMain() {
let exitCode = -1;
// Hot runner path: attempt to use a persistent Unity editor instance
if (buildParameters.hotRunnerEnabled) {
core.info('[HotRunner] Hot runner mode enabled, attempting hot build...');
const hotRunnerConfig: HotRunnerConfig = {
enabled: true,
transport: buildParameters.hotRunnerTransport,
host: buildParameters.hotRunnerHost,
port: buildParameters.hotRunnerPort,
healthCheckInterval: buildParameters.hotRunnerHealthInterval,
maxIdleTime: buildParameters.hotRunnerMaxIdle,
maxJobsBeforeRecycle: 0, // no automatic recycle by job count
};
const hotRunnerService = new HotRunnerService();
try {
await hotRunnerService.initialize(hotRunnerConfig);
const result = await hotRunnerService.submitBuild(buildParameters, (output) => {
core.info(output);
});
exitCode = result.exitCode;
core.info(`[HotRunner] Build completed with exit code ${exitCode}`);
await hotRunnerService.shutdown();
} catch (hotRunnerError) {
await hotRunnerService.shutdown();
if (buildParameters.hotRunnerFallbackToCold) {
core.warning(
`[HotRunner] Hot runner failed: ${(hotRunnerError as Error).message}. Falling back to cold build.`,
);
exitCode = await runColdBuild(buildParameters, baseImage, workspace, actionFolder);
} else {
throw hotRunnerError;
}
}
} else if (buildParameters.providerStrategy === 'local') {
if (buildParameters.providerStrategy === 'local') {
core.info('Building locally');
exitCode = await runColdBuild(buildParameters, baseImage, workspace, actionFolder);
await PlatformSetup.setup(buildParameters, actionFolder);
exitCode =
process.platform === 'darwin'
? await MacBuilder.run(actionFolder)
: await Docker.run(baseImage.toString(), {
workspace,
actionFolder,
...buildParameters,
});
} else {
await Orchestrator.run(buildParameters, baseImage.toString());
exitCode = 0;
@@ -81,28 +50,4 @@ async function runMain() {
}
}
async function runColdBuild(
buildParameters: BuildParameters,
baseImage: ImageTag,
workspace: string,
actionFolder: string,
): Promise<number> {
if (buildParameters.providerStrategy === 'local') {
core.info('Building locally');
await PlatformSetup.setup(buildParameters, actionFolder);
return process.platform === 'darwin'
? await MacBuilder.run(actionFolder)
: await Docker.run(baseImage.toString(), {
workspace,
actionFolder,
...buildParameters,
});
} else {
await Orchestrator.run(buildParameters, baseImage.toString());
return 0;
}
}
runMain();

View File

@@ -54,6 +54,12 @@ class BuildParameters {
public sshAgent!: string;
public sshPublicKeysDirectoryPath!: string;
public providerStrategy!: string;
public fallbackProviderStrategy!: string;
public runnerCheckEnabled!: boolean;
public runnerCheckLabels!: string[];
public runnerCheckMinAvailable!: number;
public retryOnFallback!: boolean;
public providerInitTimeout!: number;
public gitPrivateToken!: string;
public awsStackName!: string;
public awsEndpoint?: string;
@@ -106,13 +112,6 @@ class BuildParameters {
public cacheUnityInstallationOnMac!: boolean;
public unityHubVersionOnMac!: string;
public dockerWorkspacePath!: string;
public hotRunnerEnabled!: boolean;
public hotRunnerTransport!: 'websocket' | 'grpc' | 'named-pipe';
public hotRunnerHost!: string;
public hotRunnerPort!: number;
public hotRunnerHealthInterval!: number;
public hotRunnerMaxIdle!: number;
public hotRunnerFallbackToCold!: boolean;
public static shouldUseRetainedWorkspaceMode(buildParameters: BuildParameters) {
return buildParameters.maxRetainedWorkspaces > 0 && Orchestrator.lockedWorkspace !== ``;
@@ -201,6 +200,12 @@ class BuildParameters {
containerRegistryRepository: Input.containerRegistryRepository,
containerRegistryImageVersion: Input.containerRegistryImageVersion,
providerStrategy: OrchestratorOptions.providerStrategy,
fallbackProviderStrategy: OrchestratorOptions.fallbackProviderStrategy,
runnerCheckEnabled: OrchestratorOptions.runnerCheckEnabled,
runnerCheckLabels: OrchestratorOptions.runnerCheckLabels,
runnerCheckMinAvailable: OrchestratorOptions.runnerCheckMinAvailable,
retryOnFallback: OrchestratorOptions.retryOnFallback,
providerInitTimeout: OrchestratorOptions.providerInitTimeout,
buildPlatform: OrchestratorOptions.buildPlatform,
kubeConfig: OrchestratorOptions.kubeConfig,
containerMemory: OrchestratorOptions.containerMemory,
@@ -249,13 +254,6 @@ class BuildParameters {
cacheUnityInstallationOnMac: Input.cacheUnityInstallationOnMac,
unityHubVersionOnMac: Input.unityHubVersionOnMac,
dockerWorkspacePath: Input.dockerWorkspacePath,
hotRunnerEnabled: Input.hotRunnerEnabled,
hotRunnerTransport: Input.hotRunnerTransport,
hotRunnerHost: Input.hotRunnerHost,
hotRunnerPort: Input.hotRunnerPort,
hotRunnerHealthInterval: Input.hotRunnerHealthInterval,
hotRunnerMaxIdle: Input.hotRunnerMaxIdle,
hotRunnerFallbackToCold: Input.hotRunnerFallbackToCold,
};
}

View File

@@ -282,38 +282,6 @@ class Input {
return Input.getInput('skipActivation')?.toLowerCase() ?? 'false';
}
static get hotRunnerEnabled(): boolean {
const input = Input.getInput('hotRunnerEnabled') ?? false;
return input === 'true';
}
static get hotRunnerTransport(): 'websocket' | 'grpc' | 'named-pipe' {
return (Input.getInput('hotRunnerTransport') ?? 'websocket') as 'websocket' | 'grpc' | 'named-pipe';
}
static get hotRunnerHost(): string {
return Input.getInput('hotRunnerHost') ?? 'localhost';
}
static get hotRunnerPort(): number {
return Number.parseInt(Input.getInput('hotRunnerPort') ?? '9090', 10);
}
static get hotRunnerHealthInterval(): number {
return Number.parseInt(Input.getInput('hotRunnerHealthInterval') ?? '30', 10);
}
static get hotRunnerMaxIdle(): number {
return Number.parseInt(Input.getInput('hotRunnerMaxIdle') ?? '3600', 10);
}
static get hotRunnerFallbackToCold(): boolean {
const input = Input.getInput('hotRunnerFallbackToCold') ?? 'true';
return input === 'true';
}
public static ToEnvVarFormat(input: string) {
if (input.toUpperCase() === input) {
return input;

View File

@@ -138,6 +138,32 @@ class OrchestratorOptions {
return provider || 'local';
}
static get fallbackProviderStrategy(): string {
return OrchestratorOptions.getInput('fallbackProviderStrategy') || '';
}
static get runnerCheckEnabled(): boolean {
return OrchestratorOptions.getInput('runnerCheckEnabled') === 'true';
}
static get runnerCheckLabels(): string[] {
const labels = OrchestratorOptions.getInput('runnerCheckLabels');
return labels ? labels.split(',').map((l) => l.trim()) : [];
}
static get runnerCheckMinAvailable(): number {
return Number(OrchestratorOptions.getInput('runnerCheckMinAvailable')) || 1;
}
static get retryOnFallback(): boolean {
return OrchestratorOptions.getInput('retryOnFallback') === 'true';
}
static get providerInitTimeout(): number {
return Number(OrchestratorOptions.getInput('providerInitTimeout')) || 0;
}
static get containerCpu(): string {
return OrchestratorOptions.getInput('containerCpu') || `1024`;
}

View File

@@ -20,6 +20,7 @@ import { FollowLogStreamService } from './services/core/follow-log-stream-servic
import OrchestratorResult from './services/core/orchestrator-result';
import OrchestratorOptions from './options/orchestrator-options';
import ResourceTracking from './services/core/resource-tracking';
import { RunnerAvailabilityService } from './services/core/runner-availability-service';
class Orchestrator {
public static Provider: ProviderInterface;
@@ -76,6 +77,42 @@ class Orchestrator {
private static async setupSelectedBuildPlatform() {
OrchestratorLogger.log(`Orchestrator platform selected ${Orchestrator.buildParameters.providerStrategy}`);
// Check runner availability and apply fallback if needed
if (Orchestrator.buildParameters.runnerCheckEnabled && Orchestrator.buildParameters.fallbackProviderStrategy) {
const owner = OrchestratorOptions.githubOwner;
const repo = OrchestratorOptions.githubRepoName;
const token = Orchestrator.buildParameters.gitPrivateToken || process.env.GITHUB_TOKEN || '';
OrchestratorLogger.log(
`Checking runner availability (labels: [${Orchestrator.buildParameters.runnerCheckLabels.join(', ')}], min: ${
Orchestrator.buildParameters.runnerCheckMinAvailable
})`,
);
const result = await RunnerAvailabilityService.checkAvailability(
owner,
repo,
token,
Orchestrator.buildParameters.runnerCheckLabels,
Orchestrator.buildParameters.runnerCheckMinAvailable,
);
OrchestratorLogger.log(
`Runner check: ${result.totalRunners} total, ${result.matchingRunners} matching, ${result.idleRunners} idle — ${result.reason}`,
);
if (result.shouldFallback) {
const original = Orchestrator.buildParameters.providerStrategy;
const fallback = Orchestrator.buildParameters.fallbackProviderStrategy;
OrchestratorLogger.log(`Falling back from '${original}' to '${fallback}' — ${result.reason}`);
Orchestrator.buildParameters.providerStrategy = fallback;
core.setOutput('providerFallbackUsed', 'true');
core.setOutput('providerFallbackReason', result.reason);
} else {
core.setOutput('providerFallbackUsed', 'false');
}
}
// Detect LocalStack endpoints and handle AWS provider appropriately
// AWS_FORCE_PROVIDER options:
// - 'aws': Force AWS provider (requires LocalStack Pro with ECS support)
@@ -182,6 +219,30 @@ class Orchestrator {
if (baseImage.includes(`undefined`)) {
throw new Error(`baseImage is undefined`);
}
try {
return await Orchestrator.runWithProvider(buildParameters, baseImage);
} catch (primaryError: any) {
// Retry on fallback provider if enabled and a fallback is configured
const fallback = buildParameters.fallbackProviderStrategy;
const alreadyOnFallback = buildParameters.providerStrategy === fallback;
if (buildParameters.retryOnFallback && fallback && !alreadyOnFallback) {
OrchestratorLogger.log(
`Primary provider '${buildParameters.providerStrategy}' failed: ${primaryError.message}`,
);
OrchestratorLogger.log(`Retrying build on fallback provider '${fallback}'...`);
buildParameters.providerStrategy = fallback;
core.setOutput('providerFallbackUsed', 'true');
core.setOutput('providerFallbackReason', `Primary provider failed: ${primaryError.message}`);
return await Orchestrator.runWithProvider(buildParameters, baseImage);
}
throw primaryError;
}
}
private static async runWithProvider(buildParameters: BuildParameters, baseImage: string) {
await Orchestrator.setup(buildParameters);
// When aws-local mode is enabled, validate AWS CloudFormation templates
@@ -189,12 +250,10 @@ class Orchestrator {
if (Orchestrator.validateAwsTemplates) {
await Orchestrator.validateAwsCloudFormationTemplates();
}
await Orchestrator.Provider.setupWorkflow(
Orchestrator.buildParameters.buildGuid,
Orchestrator.buildParameters,
Orchestrator.buildParameters.branch,
Orchestrator.defaultSecrets,
);
// Setup workflow with optional init timeout
await Orchestrator.setupWorkflowWithTimeout();
try {
if (buildParameters.maxRetainedWorkspaces > 0) {
Orchestrator.lockedWorkspace = SharedWorkspaceLocking.NewWorkspaceName();
@@ -275,6 +334,39 @@ class Orchestrator {
}
}
/**
* Runs setupWorkflow with an optional timeout. If providerInitTimeout is set and the
* provider takes longer than that to initialize, throws an error that triggers
* retry-on-fallback (if enabled).
*/
private static async setupWorkflowWithTimeout() {
const timeoutSeconds = Orchestrator.buildParameters.providerInitTimeout;
const setupPromise = Orchestrator.Provider.setupWorkflow(
Orchestrator.buildParameters.buildGuid,
Orchestrator.buildParameters,
Orchestrator.buildParameters.branch,
Orchestrator.defaultSecrets,
);
if (timeoutSeconds <= 0) {
await setupPromise;
return;
}
OrchestratorLogger.log(`Provider init timeout: ${timeoutSeconds}s`);
const timeoutPromise = new Promise<never>((_, reject) => {
setTimeout(
() => reject(new Error(`Provider initialization timed out after ${timeoutSeconds}s`)),
timeoutSeconds * 1000,
);
});
await Promise.race([setupPromise, timeoutPromise]);
}
private static async updateStatusWithBuildParameters() {
const content = { ...Orchestrator.buildParameters };
content.gitPrivateToken = ``;

View File

@@ -1,5 +0,0 @@
# Hot Runner Protocol
Extensible runner registration and persistent Unity editor provider protocol.
See GitHub Issue for full specification.

View File

@@ -0,0 +1,318 @@
import { RunnerAvailabilityService } from './runner-availability-service';
// Mock @octokit/core
jest.mock('@octokit/core', () => ({
Octokit: jest.fn().mockImplementation(() => ({
request: jest.fn(),
})),
}));
jest.mock('./orchestrator-logger', () => ({
__esModule: true,
default: {
log: jest.fn(),
logWarning: jest.fn(),
error: jest.fn(),
},
}));
import { Octokit } from '@octokit/core';
const MockedOctokit = Octokit as jest.MockedClass<typeof Octokit>;
function createMockRunners(runners: Array<{ name: string; status: string; busy: boolean; labels: string[] }>) {
return runners.map((r, i) => ({
id: i + 1,
name: r.name,
status: r.status,
busy: r.busy,
labels: r.labels.map((l) => ({ name: l })),
}));
}
describe('RunnerAvailabilityService', () => {
beforeEach(() => {
jest.clearAllMocks();
});
describe('checkAvailability', () => {
it('should skip check and not fallback when no token is provided', async () => {
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', '', [], 1);
expect(result.shouldFallback).toBe(false);
expect(result.reason).toContain('No GitHub token');
});
it('should fallback when no runners are registered', async () => {
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners: [] } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
expect(result.shouldFallback).toBe(true);
expect(result.reason).toContain('No runners registered');
expect(result.totalRunners).toBe(0);
});
it('should not fallback when enough idle runners are available', async () => {
const runners = createMockRunners([
{ name: 'runner-1', status: 'online', busy: false, labels: ['self-hosted', 'linux'] },
{ name: 'runner-2', status: 'online', busy: false, labels: ['self-hosted', 'linux'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
expect(result.shouldFallback).toBe(false);
expect(result.idleRunners).toBe(2);
expect(result.totalRunners).toBe(2);
});
it('should fallback when all runners are busy', async () => {
const runners = createMockRunners([
{ name: 'runner-1', status: 'online', busy: true, labels: ['self-hosted'] },
{ name: 'runner-2', status: 'online', busy: true, labels: ['self-hosted'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
expect(result.shouldFallback).toBe(true);
expect(result.idleRunners).toBe(0);
expect(result.matchingRunners).toBe(2);
});
it('should fallback when all runners are offline', async () => {
const runners = createMockRunners([
{ name: 'runner-1', status: 'offline', busy: false, labels: ['self-hosted'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
expect(result.shouldFallback).toBe(true);
expect(result.idleRunners).toBe(0);
});
it('should filter runners by required labels', async () => {
const runners = createMockRunners([
{ name: 'linux-runner', status: 'online', busy: false, labels: ['self-hosted', 'linux'] },
{ name: 'windows-runner', status: 'online', busy: false, labels: ['self-hosted', 'windows'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability(
'owner',
'repo',
'token',
['self-hosted', 'linux'],
1,
);
expect(result.shouldFallback).toBe(false);
expect(result.matchingRunners).toBe(1);
expect(result.idleRunners).toBe(1);
expect(result.totalRunners).toBe(2);
});
it('should fallback when no runners match required labels', async () => {
const runners = createMockRunners([
{ name: 'windows-runner', status: 'online', busy: false, labels: ['self-hosted', 'windows'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability(
'owner',
'repo',
'token',
['self-hosted', 'linux'],
1,
);
expect(result.shouldFallback).toBe(true);
expect(result.matchingRunners).toBe(0);
expect(result.idleRunners).toBe(0);
});
it('should respect minAvailable threshold', async () => {
const runners = createMockRunners([{ name: 'runner-1', status: 'online', busy: false, labels: ['self-hosted'] }]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
// Need 2, have 1 — should fallback
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 2);
expect(result.shouldFallback).toBe(true);
expect(result.idleRunners).toBe(1);
});
it('should be case-insensitive for label matching', async () => {
const runners = createMockRunners([
{ name: 'runner-1', status: 'online', busy: false, labels: ['Self-Hosted', 'Linux'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability(
'owner',
'repo',
'token',
['self-hosted', 'linux'],
1,
);
expect(result.shouldFallback).toBe(false);
expect(result.matchingRunners).toBe(1);
});
it('should not fallback on API error (fail-open)', async () => {
const mockRequest = jest.fn().mockRejectedValue(new Error('403 Forbidden'));
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
expect(result.shouldFallback).toBe(false);
expect(result.reason).toContain('Runner check failed');
});
it('should count only online+idle runners', async () => {
const runners = createMockRunners([
{ name: 'idle', status: 'online', busy: false, labels: ['self-hosted'] },
{ name: 'busy', status: 'online', busy: true, labels: ['self-hosted'] },
{ name: 'offline', status: 'offline', busy: false, labels: ['self-hosted'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
expect(result.shouldFallback).toBe(false);
expect(result.totalRunners).toBe(3);
expect(result.matchingRunners).toBe(3);
expect(result.idleRunners).toBe(1);
});
});
describe('pagination limits', () => {
it('should stop paginating after reaching the page limit', async () => {
// Return full pages (100 runners each) to force continued pagination
let callCount = 0;
const mockRequest = jest.fn().mockImplementation(() => {
callCount++;
const runners = createMockRunners(
Array.from({ length: 100 }, (_, i) => ({
name: `runner-${callCount}-${i}`,
status: 'online' as const,
busy: false,
labels: ['self-hosted'],
})),
);
return Promise.resolve({ status: 200, data: { runners } });
});
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
// Should have called at most 100 pages (the MAX_PAGINATION_PAGES limit)
expect(mockRequest).toHaveBeenCalledTimes(100);
// Should still have runners from the pages it did fetch
expect(result.totalRunners).toBe(10000);
expect(result.shouldFallback).toBe(false);
});
it('should stop paginating on rate limit (HTTP 403)', async () => {
let callCount = 0;
const mockRequest = jest.fn().mockImplementation(() => {
callCount++;
if (callCount === 2) {
// Octokit throws for non-2xx responses
const error: any = new Error('API rate limit exceeded');
error.status = 403;
error.response = {
status: 403,
headers: { 'x-ratelimit-reset': String(Math.floor(Date.now() / 1000) + 3600) },
};
return Promise.reject(error);
}
const runners = createMockRunners(
Array.from({ length: 100 }, (_, i) => ({
name: `runner-${i}`,
status: 'online' as const,
busy: false,
labels: ['self-hosted'],
})),
);
return Promise.resolve({ status: 200, data: { runners } });
});
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
// Should have stopped at page 2 (rate limited)
expect(mockRequest).toHaveBeenCalledTimes(2);
// Should use the 100 runners from the first page
expect(result.totalRunners).toBe(100);
expect(result.shouldFallback).toBe(false);
});
it('should stop paginating on rate limit (HTTP 429)', async () => {
let callCount = 0;
const mockRequest = jest.fn().mockImplementation(() => {
callCount++;
if (callCount === 1) {
// Octokit throws for non-2xx responses
const error: any = new Error('Too Many Requests');
error.status = 429;
error.response = { status: 429, headers: {} };
return Promise.reject(error);
}
return Promise.resolve({ status: 200, data: { runners: [] } });
});
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
// Should have stopped at first page (rate limited immediately)
expect(mockRequest).toHaveBeenCalledTimes(1);
// No runners found — should fallback
expect(result.totalRunners).toBe(0);
expect(result.shouldFallback).toBe(true);
});
it('should handle pagination timeout gracefully', async () => {
// Mock Date.now to simulate timeout
const originalDateNow = Date.now;
let callCount = 0;
const mockRequest = jest.fn().mockImplementation(() => {
callCount++;
// After first call, advance time past the timeout
if (callCount >= 2) {
Date.now = jest.fn(() => originalDateNow() + 31_000);
}
const runners = createMockRunners(
Array.from({ length: 100 }, (_, i) => ({
name: `runner-${callCount}-${i}`,
status: 'online' as const,
busy: false,
labels: ['self-hosted'],
})),
);
return Promise.resolve({ status: 200, data: { runners } });
});
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
// Should have stopped after timeout was detected (2 pages: first succeeds, second triggers timeout check)
expect(mockRequest.mock.calls.length).toBeLessThanOrEqual(3);
// Should have runners from pages fetched before timeout
expect(result.totalRunners).toBeGreaterThan(0);
// Restore
Date.now = originalDateNow;
});
});
});

View File

@@ -0,0 +1,205 @@
import { Octokit } from '@octokit/core';
import OrchestratorLogger from './orchestrator-logger';
interface GitHubRunner {
id: number;
name: string;
status: 'online' | 'offline';
busy: boolean;
labels: Array<{ name: string }>;
}
interface RunnerCheckResult {
shouldFallback: boolean;
reason: string;
totalRunners: number;
matchingRunners: number;
idleRunners: number;
}
/**
* Maximum number of pages to fetch when paginating through GitHub API results.
* 100 pages * 100 per page = 10,000 runners maximum.
*/
const MAX_PAGINATION_PAGES = 100;
/**
* Total timeout in milliseconds for the pagination loop.
* Prevents indefinite API calls if GitHub is slow or pagination is unexpectedly deep.
*/
const PAGINATION_TIMEOUT_MS = 30_000;
/**
* Checks GitHub Actions runner availability to support automatic provider fallback.
*
* When a user configures `runnerCheckEnabled: true` with a `fallbackProviderStrategy`,
* this service queries the GitHub API for runner status before the build starts.
* If insufficient runners are available, the orchestrator routes to the fallback provider.
*/
export class RunnerAvailabilityService {
/**
* Check if enough runners are available to handle the build.
*
* @param owner - GitHub repository owner
* @param repo - GitHub repository name
* @param token - GitHub token with repo/actions scope
* @param requiredLabels - Labels runners must have (empty = any runner)
* @param minAvailable - Minimum idle runners required
* @returns RunnerCheckResult with decision and diagnostics
*/
static async checkAvailability(
owner: string,
repo: string,
token: string,
requiredLabels: string[],
minAvailable: number,
): Promise<RunnerCheckResult> {
if (!token) {
return {
shouldFallback: false,
reason: 'No GitHub token available — skipping runner check',
totalRunners: 0,
matchingRunners: 0,
idleRunners: 0,
};
}
try {
const octokit = new Octokit({ auth: token });
// Fetch all runners for the repository
const runners = await RunnerAvailabilityService.fetchRunners(octokit, owner, repo);
if (runners.length === 0) {
return {
shouldFallback: true,
reason: 'No runners registered for this repository',
totalRunners: 0,
matchingRunners: 0,
idleRunners: 0,
};
}
// Filter by required labels
const matching = RunnerAvailabilityService.filterByLabels(runners, requiredLabels);
// Count idle (online + not busy)
const idle = matching.filter((r) => r.status === 'online' && !r.busy);
const result: RunnerCheckResult = {
shouldFallback: idle.length < minAvailable,
reason:
idle.length >= minAvailable
? `${idle.length} idle runner(s) available (need ${minAvailable})`
: `Only ${idle.length} idle runner(s) available, need ${minAvailable}`,
totalRunners: runners.length,
matchingRunners: matching.length,
idleRunners: idle.length,
};
return result;
} catch (error: any) {
// If the API call fails (permissions, rate limit, etc.), don't block the build
OrchestratorLogger.log(`Runner availability check failed: ${error.message}`);
return {
shouldFallback: false,
reason: `Runner check failed (${error.message}) — proceeding with primary provider`,
totalRunners: 0,
matchingRunners: 0,
idleRunners: 0,
};
}
}
/**
* Fetch all runners for a repository, handling pagination.
*
* Includes defensive limits:
* - Maximum page count (MAX_PAGINATION_PAGES) to prevent infinite loops
* - Total timeout (PAGINATION_TIMEOUT_MS) to prevent indefinite API calls
* - Rate-limit detection (HTTP 403/429 with X-RateLimit-Remaining header)
*/
private static async fetchRunners(octokit: Octokit, owner: string, repo: string): Promise<GitHubRunner[]> {
const allRunners: GitHubRunner[] = [];
let page = 1;
const perPage = 100;
const startTime = Date.now();
while (page <= MAX_PAGINATION_PAGES) {
// Check total timeout
if (Date.now() - startTime > PAGINATION_TIMEOUT_MS) {
OrchestratorLogger.logWarning(
`[RunnerAvailability] Pagination timeout reached after ${page - 1} pages and ${Date.now() - startTime}ms. ` +
`Using ${allRunners.length} runners found so far.`,
);
break;
}
let response: any;
try {
response = await octokit.request('GET /repos/{owner}/{repo}/actions/runners', {
owner,
repo,
per_page: perPage,
page,
});
} catch (requestError: any) {
// Octokit throws for non-2xx responses. Check if this is a rate limit error.
const status = requestError.status ?? requestError.response?.status;
if (status === 403 || status === 429) {
const resetTime =
requestError.response?.headers?.['x-ratelimit-reset'] ?? requestError.headers?.['x-ratelimit-reset'];
const resetMessage = resetTime
? ` Resets at ${new Date(Number.parseInt(String(resetTime), 10) * 1000).toISOString()}`
: '';
OrchestratorLogger.logWarning(
`[RunnerAvailability] GitHub API rate limit reached (HTTP ${status}).${resetMessage} ` +
`Using ${allRunners.length} runners found so far.`,
);
break;
}
// Re-throw non-rate-limit errors to be handled by the outer catch
throw requestError;
}
const runners = (response.data.runners || []) as GitHubRunner[];
allRunners.push(...runners);
if (runners.length < perPage) break;
page++;
}
if (page > MAX_PAGINATION_PAGES) {
OrchestratorLogger.logWarning(
`[RunnerAvailability] Maximum pagination limit reached (${MAX_PAGINATION_PAGES} pages). ` +
`Using ${allRunners.length} runners found so far.`,
);
}
if (allRunners.length === 0) {
OrchestratorLogger.log(
'[RunnerAvailability] No runners found. Possible causes: ' +
'wrong token permissions (needs repo or actions scope), ' +
'no self-hosted runners registered, ' +
'or runners are registered at the organization level instead of the repository.',
);
}
return allRunners;
}
/**
* Filter runners by required labels. A runner matches if it has ALL required labels.
* If requiredLabels is empty, all runners match.
*/
private static filterByLabels(runners: GitHubRunner[], requiredLabels: string[]): GitHubRunner[] {
if (requiredLabels.length === 0) return runners;
return runners.filter((runner) => {
const runnerLabelNames = runner.labels.map((l) => l.name.toLowerCase());
return requiredLabels.every((required) => runnerLabelNames.includes(required.toLowerCase()));
});
}
}

View File

@@ -1,159 +0,0 @@
import OrchestratorLogger from '../core/orchestrator-logger';
import { HotRunnerRegistry } from './hot-runner-registry';
import { HotRunnerJobRequest, HotRunnerJobResult, HotRunnerStatus, HotRunnerTransport } from './hot-runner-types';
const POLL_INTERVAL_MS = 1000;
// eslint-disable-next-line no-unused-vars
export type OutputCallback = (output: string) => void;
export class HotRunnerDispatcher {
private transports: Map<string, HotRunnerTransport>;
constructor(transports: Map<string, HotRunnerTransport>) {
this.transports = transports;
}
/**
* Dispatch a job to an available hot runner matching the request's build target.
* If no runner is immediately available, waits up to the request timeout.
* Returns the job result, or throws if no runner becomes available in time.
*/
async dispatchJob(
request: HotRunnerJobRequest,
registry: HotRunnerRegistry,
unityVersion: string,
onOutput?: OutputCallback,
): Promise<HotRunnerJobResult> {
OrchestratorLogger.log(`[HotRunner] Dispatching job ${request.jobId} (target: ${request.buildTarget})`);
// Find or wait for an available runner
let runner = registry.findAvailableRunner({
unityVersion,
platform: request.buildTarget,
});
if (!runner) {
OrchestratorLogger.log(
`[HotRunner] No idle runner available for ${unityVersion}/${request.buildTarget}, waiting...`,
);
runner = await this.waitForRunner({ unityVersion, platform: request.buildTarget }, request.timeout, registry);
}
// Mark runner as busy
registry.updateRunner(runner.id, {
state: 'busy',
currentJob: request.jobId,
});
const transport = this.transports.get(runner.id);
if (!transport) {
registry.updateRunner(runner.id, { state: 'idle', currentJob: undefined });
throw new Error(`[HotRunner] No transport available for runner ${runner.id}`);
}
OrchestratorLogger.log(`[HotRunner] Sending job ${request.jobId} to runner ${runner.id}`);
const startTime = Date.now();
try {
const result = await this.executeWithTimeout(transport, request);
const duration = Date.now() - startTime;
OrchestratorLogger.log(
`[HotRunner] Job ${request.jobId} completed on runner ${runner.id} in ${duration}ms (exit: ${result.exitCode})`,
);
if (onOutput && result.output) {
onOutput(result.output);
}
// Mark runner as idle and increment job count
const currentStatus = registry.getRunner(runner.id);
registry.updateRunner(runner.id, {
state: 'idle',
currentJob: undefined,
lastJobCompleted: request.jobId,
jobsCompleted: (currentStatus?.jobsCompleted ?? 0) + 1,
});
return result;
} catch (error: any) {
OrchestratorLogger.logWarning(`[HotRunner] Job ${request.jobId} failed on runner ${runner.id}: ${error.message}`);
// Mark runner as idle despite failure -- the health monitor will recycle if needed
registry.updateRunner(runner.id, {
state: 'idle',
currentJob: undefined,
});
throw error;
}
}
/**
* Wait for an available runner matching the requirements.
* Polls the registry at a fixed interval until one becomes available or timeout expires.
*/
async waitForRunner(
requirements: { unityVersion: string; platform: string },
timeoutMs: number,
registry: HotRunnerRegistry,
): Promise<HotRunnerStatus> {
const deadline = Date.now() + timeoutMs;
while (Date.now() < deadline) {
const runner = registry.findAvailableRunner(requirements);
if (runner) {
OrchestratorLogger.log(`[HotRunner] Runner ${runner.id} became available`);
return runner;
}
await this.sleep(Math.min(POLL_INTERVAL_MS, deadline - Date.now()));
}
throw new Error(
`[HotRunner] Timed out waiting for available runner (${requirements.unityVersion}/${requirements.platform}) after ${timeoutMs}ms`,
);
}
/**
* Execute a job on a transport with a timeout guard.
* On timeout, disconnects the transport to release the connection
* and prevent the orphaned sendJob promise from holding resources.
*/
private async executeWithTimeout(
transport: HotRunnerTransport,
request: HotRunnerJobRequest,
): Promise<HotRunnerJobResult> {
const TIMEOUT_SENTINEL = Symbol('timeout');
const timeoutPromise = new Promise<typeof TIMEOUT_SENTINEL>((resolve) => {
setTimeout(() => {
resolve(TIMEOUT_SENTINEL);
}, request.timeout);
});
const result = await Promise.race([transport.sendJob(request), timeoutPromise]);
if (result === TIMEOUT_SENTINEL) {
// Disconnect the transport to clean up the orphaned sendJob call
try {
await transport.disconnect();
} catch (disconnectError: any) {
OrchestratorLogger.logWarning(
`[HotRunner] Error disconnecting transport after timeout for job ${request.jobId}: ${disconnectError.message}`,
);
}
throw new Error(`[HotRunner] Job ${request.jobId} timed out after ${request.timeout}ms`);
}
return result;
}
private sleep(ms: number): Promise<void> {
return new Promise((resolve) => setTimeout(resolve, ms));
}
}

View File

@@ -1,186 +0,0 @@
import OrchestratorLogger from '../core/orchestrator-logger';
import { HotRunnerRegistry } from './hot-runner-registry';
import { HotRunnerTransport } from './hot-runner-types';
export class HotRunnerHealthMonitor {
private intervalHandle: ReturnType<typeof setInterval> | undefined;
private registry: HotRunnerRegistry | undefined;
private transports: Map<string, HotRunnerTransport> = new Map();
/**
* Start periodic health monitoring for all registered runners.
*/
startMonitoring(registry: HotRunnerRegistry, interval: number, transports: Map<string, HotRunnerTransport>): void {
if (this.intervalHandle) {
this.stopMonitoring();
}
this.registry = registry;
this.transports = transports;
OrchestratorLogger.log(`[HotRunner] Starting health monitoring (interval: ${interval}s)`);
this.intervalHandle = setInterval(() => {
this.runHealthChecks().catch((error: any) => {
OrchestratorLogger.logWarning(`[HotRunner] Health check cycle failed: ${error.message}`);
});
}, interval * 1000);
}
/**
* Stop periodic health monitoring.
*/
stopMonitoring(): void {
if (this.intervalHandle) {
clearInterval(this.intervalHandle);
this.intervalHandle = undefined;
OrchestratorLogger.log(`[HotRunner] Health monitoring stopped`);
}
}
/**
* Check health of a specific runner by ID. Returns true if healthy.
*/
async checkHealth(runnerId: string): Promise<boolean> {
if (!this.registry) {
return false;
}
const transport = this.transports.get(runnerId);
if (!transport) {
OrchestratorLogger.logWarning(`[HotRunner] No transport for runner ${runnerId}`);
this.registry.updateRunner(runnerId, {
state: 'unhealthy',
lastHealthCheck: new Date().toISOString(),
});
return false;
}
try {
const healthy = await transport.healthCheck();
if (healthy) {
const status = await transport.getStatus();
this.registry.updateRunner(runnerId, {
lastHealthCheck: new Date().toISOString(),
memoryUsageMB: status.memoryUsageMB,
uptime: status.uptime,
libraryHash: status.libraryHash,
});
return true;
}
OrchestratorLogger.logWarning(`[HotRunner] Runner ${runnerId} health check returned false`);
this.registry.updateRunner(runnerId, {
state: 'unhealthy',
lastHealthCheck: new Date().toISOString(),
});
return false;
} catch (error: any) {
OrchestratorLogger.logWarning(`[HotRunner] Runner ${runnerId} health check failed: ${error.message}`);
this.registry.updateRunner(runnerId, {
state: 'unhealthy',
lastHealthCheck: new Date().toISOString(),
});
return false;
}
}
/**
* Mark an unhealthy runner for cleanup and disconnect its transport.
*/
async recycleUnhealthyRunner(runnerId: string): Promise<void> {
if (!this.registry) {
return;
}
OrchestratorLogger.log(`[HotRunner] Recycling unhealthy runner ${runnerId}`);
this.registry.updateRunner(runnerId, { state: 'stopping' });
const transport = this.transports.get(runnerId);
if (transport) {
try {
await transport.disconnect();
} catch (error: any) {
OrchestratorLogger.logWarning(`[HotRunner] Error disconnecting runner ${runnerId}: ${error.message}`);
}
this.transports.delete(runnerId);
}
this.registry.unregisterRunner(runnerId);
OrchestratorLogger.log(`[HotRunner] Runner ${runnerId} recycled and removed`);
}
/**
* Recycle a runner that has been idle longer than the maximum idle time.
*/
async recycleIdleRunner(runnerId: string, maxIdleTime: number): Promise<void> {
if (!this.registry) {
return;
}
const runner = this.registry.getRunner(runnerId);
if (!runner || runner.state !== 'idle') {
return;
}
const lastCheckTime = new Date(runner.lastHealthCheck).getTime();
const now = Date.now();
const idleSeconds = (now - lastCheckTime) / 1000;
if (idleSeconds >= maxIdleTime) {
OrchestratorLogger.log(
`[HotRunner] Runner ${runnerId} idle for ${Math.floor(idleSeconds)}s (max: ${maxIdleTime}s), recycling`,
);
await this.recycleUnhealthyRunner(runnerId);
}
}
/**
* Run health checks and idle-recycle checks for all registered runners.
*/
private async runHealthChecks(): Promise<void> {
if (!this.registry) {
return;
}
const runners = this.registry.listRunners();
for (const runner of runners) {
if (runner.state === 'stopping') {
continue;
}
const healthy = await this.checkHealth(runner.id);
if (!healthy && runner.state !== 'starting') {
await this.recycleUnhealthyRunner(runner.id);
continue;
}
// Check for idle timeout
const config = this.registry.getConfig(runner.id);
if (config && runner.state === 'idle') {
await this.recycleIdleRunner(runner.id, config.maxIdleTime);
}
// Check for max jobs before recycle
if (config && config.maxJobsBeforeRecycle > 0 && runner.jobsCompleted >= config.maxJobsBeforeRecycle) {
OrchestratorLogger.log(
`[HotRunner] Runner ${runner.id} reached max jobs (${runner.jobsCompleted}/${config.maxJobsBeforeRecycle}), recycling`,
);
await this.recycleUnhealthyRunner(runner.id);
}
}
}
/**
* Whether health monitoring is currently active.
*/
get isMonitoring(): boolean {
return this.intervalHandle !== undefined;
}
}

View File

@@ -1,315 +0,0 @@
import fs from 'node:fs';
import path from 'node:path';
import { customAlphabet } from 'nanoid';
import OrchestratorLogger from '../core/orchestrator-logger';
import { HotRunnerConfig, HotRunnerStatus } from './hot-runner-types';
const generateId = customAlphabet('abcdefghijklmnopqrstuvwxyz0123456789', 12);
const PERSISTENCE_FILENAME = 'hot-runners.json';
const VALID_RUNNER_STATES: ReadonlySet<string> = new Set(['idle', 'busy', 'starting', 'stopping', 'unhealthy']);
export interface HotRunnerFilter {
platform?: string;
state?: string;
unityVersion?: string;
}
/**
* Validate that a restored runner entry has all required fields with correct types.
* Returns true if the entry is a valid HotRunnerStatus, false otherwise.
*/
function isValidRunnerStatus(entry: unknown): entry is HotRunnerStatus {
if (typeof entry !== 'object' || entry === null) {
return false;
}
const record = entry as Record<string, unknown>;
return (
typeof record.id === 'string' &&
record.id.length > 0 &&
typeof record.state === 'string' &&
VALID_RUNNER_STATES.has(record.state) &&
typeof record.unityVersion === 'string' &&
typeof record.platform === 'string' &&
typeof record.uptime === 'number' &&
typeof record.jobsCompleted === 'number' &&
typeof record.lastHealthCheck === 'string' &&
typeof record.memoryUsageMB === 'number'
);
}
/**
* Validate that a restored config entry has all required fields with correct types.
* Returns true if the entry is a valid HotRunnerConfig, false otherwise.
*/
function isValidRunnerConfig(entry: unknown): entry is HotRunnerConfig {
if (typeof entry !== 'object' || entry === null) {
return false;
}
const record = entry as Record<string, unknown>;
return (
typeof record.enabled === 'boolean' &&
typeof record.transport === 'string' &&
['websocket', 'grpc', 'named-pipe'].includes(record.transport) &&
typeof record.host === 'string' &&
typeof record.port === 'number' &&
typeof record.healthCheckInterval === 'number' &&
typeof record.maxIdleTime === 'number' &&
typeof record.maxJobsBeforeRecycle === 'number'
);
}
export class HotRunnerRegistry {
private runners: Map<string, HotRunnerStatus> = new Map();
private configs: Map<string, HotRunnerConfig> = new Map();
private persistencePath: string;
constructor(persistenceDirectory?: string) {
this.persistencePath = persistenceDirectory ? path.join(persistenceDirectory, PERSISTENCE_FILENAME) : '';
}
/**
* Register a new hot runner. Returns the generated runner ID.
*/
registerRunner(config: HotRunnerConfig): string {
const id = `hr-${generateId()}`;
const status: HotRunnerStatus = {
id,
state: 'starting',
unityVersion: config.unityVersion ?? 'unknown',
platform: config.platform ?? 'unknown',
uptime: 0,
jobsCompleted: 0,
lastHealthCheck: new Date().toISOString(),
memoryUsageMB: 0,
};
this.runners.set(id, status);
this.configs.set(id, config);
OrchestratorLogger.log(`[HotRunner] Registered runner ${id} (${status.unityVersion}/${status.platform})`);
this.persist();
return id;
}
/**
* Remove a runner from the registry.
*/
unregisterRunner(id: string): void {
const existed = this.runners.delete(id);
this.configs.delete(id);
if (existed) {
OrchestratorLogger.log(`[HotRunner] Unregistered runner ${id}`);
this.persist();
}
}
/**
* Get a runner's current status by ID.
*/
getRunner(id: string): HotRunnerStatus | undefined {
return this.runners.get(id);
}
/**
* Get a runner's config by ID.
*/
getConfig(id: string): HotRunnerConfig | undefined {
return this.configs.get(id);
}
/**
* List all runners, optionally filtered by platform, state, or Unity version.
*/
listRunners(filter?: HotRunnerFilter): HotRunnerStatus[] {
let results = [...this.runners.values()];
if (filter?.platform) {
results = results.filter((runner) => runner.platform === filter.platform);
}
if (filter?.state) {
results = results.filter((runner) => runner.state === filter.state);
}
if (filter?.unityVersion) {
results = results.filter((runner) => runner.unityVersion === filter.unityVersion);
}
return results;
}
/**
* Find an idle runner matching the given Unity version and platform requirements.
*/
findAvailableRunner(requirements: { unityVersion: string; platform: string }): HotRunnerStatus | undefined {
return this.listRunners({
state: 'idle',
unityVersion: requirements.unityVersion,
platform: requirements.platform,
})[0];
}
/**
* Update a runner's status fields. Merges partial updates into existing status.
*/
updateRunner(id: string, update: Partial<HotRunnerStatus>): void {
const existing = this.runners.get(id);
if (!existing) {
return;
}
this.runners.set(id, { ...existing, ...update, id });
this.persist();
}
/**
* Get the total number of registered runners.
*/
get size(): number {
return this.runners.size;
}
/**
* Validate all runners in the registry and reset invalid ones to 'unhealthy'.
* Returns the number of runners that were repaired.
*/
validateAndRepair(): number {
let repaired = 0;
for (const [id, status] of this.runners) {
// Cast to unknown to bypass the type guard narrowing to 'never',
// since the Map is typed as HotRunnerStatus but entries may have
// been corrupted via direct deserialization or unsafe casts.
const entry = status as unknown as Record<string, unknown>;
if (!isValidRunnerStatus(entry)) {
OrchestratorLogger.logWarning(`[HotRunner] Runner ${id} has invalid state, marking as unhealthy`);
this.runners.set(id, {
id,
state: 'unhealthy',
unityVersion: typeof entry.unityVersion === 'string' ? entry.unityVersion : 'unknown',
platform: typeof entry.platform === 'string' ? entry.platform : 'unknown',
uptime: typeof entry.uptime === 'number' ? entry.uptime : 0,
jobsCompleted: typeof entry.jobsCompleted === 'number' ? entry.jobsCompleted : 0,
lastHealthCheck: typeof entry.lastHealthCheck === 'string' ? entry.lastHealthCheck : new Date().toISOString(),
memoryUsageMB: typeof entry.memoryUsageMB === 'number' ? entry.memoryUsageMB : 0,
});
repaired++;
}
}
if (repaired > 0) {
this.persist();
}
return repaired;
}
/**
* Persist current registry state to disk for crash recovery.
* Validates data before writing to prevent persisting corrupt state.
*/
private persist(): void {
if (!this.persistencePath) {
return;
}
try {
// Validate data before persisting
for (const [id, status] of this.runners) {
if (!isValidRunnerStatus(status)) {
OrchestratorLogger.logWarning(`[HotRunner] Skipping persistence -- runner ${id} has invalid state`);
return;
}
}
const data = {
runners: Object.fromEntries(this.runners),
configs: Object.fromEntries(this.configs),
};
const directory = path.dirname(this.persistencePath);
if (!fs.existsSync(directory)) {
fs.mkdirSync(directory, { recursive: true });
}
fs.writeFileSync(this.persistencePath, JSON.stringify(data, undefined, 2));
} catch (error: any) {
OrchestratorLogger.logWarning(`[HotRunner] Failed to persist registry: ${error.message}`);
}
}
/**
* Load registry state from disk. Returns the number of runners restored.
* Validates each restored entry and discards corrupt entries with warnings.
* If the persistence file itself is corrupt (invalid JSON), starts with
* an empty registry.
*/
loadFromDisk(): number {
if (!this.persistencePath || !fs.existsSync(this.persistencePath)) {
return 0;
}
let data: any;
try {
const raw = fs.readFileSync(this.persistencePath, 'utf8');
data = JSON.parse(raw);
} catch (error: any) {
OrchestratorLogger.logWarning(
`[HotRunner] Persistence file is corrupt, starting with empty registry: ${error.message}`,
);
return 0;
}
if (typeof data !== 'object' || data === null) {
OrchestratorLogger.logWarning('[HotRunner] Persistence file has invalid structure, starting with empty registry');
return 0;
}
let discarded = 0;
if (data.runners && typeof data.runners === 'object') {
for (const [id, status] of Object.entries(data.runners)) {
if (isValidRunnerStatus(status)) {
this.runners.set(id, status);
} else {
OrchestratorLogger.logWarning(`[HotRunner] Discarding invalid runner entry '${id}' from persistence file`);
discarded++;
}
}
}
if (data.configs && typeof data.configs === 'object') {
for (const [id, config] of Object.entries(data.configs)) {
// Only restore configs for runners that were successfully restored
if (this.runners.has(id)) {
if (isValidRunnerConfig(config)) {
this.configs.set(id, config);
} else {
OrchestratorLogger.logWarning(`[HotRunner] Discarding invalid config entry '${id}' from persistence file`);
}
}
}
}
if (discarded > 0) {
OrchestratorLogger.logWarning(`[HotRunner] Discarded ${discarded} invalid runner(s) from persistence file`);
}
OrchestratorLogger.log(`[HotRunner] Restored ${this.runners.size} runner(s) from disk`);
return this.runners.size;
}
}

View File

@@ -1,166 +0,0 @@
import OrchestratorLogger from '../core/orchestrator-logger';
import { HotRunnerRegistry } from './hot-runner-registry';
import { HotRunnerHealthMonitor } from './hot-runner-health-monitor';
import { HotRunnerDispatcher, OutputCallback } from './hot-runner-dispatcher';
import {
HotRunnerConfig,
HotRunnerJobRequest,
HotRunnerJobResult,
HotRunnerStatus,
HotRunnerTransport,
} from './hot-runner-types';
import BuildParameters from '../../../build-parameters';
export class HotRunnerService {
private registry: HotRunnerRegistry;
private healthMonitor: HotRunnerHealthMonitor;
private dispatcher: HotRunnerDispatcher;
private transports: Map<string, HotRunnerTransport> = new Map();
private config: HotRunnerConfig | undefined;
constructor(persistenceDirectory?: string) {
this.registry = new HotRunnerRegistry(persistenceDirectory);
this.healthMonitor = new HotRunnerHealthMonitor();
this.dispatcher = new HotRunnerDispatcher(this.transports);
}
/**
* Initialize the hot runner service: load persisted state, start health monitoring.
*/
async initialize(config: HotRunnerConfig): Promise<void> {
this.config = config;
OrchestratorLogger.log(
`[HotRunner] Initializing service (transport: ${config.transport}, ${config.host}:${config.port})`,
);
// Attempt to restore previously registered runners from disk
const restored = this.registry.loadFromDisk();
if (restored > 0) {
OrchestratorLogger.log(`[HotRunner] Restored ${restored} runner(s) from persistence`);
}
// Start health monitoring
this.healthMonitor.startMonitoring(this.registry, config.healthCheckInterval, this.transports);
OrchestratorLogger.log(`[HotRunner] Service initialized`);
}
/**
* Register a runner with a transport implementation.
* Returns the runner ID.
*/
registerRunner(config: HotRunnerConfig, transport: HotRunnerTransport): string {
const id = this.registry.registerRunner(config);
this.transports.set(id, transport);
return id;
}
/**
* Submit a build job to an available hot runner.
* Converts BuildParameters to a HotRunnerJobRequest and dispatches.
*/
async submitBuild(params: BuildParameters, onOutput?: OutputCallback): Promise<HotRunnerJobResult> {
const request: HotRunnerJobRequest = {
jobId: params.buildGuid || `build-${Date.now()}`,
buildMethod: params.buildMethod || undefined,
buildTarget: params.targetPlatform,
buildPath: params.buildPath,
customParameters: params.customParameters ? this.parseCustomParameters(params.customParameters) : undefined,
timeout: 30 * 60 * 1000, // 30 minutes default
};
OrchestratorLogger.log(`[HotRunner] Submitting build: ${request.jobId} (target: ${request.buildTarget})`);
return this.dispatcher.dispatchJob(request, this.registry, params.editorVersion, onOutput);
}
/**
* Submit a test job to an available hot runner.
* Converts BuildParameters and optional suite config to a test-mode HotRunnerJobRequest.
*/
async submitTest(
params: BuildParameters,
suiteConfig?: { testMode?: 'editmode' | 'playmode'; testSuitePath?: string },
onOutput?: OutputCallback,
): Promise<HotRunnerJobResult> {
const request: HotRunnerJobRequest = {
jobId: params.buildGuid || `test-${Date.now()}`,
buildTarget: params.targetPlatform,
customParameters: params.customParameters ? this.parseCustomParameters(params.customParameters) : undefined,
timeout: 30 * 60 * 1000, // 30 minutes default
testMode: suiteConfig?.testMode ?? 'editmode',
testSuitePath: suiteConfig?.testSuitePath,
};
OrchestratorLogger.log(`[HotRunner] Submitting test: ${request.jobId} (mode: ${request.testMode})`);
return this.dispatcher.dispatchJob(request, this.registry, params.editorVersion, onOutput);
}
/**
* Shut down the service: stop health monitoring, disconnect all transports,
* and unregister all runners.
*/
async shutdown(): Promise<void> {
OrchestratorLogger.log(`[HotRunner] Shutting down service`);
this.healthMonitor.stopMonitoring();
const disconnectPromises: Promise<void>[] = [];
for (const [id, transport] of this.transports.entries()) {
disconnectPromises.push(
transport.disconnect().catch((error: any) => {
OrchestratorLogger.logWarning(`[HotRunner] Error disconnecting runner ${id}: ${error.message}`);
}),
);
}
await Promise.all(disconnectPromises);
this.transports.clear();
OrchestratorLogger.log(`[HotRunner] Service shut down`);
}
/**
* Get the status of all registered runners.
*/
getStatus(): HotRunnerStatus[] {
return this.registry.listRunners();
}
/**
* Get the underlying registry (for testing or advanced use).
*/
getRegistry(): HotRunnerRegistry {
return this.registry;
}
/**
* Parse a space-separated custom parameters string into a key-value map.
* Handles `-key value` and `-key=value` formats.
*/
private parseCustomParameters(raw: string): Record<string, string> {
const result: Record<string, string> = {};
const parts = raw.trim().split(/\s+/);
for (let i = 0; i < parts.length; i++) {
const part = parts[i];
if (part.startsWith('-')) {
const key = part.replace(/^-+/, '');
if (key.includes('=')) {
const [k, ...v] = key.split('=');
result[k] = v.join('=');
} else if (i + 1 < parts.length && !parts[i + 1].startsWith('-')) {
result[key] = parts[i + 1];
i++;
} else {
result[key] = 'true';
}
}
}
return result;
}
}

View File

@@ -1,54 +0,0 @@
export interface HotRunnerConfig {
enabled: boolean;
transport: 'websocket' | 'grpc' | 'named-pipe';
host: string;
port: number;
healthCheckInterval: number; // seconds
maxIdleTime: number; // seconds before recycling
maxJobsBeforeRecycle: number;
unityVersion?: string;
platform?: string;
}
export interface HotRunnerStatus {
id: string;
state: 'idle' | 'busy' | 'starting' | 'stopping' | 'unhealthy';
unityVersion: string;
platform: string;
currentJob?: string;
lastJobCompleted?: string;
uptime: number;
jobsCompleted: number;
lastHealthCheck: string;
memoryUsageMB: number;
libraryHash?: string;
}
export interface HotRunnerJobRequest {
jobId: string;
buildMethod?: string;
buildTarget: string;
buildPath?: string;
customParameters?: Record<string, string>;
timeout: number;
testMode?: 'editmode' | 'playmode';
testSuitePath?: string;
}
export interface HotRunnerJobResult {
jobId: string;
success: boolean;
exitCode: number;
duration: number;
output: string;
artifacts?: string[];
testResults?: string; // path to test result file
}
export interface HotRunnerTransport {
connect(config: HotRunnerConfig): Promise<void>;
disconnect(): Promise<void>;
sendJob(request: HotRunnerJobRequest): Promise<HotRunnerJobResult>;
getStatus(): Promise<HotRunnerStatus>;
healthCheck(): Promise<boolean>;
}

View File

@@ -1,740 +0,0 @@
import fs from 'node:fs';
import { HotRunnerRegistry } from './hot-runner-registry';
import { HotRunnerHealthMonitor } from './hot-runner-health-monitor';
import { HotRunnerDispatcher } from './hot-runner-dispatcher';
import { HotRunnerService } from './hot-runner-service';
import {
HotRunnerConfig,
HotRunnerJobRequest,
HotRunnerJobResult,
HotRunnerStatus,
HotRunnerTransport,
} from './hot-runner-types';
// Mock dependencies
jest.mock('node:fs');
jest.mock('../core/orchestrator-logger');
const mockFs = fs as jest.Mocked<typeof fs>;
function createMockConfig(overrides?: Partial<HotRunnerConfig>): HotRunnerConfig {
return {
enabled: true,
transport: 'websocket',
host: 'localhost',
port: 9090,
healthCheckInterval: 30,
maxIdleTime: 3600,
maxJobsBeforeRecycle: 100,
unityVersion: '2022.3.0f1',
platform: 'StandaloneWindows64',
...overrides,
};
}
function createMockTransport(overrides?: Partial<HotRunnerTransport>): HotRunnerTransport {
return {
// eslint-disable-next-line unicorn/no-useless-undefined
connect: jest.fn().mockResolvedValue(undefined),
// eslint-disable-next-line unicorn/no-useless-undefined
disconnect: jest.fn().mockResolvedValue(undefined),
sendJob: jest.fn().mockResolvedValue({
jobId: 'test-job',
success: true,
exitCode: 0,
duration: 5000,
output: 'Build succeeded',
artifacts: ['build/output.exe'],
} as HotRunnerJobResult),
getStatus: jest.fn().mockResolvedValue({
id: 'mock-runner',
state: 'idle',
unityVersion: '2022.3.0f1',
platform: 'StandaloneWindows64',
uptime: 3600,
jobsCompleted: 5,
lastHealthCheck: new Date().toISOString(),
memoryUsageMB: 1024,
} as HotRunnerStatus),
healthCheck: jest.fn().mockResolvedValue(true),
...overrides,
};
}
function createMockJobRequest(overrides?: Partial<HotRunnerJobRequest>): HotRunnerJobRequest {
return {
jobId: 'job-001',
buildTarget: 'StandaloneWindows64',
timeout: 60000,
...overrides,
};
}
// --- Registry Tests ---
describe('HotRunnerRegistry', () => {
let registry: HotRunnerRegistry;
beforeEach(() => {
jest.clearAllMocks();
registry = new HotRunnerRegistry();
});
it('should register a runner and return an ID', () => {
const config = createMockConfig();
const id = registry.registerRunner(config);
expect(id).toMatch(/^hr-/);
expect(registry.size).toBe(1);
});
it('should retrieve a registered runner by ID', () => {
const config = createMockConfig();
const id = registry.registerRunner(config);
const runner = registry.getRunner(id);
expect(runner).toBeDefined();
expect(runner!.id).toBe(id);
expect(runner!.state).toBe('starting');
expect(runner!.unityVersion).toBe('2022.3.0f1');
expect(runner!.platform).toBe('StandaloneWindows64');
});
it('should return undefined for unknown runner ID', () => {
const runner = registry.getRunner('nonexistent');
expect(runner).toBeUndefined();
});
it('should unregister a runner', () => {
const id = registry.registerRunner(createMockConfig());
expect(registry.size).toBe(1);
registry.unregisterRunner(id);
expect(registry.size).toBe(0);
expect(registry.getRunner(id)).toBeUndefined();
});
it('should handle unregistering a nonexistent runner gracefully', () => {
registry.unregisterRunner('nonexistent');
expect(registry.size).toBe(0);
});
it('should list all runners without filter', () => {
registry.registerRunner(createMockConfig({ platform: 'StandaloneWindows64' }));
registry.registerRunner(createMockConfig({ platform: 'StandaloneLinux64' }));
registry.registerRunner(createMockConfig({ platform: 'StandaloneOSX' }));
const all = registry.listRunners();
expect(all).toHaveLength(3);
});
it('should filter runners by platform', () => {
registry.registerRunner(createMockConfig({ platform: 'StandaloneWindows64' }));
registry.registerRunner(createMockConfig({ platform: 'StandaloneLinux64' }));
registry.registerRunner(createMockConfig({ platform: 'StandaloneWindows64' }));
const windows = registry.listRunners({ platform: 'StandaloneWindows64' });
expect(windows).toHaveLength(2);
const linux = registry.listRunners({ platform: 'StandaloneLinux64' });
expect(linux).toHaveLength(1);
});
it('should filter runners by state', () => {
const id1 = registry.registerRunner(createMockConfig());
registry.registerRunner(createMockConfig());
registry.updateRunner(id1, { state: 'idle' });
// second runner remains in 'starting' state
const idle = registry.listRunners({ state: 'idle' });
expect(idle).toHaveLength(1);
expect(idle[0].id).toBe(id1);
});
it('should filter runners by Unity version', () => {
registry.registerRunner(createMockConfig({ unityVersion: '2022.3.0f1' }));
registry.registerRunner(createMockConfig({ unityVersion: '2023.1.0f1' }));
registry.registerRunner(createMockConfig({ unityVersion: '2022.3.0f1' }));
const v2022 = registry.listRunners({ unityVersion: '2022.3.0f1' });
expect(v2022).toHaveLength(2);
});
it('should find an available idle runner matching requirements', () => {
const id1 = registry.registerRunner(
createMockConfig({ unityVersion: '2022.3.0f1', platform: 'StandaloneWindows64' }),
);
registry.updateRunner(id1, { state: 'idle' });
const id2 = registry.registerRunner(
createMockConfig({ unityVersion: '2023.1.0f1', platform: 'StandaloneLinux64' }),
);
registry.updateRunner(id2, { state: 'idle' });
const found = registry.findAvailableRunner({
unityVersion: '2022.3.0f1',
platform: 'StandaloneWindows64',
});
expect(found).toBeDefined();
expect(found!.id).toBe(id1);
});
it('should return undefined when no runner matches requirements', () => {
const id = registry.registerRunner(
createMockConfig({ unityVersion: '2022.3.0f1', platform: 'StandaloneWindows64' }),
);
registry.updateRunner(id, { state: 'idle' });
const found = registry.findAvailableRunner({
unityVersion: '2023.1.0f1',
platform: 'StandaloneLinux64',
});
expect(found).toBeUndefined();
});
it('should update runner status fields', () => {
const id = registry.registerRunner(createMockConfig());
registry.updateRunner(id, { state: 'idle', memoryUsageMB: 2048 });
const runner = registry.getRunner(id);
expect(runner!.state).toBe('idle');
expect(runner!.memoryUsageMB).toBe(2048);
// ID should not be overridden by the update
expect(runner!.id).toBe(id);
});
it('should persist and load registry from disk', () => {
const persistenceRegistry = new HotRunnerRegistry('/tmp/test');
mockFs.existsSync.mockReturnValue(true);
mockFs.writeFileSync.mockImplementation(() => {});
mockFs.mkdirSync.mockImplementation(() => '' as any);
const id = persistenceRegistry.registerRunner(createMockConfig());
// Verify writeFileSync was called for persistence
expect(mockFs.writeFileSync).toHaveBeenCalled();
const writtenData = JSON.parse((mockFs.writeFileSync as jest.Mock).mock.calls[0][1] as string);
expect(writtenData.runners).toBeDefined();
expect(writtenData.runners[id]).toBeDefined();
});
it('should load runners from disk on loadFromDisk', () => {
const persistenceRegistry = new HotRunnerRegistry('/tmp/test');
const storedData = {
runners: {
'hr-restored': {
id: 'hr-restored',
state: 'idle',
unityVersion: '2022.3.0f1',
platform: 'StandaloneWindows64',
uptime: 100,
jobsCompleted: 3,
lastHealthCheck: new Date().toISOString(),
memoryUsageMB: 512,
},
},
configs: {
'hr-restored': createMockConfig(),
},
};
mockFs.existsSync.mockReturnValue(true);
mockFs.readFileSync.mockReturnValue(JSON.stringify(storedData));
const count = persistenceRegistry.loadFromDisk();
expect(count).toBe(1);
expect(persistenceRegistry.getRunner('hr-restored')).toBeDefined();
});
it('should discard invalid runner entries when loading from disk', () => {
const persistenceRegistry = new HotRunnerRegistry('/tmp/test');
const storedData = {
runners: {
'hr-valid': {
id: 'hr-valid',
state: 'idle',
unityVersion: '2022.3.0f1',
platform: 'StandaloneWindows64',
uptime: 100,
jobsCompleted: 3,
lastHealthCheck: new Date().toISOString(),
memoryUsageMB: 512,
},
'hr-invalid': {
// Missing required fields like state, unityVersion
id: 'hr-invalid',
},
'hr-bad-state': {
id: 'hr-bad-state',
state: 'nonexistent-state',
unityVersion: '2022.3.0f1',
platform: 'StandaloneWindows64',
uptime: 0,
jobsCompleted: 0,
lastHealthCheck: new Date().toISOString(),
memoryUsageMB: 0,
},
},
configs: {
'hr-valid': createMockConfig(),
'hr-invalid': createMockConfig(),
'hr-bad-state': createMockConfig(),
},
};
mockFs.existsSync.mockReturnValue(true);
mockFs.readFileSync.mockReturnValue(JSON.stringify(storedData));
const count = persistenceRegistry.loadFromDisk();
expect(count).toBe(1);
expect(persistenceRegistry.getRunner('hr-valid')).toBeDefined();
expect(persistenceRegistry.getRunner('hr-invalid')).toBeUndefined();
expect(persistenceRegistry.getRunner('hr-bad-state')).toBeUndefined();
});
it('should handle corrupt JSON persistence file gracefully', () => {
const persistenceRegistry = new HotRunnerRegistry('/tmp/test');
mockFs.existsSync.mockReturnValue(true);
mockFs.readFileSync.mockReturnValue('{ invalid json !!!');
const count = persistenceRegistry.loadFromDisk();
expect(count).toBe(0);
expect(persistenceRegistry.size).toBe(0);
});
it('should handle persistence file with invalid top-level structure', () => {
const persistenceRegistry = new HotRunnerRegistry('/tmp/test');
mockFs.existsSync.mockReturnValue(true);
mockFs.readFileSync.mockReturnValue('"just a string"');
const count = persistenceRegistry.loadFromDisk();
expect(count).toBe(0);
});
it('should handle persistence file with null runners', () => {
const persistenceRegistry = new HotRunnerRegistry('/tmp/test');
mockFs.existsSync.mockReturnValue(true);
mockFs.readFileSync.mockReturnValue('{"runners": null, "configs": null}');
const count = persistenceRegistry.loadFromDisk();
expect(count).toBe(0);
});
it('should validate and repair invalid runners', () => {
const persistenceRegistry = new HotRunnerRegistry('/tmp/test');
mockFs.existsSync.mockReturnValue(true);
mockFs.writeFileSync.mockImplementation(() => {});
mockFs.mkdirSync.mockImplementation(() => '' as any);
// Register a valid runner first
const id = persistenceRegistry.registerRunner(createMockConfig());
persistenceRegistry.updateRunner(id, { state: 'idle' });
// Manually corrupt the runner's state by setting an invalid state
// (we access via the public API -- updateRunner with a cast)
persistenceRegistry.updateRunner(id, { state: 'invalid-state' as any });
const repaired = persistenceRegistry.validateAndRepair();
expect(repaired).toBe(1);
const runner = persistenceRegistry.getRunner(id);
expect(runner!.state).toBe('unhealthy');
});
it('should not discard configs for valid runners when loading from disk', () => {
const persistenceRegistry = new HotRunnerRegistry('/tmp/test');
const storedData = {
runners: {
'hr-valid': {
id: 'hr-valid',
state: 'idle',
unityVersion: '2022.3.0f1',
platform: 'StandaloneWindows64',
uptime: 100,
jobsCompleted: 3,
lastHealthCheck: new Date().toISOString(),
memoryUsageMB: 512,
},
},
configs: {
'hr-valid': createMockConfig(),
},
};
mockFs.existsSync.mockReturnValue(true);
mockFs.readFileSync.mockReturnValue(JSON.stringify(storedData));
persistenceRegistry.loadFromDisk();
expect(persistenceRegistry.getConfig('hr-valid')).toBeDefined();
});
});
// --- Health Monitor Tests ---
describe('HotRunnerHealthMonitor', () => {
let monitor: HotRunnerHealthMonitor;
let registry: HotRunnerRegistry;
let transports: Map<string, HotRunnerTransport>;
beforeEach(() => {
jest.clearAllMocks();
jest.useFakeTimers();
monitor = new HotRunnerHealthMonitor();
registry = new HotRunnerRegistry();
transports = new Map();
});
afterEach(() => {
monitor.stopMonitoring();
jest.useRealTimers();
});
it('should start and stop monitoring', () => {
monitor.startMonitoring(registry, 30, transports);
expect(monitor.isMonitoring).toBe(true);
monitor.stopMonitoring();
expect(monitor.isMonitoring).toBe(false);
});
it('should report healthy when transport health check passes', async () => {
const id = registry.registerRunner(createMockConfig());
registry.updateRunner(id, { state: 'idle' });
const transport = createMockTransport();
transports.set(id, transport);
monitor.startMonitoring(registry, 30, transports);
const healthy = await monitor.checkHealth(id);
expect(healthy).toBe(true);
expect(transport.healthCheck).toHaveBeenCalled();
});
it('should mark runner as unhealthy when health check fails', async () => {
const id = registry.registerRunner(createMockConfig());
registry.updateRunner(id, { state: 'idle' });
const transport = createMockTransport({
healthCheck: jest.fn().mockResolvedValue(false),
});
transports.set(id, transport);
monitor.startMonitoring(registry, 30, transports);
const healthy = await monitor.checkHealth(id);
expect(healthy).toBe(false);
const runner = registry.getRunner(id);
expect(runner!.state).toBe('unhealthy');
});
it('should mark runner as unhealthy when health check throws', async () => {
const id = registry.registerRunner(createMockConfig());
registry.updateRunner(id, { state: 'idle' });
const transport = createMockTransport({
healthCheck: jest.fn().mockRejectedValue(new Error('Connection refused')),
});
transports.set(id, transport);
monitor.startMonitoring(registry, 30, transports);
const healthy = await monitor.checkHealth(id);
expect(healthy).toBe(false);
});
it('should recycle unhealthy runner and remove from registry', async () => {
const id = registry.registerRunner(createMockConfig());
const transport = createMockTransport();
transports.set(id, transport);
monitor.startMonitoring(registry, 30, transports);
await monitor.recycleUnhealthyRunner(id);
expect(registry.getRunner(id)).toBeUndefined();
expect(transport.disconnect).toHaveBeenCalled();
expect(transports.has(id)).toBe(false);
});
it('should recycle idle runner when max idle time exceeded', async () => {
const id = registry.registerRunner(createMockConfig({ maxIdleTime: 60 }));
// Set lastHealthCheck to 120 seconds ago
const oldDate = new Date(Date.now() - 120 * 1000).toISOString();
registry.updateRunner(id, { state: 'idle', lastHealthCheck: oldDate });
const transport = createMockTransport();
transports.set(id, transport);
monitor.startMonitoring(registry, 30, transports);
await monitor.recycleIdleRunner(id, 60);
expect(registry.getRunner(id)).toBeUndefined();
});
it('should not recycle idle runner when within max idle time', async () => {
const id = registry.registerRunner(createMockConfig({ maxIdleTime: 3600 }));
registry.updateRunner(id, {
state: 'idle',
lastHealthCheck: new Date().toISOString(),
});
const transport = createMockTransport();
transports.set(id, transport);
monitor.startMonitoring(registry, 30, transports);
await monitor.recycleIdleRunner(id, 3600);
// Runner should still exist
expect(registry.getRunner(id)).toBeDefined();
});
it('should return false when no transport exists for runner', async () => {
const id = registry.registerRunner(createMockConfig());
// Do not set any transport for this runner
monitor.startMonitoring(registry, 30, transports);
const healthy = await monitor.checkHealth(id);
expect(healthy).toBe(false);
});
});
// --- Dispatcher Tests ---
describe('HotRunnerDispatcher', () => {
let registry: HotRunnerRegistry;
let transports: Map<string, HotRunnerTransport>;
let dispatcher: HotRunnerDispatcher;
beforeEach(() => {
jest.clearAllMocks();
registry = new HotRunnerRegistry();
transports = new Map();
dispatcher = new HotRunnerDispatcher(transports);
});
it('should dispatch a job to an available runner', async () => {
const id = registry.registerRunner(createMockConfig());
registry.updateRunner(id, { state: 'idle' });
const transport = createMockTransport();
transports.set(id, transport);
const request = createMockJobRequest();
const result = await dispatcher.dispatchJob(request, registry, '2022.3.0f1');
expect(result.success).toBe(true);
expect(result.exitCode).toBe(0);
expect(transport.sendJob).toHaveBeenCalledWith(request);
});
it('should mark runner as busy during job execution', async () => {
const id = registry.registerRunner(createMockConfig());
registry.updateRunner(id, { state: 'idle' });
const statesDuringJob: string[] = [];
const transport = createMockTransport({
sendJob: jest.fn().mockImplementation(async () => {
const runner = registry.getRunner(id);
if (runner) statesDuringJob.push(runner.state);
return {
jobId: 'job-001',
success: true,
exitCode: 0,
duration: 1000,
output: 'ok',
};
}),
});
transports.set(id, transport);
await dispatcher.dispatchJob(createMockJobRequest(), registry, '2022.3.0f1');
expect(statesDuringJob).toContain('busy');
// After completion, should be idle again
const runner = registry.getRunner(id);
expect(runner!.state).toBe('idle');
});
it('should increment jobsCompleted after successful dispatch', async () => {
const id = registry.registerRunner(createMockConfig());
registry.updateRunner(id, { state: 'idle', jobsCompleted: 5 });
const transport = createMockTransport();
transports.set(id, transport);
await dispatcher.dispatchJob(createMockJobRequest(), registry, '2022.3.0f1');
const runner = registry.getRunner(id);
expect(runner!.jobsCompleted).toBe(6);
});
it('should throw when no runner is available and wait times out', async () => {
// No runners registered at all
const request = createMockJobRequest({ timeout: 100 });
await expect(dispatcher.dispatchJob(request, registry, '2022.3.0f1')).rejects.toThrow(/Timed out waiting/);
});
it('should throw when runner has no transport', async () => {
const id = registry.registerRunner(createMockConfig());
registry.updateRunner(id, { state: 'idle' });
// No transport set for this runner
const request = createMockJobRequest();
await expect(dispatcher.dispatchJob(request, registry, '2022.3.0f1')).rejects.toThrow(/No transport available/);
});
it('should handle job failure and return runner to idle', async () => {
const id = registry.registerRunner(createMockConfig());
registry.updateRunner(id, { state: 'idle' });
const transport = createMockTransport({
sendJob: jest.fn().mockRejectedValue(new Error('Unity crashed')),
});
transports.set(id, transport);
await expect(dispatcher.dispatchJob(createMockJobRequest(), registry, '2022.3.0f1')).rejects.toThrow(
'Unity crashed',
);
// Runner should be back to idle despite failure
const runner = registry.getRunner(id);
expect(runner!.state).toBe('idle');
});
it('should handle job timeout', async () => {
const id = registry.registerRunner(createMockConfig());
registry.updateRunner(id, { state: 'idle' });
const transport = createMockTransport({
sendJob: jest.fn().mockImplementation(
() => new Promise((resolve) => setTimeout(resolve, 60000)), // never resolves within timeout
),
});
transports.set(id, transport);
const request = createMockJobRequest({ timeout: 50 });
await expect(dispatcher.dispatchJob(request, registry, '2022.3.0f1')).rejects.toThrow(/timed out/);
});
it('should disconnect transport on job timeout', async () => {
const id = registry.registerRunner(createMockConfig());
registry.updateRunner(id, { state: 'idle' });
const transport = createMockTransport({
sendJob: jest.fn().mockImplementation(
() => new Promise((resolve) => setTimeout(resolve, 60000)), // never resolves within timeout
),
});
transports.set(id, transport);
const request = createMockJobRequest({ timeout: 50 });
await expect(dispatcher.dispatchJob(request, registry, '2022.3.0f1')).rejects.toThrow(/timed out/);
// Transport should have been disconnected to clean up orphaned connection
expect(transport.disconnect).toHaveBeenCalled();
});
it('should call output callback with job output', async () => {
const id = registry.registerRunner(createMockConfig());
registry.updateRunner(id, { state: 'idle' });
const transport = createMockTransport();
transports.set(id, transport);
const outputCallback = jest.fn();
await dispatcher.dispatchJob(createMockJobRequest(), registry, '2022.3.0f1', outputCallback);
expect(outputCallback).toHaveBeenCalledWith('Build succeeded');
});
it('should wait for runner to become available', async () => {
const id = registry.registerRunner(createMockConfig());
// Runner starts in 'starting' state, not idle
const transport = createMockTransport();
transports.set(id, transport);
// Simulate runner becoming idle after a short delay
setTimeout(() => {
registry.updateRunner(id, { state: 'idle' });
}, 50);
const request = createMockJobRequest({ timeout: 5000 });
const result = await dispatcher.dispatchJob(request, registry, '2022.3.0f1');
expect(result.success).toBe(true);
});
});
// --- Service Integration Tests ---
describe('HotRunnerService', () => {
let service: HotRunnerService;
beforeEach(() => {
jest.clearAllMocks();
mockFs.existsSync.mockReturnValue(false);
service = new HotRunnerService();
});
afterEach(async () => {
await service.shutdown();
});
it('should initialize and shut down cleanly', async () => {
const config = createMockConfig();
await service.initialize(config);
const status = service.getStatus();
expect(status).toEqual([]);
await service.shutdown();
});
it('should register a runner with transport', async () => {
await service.initialize(createMockConfig());
const transport = createMockTransport();
const id = service.registerRunner(createMockConfig(), transport);
expect(id).toMatch(/^hr-/);
expect(service.getStatus()).toHaveLength(1);
});
it('should disconnect all transports on shutdown', async () => {
await service.initialize(createMockConfig());
const transport1 = createMockTransport();
const transport2 = createMockTransport();
service.registerRunner(createMockConfig(), transport1);
service.registerRunner(createMockConfig(), transport2);
await service.shutdown();
expect(transport1.disconnect).toHaveBeenCalled();
expect(transport2.disconnect).toHaveBeenCalled();
});
it('should expose the underlying registry', async () => {
await service.initialize(createMockConfig());
const registry = service.getRegistry();
expect(registry).toBeInstanceOf(HotRunnerRegistry);
});
});

View File

@@ -1,11 +0,0 @@
export { HotRunnerService } from './hot-runner-service';
export { HotRunnerRegistry } from './hot-runner-registry';
export { HotRunnerHealthMonitor } from './hot-runner-health-monitor';
export { HotRunnerDispatcher } from './hot-runner-dispatcher';
export type {
HotRunnerConfig,
HotRunnerStatus,
HotRunnerJobRequest,
HotRunnerJobResult,
HotRunnerTransport,
} from './hot-runner-types';