Compare commits

..

5 Commits

Author SHA1 Message Date
frostebite
3cac1845e3 fix: replace orchestrator-develop branch references with main
The orchestrator-develop branch no longer exists. Update all fallback
clone commands and test fixtures to use main instead.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-09 20:02:17 +00:00
frostebite
41f00bd1f9 ci: set macOS builds to continue-on-error
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 23:33:21 +00:00
frostebite
7c0c4c2072 fix(hot-runner): validate persisted registry state and add dispatcher safeguards
Validate runner entries when loading from hot-runners.json. Discard
corrupted entries with warnings. Add validateAndRepair() method for
runtime recovery. Validate data before persisting to prevent writing
corrupt state. Handle corrupt persistence files (invalid JSON)
gracefully. Rewrite executeWithTimeout using Promise.race to clean up
transport connections on timeout. Fix pre-existing ESLint violations
in dispatcher and test files.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 13:00:47 +00:00
frostebite
1bb31f3e98 feat(hot-runner): implement hot runner protocol with registry, health monitoring, and job dispatch (#791)
Adds persistent Unity editor instance support to reduce build iteration time
by eliminating cold-start overhead. Includes:

- HotRunnerTypes: interfaces for config, status, job request/result, transport
- HotRunnerRegistry: in-memory runner management with file-based persistence
- HotRunnerHealthMonitor: periodic health checks, idle recycling, job-count recycling
- HotRunnerDispatcher: job routing with wait-for-runner, timeout, and output streaming
- HotRunnerService: high-level API integrating registry, health, and dispatch
- 34 unit tests covering registration, filtering, health, dispatch, timeout, fallback
- action.yml inputs for hot runner configuration (7 new inputs)
- Input/BuildParameters integration for hot runner settings
- index.ts wiring with cold-build fallback when hot runner unavailable

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 11:50:31 +00:00
frostebite
49b37f7831 feat(orchestrator): add hot runner protocol placeholder
Initial scaffold for the runner registration and hot editor provider module.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 09:05:17 +00:00
18 changed files with 2583 additions and 973 deletions

View File

@@ -182,8 +182,8 @@ inputs:
required: false
default: ''
description:
'[Orchestrator] Run a custom job instead of the standard build automation for orchestrator (in yaml format with the
keys image, secrets (name, value object array), command line string)'
'[Orchestrator] Run a custom job instead of the standard build automation for orchestrator (in yaml format with
the keys image, secrets (name, value object array), command line string)'
awsStackName:
default: 'game-ci'
required: false
@@ -194,42 +194,6 @@ inputs:
description:
'[Orchestrator] Either local, k8s or aws can be used to run builds on a remote cluster. Additional parameters must
be configured.'
fallbackProviderStrategy:
default: ''
required: false
description:
'[Orchestrator] Fallback provider when the primary is unavailable. Used with runnerCheckEnabled for automatic
failover, or as a catch-all if the primary provider fails to initialize.'
runnerCheckEnabled:
default: 'false'
required: false
description:
'[Orchestrator] Check GitHub Actions runner availability before starting a build. When no suitable runners are
available and fallbackProviderStrategy is set, automatically routes to the fallback provider.'
runnerCheckLabels:
default: ''
required: false
description:
'[Orchestrator] Comma-separated runner labels to filter when checking availability (e.g. self-hosted,linux).
When empty, checks all runners in the repository.'
runnerCheckMinAvailable:
default: '1'
required: false
description:
'[Orchestrator] Minimum number of idle runners required for the primary provider. If fewer are available,
routes to fallbackProviderStrategy.'
retryOnFallback:
default: 'false'
required: false
description:
'[Orchestrator] When true and fallbackProviderStrategy is set, automatically retry the build on the fallback
provider if the primary provider fails. Useful for long builds where transient cloud failures are common.'
providerInitTimeout:
default: '0'
required: false
description:
'[Orchestrator] Maximum seconds to wait for the primary provider to initialize (setupWorkflow). If exceeded
and fallbackProviderStrategy is set, switches to the fallback. Set to 0 to disable (default).'
resourceTracking:
default: 'false'
required: false
@@ -316,6 +280,35 @@ inputs:
'[Orchestrator] Specifies the repo for the unity builder. Useful if you forked the repo for testing, features, or
fixes.'
hotRunnerEnabled:
description: '[HotRunner] Use persistent hot runner for builds (requires pre-registered runners)'
required: false
default: 'false'
hotRunnerTransport:
description: '[HotRunner] Transport protocol for hot runner communication: websocket, grpc, named-pipe'
required: false
default: 'websocket'
hotRunnerHost:
description: '[HotRunner] Hot runner host address'
required: false
default: 'localhost'
hotRunnerPort:
description: '[HotRunner] Hot runner port number'
required: false
default: '9090'
hotRunnerHealthInterval:
description: '[HotRunner] Health check interval in seconds'
required: false
default: '30'
hotRunnerMaxIdle:
description: '[HotRunner] Maximum idle time in seconds before recycling runner'
required: false
default: '3600'
hotRunnerFallbackToCold:
description: '[HotRunner] Fall back to cold build if no hot runner available'
required: false
default: 'true'
outputs:
volume:
description: 'The Persistent Volume (PV) where the build artifacts have been stored by Kubernetes'

1063
dist/index.js generated vendored

File diff suppressed because it is too large Load Diff

2
dist/index.js.map generated vendored

File diff suppressed because one or more lines are too long

View File

@@ -3,6 +3,8 @@ import { Action, BuildParameters, Cache, Orchestrator, Docker, ImageTag, Output
import { Cli } from './model/cli/cli';
import MacBuilder from './model/mac-builder';
import PlatformSetup from './model/platform-setup';
import { HotRunnerService } from './model/orchestrator/services/hot-runner';
import { HotRunnerConfig } from './model/orchestrator/services/hot-runner/hot-runner-types';
async function runMain() {
try {
@@ -21,17 +23,46 @@ async function runMain() {
let exitCode = -1;
if (buildParameters.providerStrategy === 'local') {
// Hot runner path: attempt to use a persistent Unity editor instance
if (buildParameters.hotRunnerEnabled) {
core.info('[HotRunner] Hot runner mode enabled, attempting hot build...');
const hotRunnerConfig: HotRunnerConfig = {
enabled: true,
transport: buildParameters.hotRunnerTransport,
host: buildParameters.hotRunnerHost,
port: buildParameters.hotRunnerPort,
healthCheckInterval: buildParameters.hotRunnerHealthInterval,
maxIdleTime: buildParameters.hotRunnerMaxIdle,
maxJobsBeforeRecycle: 0, // no automatic recycle by job count
};
const hotRunnerService = new HotRunnerService();
try {
await hotRunnerService.initialize(hotRunnerConfig);
const result = await hotRunnerService.submitBuild(buildParameters, (output) => {
core.info(output);
});
exitCode = result.exitCode;
core.info(`[HotRunner] Build completed with exit code ${exitCode}`);
await hotRunnerService.shutdown();
} catch (hotRunnerError) {
await hotRunnerService.shutdown();
if (buildParameters.hotRunnerFallbackToCold) {
core.warning(
`[HotRunner] Hot runner failed: ${(hotRunnerError as Error).message}. Falling back to cold build.`,
);
exitCode = await runColdBuild(buildParameters, baseImage, workspace, actionFolder);
} else {
throw hotRunnerError;
}
}
} else if (buildParameters.providerStrategy === 'local') {
core.info('Building locally');
await PlatformSetup.setup(buildParameters, actionFolder);
exitCode =
process.platform === 'darwin'
? await MacBuilder.run(actionFolder)
: await Docker.run(baseImage.toString(), {
workspace,
actionFolder,
...buildParameters,
});
exitCode = await runColdBuild(buildParameters, baseImage, workspace, actionFolder);
} else {
await Orchestrator.run(buildParameters, baseImage.toString());
exitCode = 0;
@@ -50,4 +81,28 @@ async function runMain() {
}
}
/**
 * Runs a regular ("cold") build and resolves to its exit code.
 *
 * For the `local` provider strategy the platform is set up first and the build
 * runs through `MacBuilder` on macOS or `Docker` on every other platform.
 * Any other provider strategy is handed to the orchestrator, in which case the
 * resolved exit code is always 0.
 *
 * @param buildParameters - Resolved build parameters; also spread into the Docker run options.
 * @param baseImage - Image tag, passed on in its string form.
 * @param workspace - Workspace path forwarded to the Docker run.
 * @param actionFolder - Action folder forwarded to platform setup and to the builders.
 * @returns The exit code reported by the builder, or 0 after an orchestrator run.
 */
async function runColdBuild(
  buildParameters: BuildParameters,
  baseImage: ImageTag,
  workspace: string,
  actionFolder: string,
): Promise<number> {
  // Non-local strategies never reach the local builders below.
  if (buildParameters.providerStrategy !== 'local') {
    await Orchestrator.run(buildParameters, baseImage.toString());

    return 0;
  }

  core.info('Building locally');
  await PlatformSetup.setup(buildParameters, actionFolder);

  if (process.platform === 'darwin') {
    return await MacBuilder.run(actionFolder);
  }

  return await Docker.run(baseImage.toString(), { workspace, actionFolder, ...buildParameters });
}
runMain();

View File

@@ -54,12 +54,6 @@ class BuildParameters {
public sshAgent!: string;
public sshPublicKeysDirectoryPath!: string;
public providerStrategy!: string;
public fallbackProviderStrategy!: string;
public runnerCheckEnabled!: boolean;
public runnerCheckLabels!: string[];
public runnerCheckMinAvailable!: number;
public retryOnFallback!: boolean;
public providerInitTimeout!: number;
public gitPrivateToken!: string;
public awsStackName!: string;
public awsEndpoint?: string;
@@ -112,6 +106,13 @@ class BuildParameters {
public cacheUnityInstallationOnMac!: boolean;
public unityHubVersionOnMac!: string;
public dockerWorkspacePath!: string;
public hotRunnerEnabled!: boolean;
public hotRunnerTransport!: 'websocket' | 'grpc' | 'named-pipe';
public hotRunnerHost!: string;
public hotRunnerPort!: number;
public hotRunnerHealthInterval!: number;
public hotRunnerMaxIdle!: number;
public hotRunnerFallbackToCold!: boolean;
public static shouldUseRetainedWorkspaceMode(buildParameters: BuildParameters) {
return buildParameters.maxRetainedWorkspaces > 0 && Orchestrator.lockedWorkspace !== ``;
@@ -200,12 +201,6 @@ class BuildParameters {
containerRegistryRepository: Input.containerRegistryRepository,
containerRegistryImageVersion: Input.containerRegistryImageVersion,
providerStrategy: OrchestratorOptions.providerStrategy,
fallbackProviderStrategy: OrchestratorOptions.fallbackProviderStrategy,
runnerCheckEnabled: OrchestratorOptions.runnerCheckEnabled,
runnerCheckLabels: OrchestratorOptions.runnerCheckLabels,
runnerCheckMinAvailable: OrchestratorOptions.runnerCheckMinAvailable,
retryOnFallback: OrchestratorOptions.retryOnFallback,
providerInitTimeout: OrchestratorOptions.providerInitTimeout,
buildPlatform: OrchestratorOptions.buildPlatform,
kubeConfig: OrchestratorOptions.kubeConfig,
containerMemory: OrchestratorOptions.containerMemory,
@@ -254,6 +249,13 @@ class BuildParameters {
cacheUnityInstallationOnMac: Input.cacheUnityInstallationOnMac,
unityHubVersionOnMac: Input.unityHubVersionOnMac,
dockerWorkspacePath: Input.dockerWorkspacePath,
hotRunnerEnabled: Input.hotRunnerEnabled,
hotRunnerTransport: Input.hotRunnerTransport,
hotRunnerHost: Input.hotRunnerHost,
hotRunnerPort: Input.hotRunnerPort,
hotRunnerHealthInterval: Input.hotRunnerHealthInterval,
hotRunnerMaxIdle: Input.hotRunnerMaxIdle,
hotRunnerFallbackToCold: Input.hotRunnerFallbackToCold,
};
}

View File

@@ -282,6 +282,38 @@ class Input {
return Input.getInput('skipActivation')?.toLowerCase() ?? 'false';
}
/** Whether hot runner mode was requested; true only for the exact input string 'true'. */
static get hotRunnerEnabled(): boolean {
  // A missing input can never equal 'true', so no explicit default is needed.
  return Input.getInput('hotRunnerEnabled') === 'true';
}
/**
 * Transport selected for hot runner communication, defaulting to 'websocket'
 * when the input is not provided.
 */
static get hotRunnerTransport(): 'websocket' | 'grpc' | 'named-pipe' {
  const configured = Input.getInput('hotRunnerTransport');
  const transport = configured ?? 'websocket';

  // NOTE(review): the value is cast, not validated, so any other string passes through unchanged.
  return transport as 'websocket' | 'grpc' | 'named-pipe';
}
/** Host address of the hot runner, defaulting to 'localhost' when the input is not provided. */
static get hotRunnerHost(): string {
  const host = Input.getInput('hotRunnerHost');

  return host ?? 'localhost';
}
/**
 * Port of the hot runner, parsed as a base-10 integer.
 *
 * @returns The parsed port, or 9090 when the input is missing or does not
 * start with a number (instead of leaking NaN into the runner config).
 */
static get hotRunnerPort(): number {
  const parsed = Number.parseInt(Input.getInput('hotRunnerPort') ?? '9090', 10);

  // Number.parseInt returns NaN for '' or non-numeric text; use the default instead.
  return Number.isNaN(parsed) ? 9090 : parsed;
}
/**
 * Health check interval for the hot runner, parsed as a base-10 integer.
 *
 * @returns The parsed interval, or 30 when the input is missing or does not
 * start with a number (instead of leaking NaN into the runner config).
 */
static get hotRunnerHealthInterval(): number {
  const parsed = Number.parseInt(Input.getInput('hotRunnerHealthInterval') ?? '30', 10);

  // Number.parseInt returns NaN for '' or non-numeric text; use the default instead.
  return Number.isNaN(parsed) ? 30 : parsed;
}
/**
 * Maximum idle time for the hot runner, parsed as a base-10 integer.
 *
 * @returns The parsed value, or 3600 when the input is missing or does not
 * start with a number (instead of leaking NaN into the runner config).
 */
static get hotRunnerMaxIdle(): number {
  const parsed = Number.parseInt(Input.getInput('hotRunnerMaxIdle') ?? '3600', 10);

  // Number.parseInt returns NaN for '' or non-numeric text; use the default instead.
  return Number.isNaN(parsed) ? 3600 : parsed;
}
/**
 * Whether a cold build should be attempted when the hot runner path fails.
 * A missing input counts as 'true'; any provided value other than the exact
 * string 'true' disables the fallback.
 */
static get hotRunnerFallbackToCold(): boolean {
  return (Input.getInput('hotRunnerFallbackToCold') ?? 'true') === 'true';
}
public static ToEnvVarFormat(input: string) {
if (input.toUpperCase() === input) {
return input;

View File

@@ -138,32 +138,6 @@ class OrchestratorOptions {
return provider || 'local';
}
/** Configured fallback provider strategy, or an empty string when none is set. */
static get fallbackProviderStrategy(): string {
  const strategy = OrchestratorOptions.getInput('fallbackProviderStrategy');

  return strategy ? strategy : '';
}
/** Whether the runner availability check is enabled; true only for the exact input string 'true'. */
static get runnerCheckEnabled(): boolean {
  const flag = OrchestratorOptions.getInput('runnerCheckEnabled');

  return flag === 'true';
}
/**
 * Runner labels parsed from the comma-separated `runnerCheckLabels` input.
 *
 * @returns The trimmed, non-empty labels; an empty array when the input is unset or empty.
 */
static get runnerCheckLabels(): string[] {
  const labels = OrchestratorOptions.getInput('runnerCheckLabels');
  if (!labels) {
    return [];
  }

  // Drop blank entries so a trailing or doubled comma ("a,,b", "a, ") does not
  // produce an empty-string label.
  return labels
    .split(',')
    .map((label) => label.trim())
    .filter((label) => label !== '');
}
/**
 * Minimum number of idle runners required, read from `runnerCheckMinAvailable`.
 * Falls back to 1 when the input converts to NaN or to 0.
 */
static get runnerCheckMinAvailable(): number {
  const minimum = Number(OrchestratorOptions.getInput('runnerCheckMinAvailable'));

  return minimum || 1;
}
/** Whether retry-on-fallback is enabled; true only for the exact input string 'true'. */
static get retryOnFallback(): boolean {
  const flag = OrchestratorOptions.getInput('retryOnFallback');

  return flag === 'true';
}
/**
 * Provider initialization timeout read from `providerInitTimeout`.
 * Falls back to 0 when the input is unset or does not convert to a number.
 */
static get providerInitTimeout(): number {
  const seconds = Number(OrchestratorOptions.getInput('providerInitTimeout'));

  return seconds || 0;
}
static get containerCpu(): string {
return OrchestratorOptions.getInput('containerCpu') || `1024`;
}

View File

@@ -20,7 +20,6 @@ import { FollowLogStreamService } from './services/core/follow-log-stream-servic
import OrchestratorResult from './services/core/orchestrator-result';
import OrchestratorOptions from './options/orchestrator-options';
import ResourceTracking from './services/core/resource-tracking';
import { RunnerAvailabilityService } from './services/core/runner-availability-service';
class Orchestrator {
public static Provider: ProviderInterface;
@@ -77,42 +76,6 @@ class Orchestrator {
private static async setupSelectedBuildPlatform() {
OrchestratorLogger.log(`Orchestrator platform selected ${Orchestrator.buildParameters.providerStrategy}`);
// Check runner availability and apply fallback if needed
if (Orchestrator.buildParameters.runnerCheckEnabled && Orchestrator.buildParameters.fallbackProviderStrategy) {
const owner = OrchestratorOptions.githubOwner;
const repo = OrchestratorOptions.githubRepoName;
const token = Orchestrator.buildParameters.gitPrivateToken || process.env.GITHUB_TOKEN || '';
OrchestratorLogger.log(
`Checking runner availability (labels: [${Orchestrator.buildParameters.runnerCheckLabels.join(', ')}], min: ${
Orchestrator.buildParameters.runnerCheckMinAvailable
})`,
);
const result = await RunnerAvailabilityService.checkAvailability(
owner,
repo,
token,
Orchestrator.buildParameters.runnerCheckLabels,
Orchestrator.buildParameters.runnerCheckMinAvailable,
);
OrchestratorLogger.log(
`Runner check: ${result.totalRunners} total, ${result.matchingRunners} matching, ${result.idleRunners} idle — ${result.reason}`,
);
if (result.shouldFallback) {
const original = Orchestrator.buildParameters.providerStrategy;
const fallback = Orchestrator.buildParameters.fallbackProviderStrategy;
OrchestratorLogger.log(`Falling back from '${original}' to '${fallback}' — ${result.reason}`);
Orchestrator.buildParameters.providerStrategy = fallback;
core.setOutput('providerFallbackUsed', 'true');
core.setOutput('providerFallbackReason', result.reason);
} else {
core.setOutput('providerFallbackUsed', 'false');
}
}
// Detect LocalStack endpoints and handle AWS provider appropriately
// AWS_FORCE_PROVIDER options:
// - 'aws': Force AWS provider (requires LocalStack Pro with ECS support)
@@ -219,30 +182,6 @@ class Orchestrator {
if (baseImage.includes(`undefined`)) {
throw new Error(`baseImage is undefined`);
}
try {
return await Orchestrator.runWithProvider(buildParameters, baseImage);
} catch (primaryError: any) {
// Retry on fallback provider if enabled and a fallback is configured
const fallback = buildParameters.fallbackProviderStrategy;
const alreadyOnFallback = buildParameters.providerStrategy === fallback;
if (buildParameters.retryOnFallback && fallback && !alreadyOnFallback) {
OrchestratorLogger.log(
`Primary provider '${buildParameters.providerStrategy}' failed: ${primaryError.message}`,
);
OrchestratorLogger.log(`Retrying build on fallback provider '${fallback}'...`);
buildParameters.providerStrategy = fallback;
core.setOutput('providerFallbackUsed', 'true');
core.setOutput('providerFallbackReason', `Primary provider failed: ${primaryError.message}`);
return await Orchestrator.runWithProvider(buildParameters, baseImage);
}
throw primaryError;
}
}
private static async runWithProvider(buildParameters: BuildParameters, baseImage: string) {
await Orchestrator.setup(buildParameters);
// When aws-local mode is enabled, validate AWS CloudFormation templates
@@ -250,10 +189,12 @@ class Orchestrator {
if (Orchestrator.validateAwsTemplates) {
await Orchestrator.validateAwsCloudFormationTemplates();
}
// Setup workflow with optional init timeout
await Orchestrator.setupWorkflowWithTimeout();
await Orchestrator.Provider.setupWorkflow(
Orchestrator.buildParameters.buildGuid,
Orchestrator.buildParameters,
Orchestrator.buildParameters.branch,
Orchestrator.defaultSecrets,
);
try {
if (buildParameters.maxRetainedWorkspaces > 0) {
Orchestrator.lockedWorkspace = SharedWorkspaceLocking.NewWorkspaceName();
@@ -334,39 +275,6 @@ class Orchestrator {
}
}
/**
 * Runs the provider's setupWorkflow, optionally bounded by `providerInitTimeout`.
 *
 * When the timeout is zero or negative the setup is awaited without a limit.
 * Otherwise the setup is raced against a timer, and the method rejects with a
 * "Provider initialization timed out" error if the timer fires first. The
 * timer is always cleared once the race settles, so a fast setup does not
 * leave a pending timeout keeping the process alive.
 *
 * @throws Error when initialization exceeds the configured timeout, or
 * whatever error setupWorkflow itself rejects with.
 */
private static async setupWorkflowWithTimeout() {
  const timeoutSeconds = Orchestrator.buildParameters.providerInitTimeout;

  const setupPromise = Orchestrator.Provider.setupWorkflow(
    Orchestrator.buildParameters.buildGuid,
    Orchestrator.buildParameters,
    Orchestrator.buildParameters.branch,
    Orchestrator.defaultSecrets,
  );

  if (timeoutSeconds <= 0) {
    await setupPromise;

    return;
  }

  OrchestratorLogger.log(`Provider init timeout: ${timeoutSeconds}s`);

  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeoutPromise = new Promise<never>((_, reject) => {
    timer = setTimeout(
      () => reject(new Error(`Provider initialization timed out after ${timeoutSeconds}s`)),
      timeoutSeconds * 1000,
    );
  });

  try {
    await Promise.race([setupPromise, timeoutPromise]);
  } finally {
    // Disarm the timer whichever side won; otherwise it stays scheduled for the
    // full timeout even after setup has already finished.
    if (timer !== undefined) {
      clearTimeout(timer);
    }
  }
}
private static async updateStatusWithBuildParameters() {
const content = { ...Orchestrator.buildParameters };
content.gitPrivateToken = ``;

View File

@@ -0,0 +1,5 @@
# Hot Runner Protocol
Extensible runner registration and persistent Unity editor provider protocol.
See GitHub Issue for full specification.

View File

@@ -1,318 +0,0 @@
import { RunnerAvailabilityService } from './runner-availability-service';
// Mock @octokit/core
jest.mock('@octokit/core', () => ({
Octokit: jest.fn().mockImplementation(() => ({
request: jest.fn(),
})),
}));
jest.mock('./orchestrator-logger', () => ({
__esModule: true,
default: {
log: jest.fn(),
logWarning: jest.fn(),
error: jest.fn(),
},
}));
import { Octokit } from '@octokit/core';
const MockedOctokit = Octokit as jest.MockedClass<typeof Octokit>;
/**
 * Builds runner fixtures from a compact description.
 *
 * Each runner gets a 1-based `id` from its position in the input, and every
 * label string is wrapped as `{ name }`.
 */
function createMockRunners(runners: Array<{ name: string; status: string; busy: boolean; labels: string[] }>) {
  return runners.map(({ name, status, busy, labels }, index) => {
    const labelObjects = labels.map((label) => ({ name: label }));

    return { id: index + 1, name, status, busy, labels: labelObjects };
  });
}
describe('RunnerAvailabilityService', () => {
beforeEach(() => {
jest.clearAllMocks();
});
describe('checkAvailability', () => {
it('should skip check and not fallback when no token is provided', async () => {
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', '', [], 1);
expect(result.shouldFallback).toBe(false);
expect(result.reason).toContain('No GitHub token');
});
it('should fallback when no runners are registered', async () => {
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners: [] } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
expect(result.shouldFallback).toBe(true);
expect(result.reason).toContain('No runners registered');
expect(result.totalRunners).toBe(0);
});
it('should not fallback when enough idle runners are available', async () => {
const runners = createMockRunners([
{ name: 'runner-1', status: 'online', busy: false, labels: ['self-hosted', 'linux'] },
{ name: 'runner-2', status: 'online', busy: false, labels: ['self-hosted', 'linux'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
expect(result.shouldFallback).toBe(false);
expect(result.idleRunners).toBe(2);
expect(result.totalRunners).toBe(2);
});
it('should fallback when all runners are busy', async () => {
const runners = createMockRunners([
{ name: 'runner-1', status: 'online', busy: true, labels: ['self-hosted'] },
{ name: 'runner-2', status: 'online', busy: true, labels: ['self-hosted'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
expect(result.shouldFallback).toBe(true);
expect(result.idleRunners).toBe(0);
expect(result.matchingRunners).toBe(2);
});
it('should fallback when all runners are offline', async () => {
const runners = createMockRunners([
{ name: 'runner-1', status: 'offline', busy: false, labels: ['self-hosted'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
expect(result.shouldFallback).toBe(true);
expect(result.idleRunners).toBe(0);
});
it('should filter runners by required labels', async () => {
const runners = createMockRunners([
{ name: 'linux-runner', status: 'online', busy: false, labels: ['self-hosted', 'linux'] },
{ name: 'windows-runner', status: 'online', busy: false, labels: ['self-hosted', 'windows'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability(
'owner',
'repo',
'token',
['self-hosted', 'linux'],
1,
);
expect(result.shouldFallback).toBe(false);
expect(result.matchingRunners).toBe(1);
expect(result.idleRunners).toBe(1);
expect(result.totalRunners).toBe(2);
});
it('should fallback when no runners match required labels', async () => {
const runners = createMockRunners([
{ name: 'windows-runner', status: 'online', busy: false, labels: ['self-hosted', 'windows'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability(
'owner',
'repo',
'token',
['self-hosted', 'linux'],
1,
);
expect(result.shouldFallback).toBe(true);
expect(result.matchingRunners).toBe(0);
expect(result.idleRunners).toBe(0);
});
it('should respect minAvailable threshold', async () => {
const runners = createMockRunners([{ name: 'runner-1', status: 'online', busy: false, labels: ['self-hosted'] }]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
// Need 2, have 1 — should fallback
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 2);
expect(result.shouldFallback).toBe(true);
expect(result.idleRunners).toBe(1);
});
it('should be case-insensitive for label matching', async () => {
const runners = createMockRunners([
{ name: 'runner-1', status: 'online', busy: false, labels: ['Self-Hosted', 'Linux'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability(
'owner',
'repo',
'token',
['self-hosted', 'linux'],
1,
);
expect(result.shouldFallback).toBe(false);
expect(result.matchingRunners).toBe(1);
});
it('should not fallback on API error (fail-open)', async () => {
const mockRequest = jest.fn().mockRejectedValue(new Error('403 Forbidden'));
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
expect(result.shouldFallback).toBe(false);
expect(result.reason).toContain('Runner check failed');
});
it('should count only online+idle runners', async () => {
const runners = createMockRunners([
{ name: 'idle', status: 'online', busy: false, labels: ['self-hosted'] },
{ name: 'busy', status: 'online', busy: true, labels: ['self-hosted'] },
{ name: 'offline', status: 'offline', busy: false, labels: ['self-hosted'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
expect(result.shouldFallback).toBe(false);
expect(result.totalRunners).toBe(3);
expect(result.matchingRunners).toBe(3);
expect(result.idleRunners).toBe(1);
});
});
describe('pagination limits', () => {
it('should stop paginating after reaching the page limit', async () => {
// Return full pages (100 runners each) to force continued pagination
let callCount = 0;
const mockRequest = jest.fn().mockImplementation(() => {
callCount++;
const runners = createMockRunners(
Array.from({ length: 100 }, (_, i) => ({
name: `runner-${callCount}-${i}`,
status: 'online' as const,
busy: false,
labels: ['self-hosted'],
})),
);
return Promise.resolve({ status: 200, data: { runners } });
});
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
// Should have called at most 100 pages (the MAX_PAGINATION_PAGES limit)
expect(mockRequest).toHaveBeenCalledTimes(100);
// Should still have runners from the pages it did fetch
expect(result.totalRunners).toBe(10000);
expect(result.shouldFallback).toBe(false);
});
it('should stop paginating on rate limit (HTTP 403)', async () => {
let callCount = 0;
const mockRequest = jest.fn().mockImplementation(() => {
callCount++;
if (callCount === 2) {
// Octokit throws for non-2xx responses
const error: any = new Error('API rate limit exceeded');
error.status = 403;
error.response = {
status: 403,
headers: { 'x-ratelimit-reset': String(Math.floor(Date.now() / 1000) + 3600) },
};
return Promise.reject(error);
}
const runners = createMockRunners(
Array.from({ length: 100 }, (_, i) => ({
name: `runner-${i}`,
status: 'online' as const,
busy: false,
labels: ['self-hosted'],
})),
);
return Promise.resolve({ status: 200, data: { runners } });
});
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
// Should have stopped at page 2 (rate limited)
expect(mockRequest).toHaveBeenCalledTimes(2);
// Should use the 100 runners from the first page
expect(result.totalRunners).toBe(100);
expect(result.shouldFallback).toBe(false);
});
it('should stop paginating on rate limit (HTTP 429)', async () => {
let callCount = 0;
const mockRequest = jest.fn().mockImplementation(() => {
callCount++;
if (callCount === 1) {
// Octokit throws for non-2xx responses
const error: any = new Error('Too Many Requests');
error.status = 429;
error.response = { status: 429, headers: {} };
return Promise.reject(error);
}
return Promise.resolve({ status: 200, data: { runners: [] } });
});
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
// Should have stopped at first page (rate limited immediately)
expect(mockRequest).toHaveBeenCalledTimes(1);
// No runners found — should fallback
expect(result.totalRunners).toBe(0);
expect(result.shouldFallback).toBe(true);
});
it('should handle pagination timeout gracefully', async () => {
  // Date.now is replaced to simulate elapsed time. It is a process-wide global,
  // so it must be restored even when an assertion below throws; otherwise the
  // shifted clock leaks into every test that runs afterwards.
  const originalDateNow = Date.now;
  let callCount = 0;
  const mockRequest = jest.fn().mockImplementation(() => {
    callCount++;

    // After first call, advance time past the timeout
    if (callCount >= 2) {
      Date.now = jest.fn(() => originalDateNow() + 31_000);
    }
    const runners = createMockRunners(
      Array.from({ length: 100 }, (_, i) => ({
        name: `runner-${callCount}-${i}`,
        status: 'online' as const,
        busy: false,
        labels: ['self-hosted'],
      })),
    );

    return Promise.resolve({ status: 200, data: { runners } });
  });
  MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));

  try {
    const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);

    // Should have stopped after timeout was detected (2 pages: first succeeds, second triggers timeout check)
    expect(mockRequest.mock.calls.length).toBeLessThanOrEqual(3);

    // Should have runners from pages fetched before timeout
    expect(result.totalRunners).toBeGreaterThan(0);
  } finally {
    // Restore the real clock on both the success and the failure path
    Date.now = originalDateNow;
  }
});
});
});

View File

@@ -1,205 +0,0 @@
import { Octokit } from '@octokit/core';
import OrchestratorLogger from './orchestrator-logger';
/**
 * Shape of a single runner entry as this service reads it from the
 * `GET /repos/{owner}/{repo}/actions/runners` response.
 */
interface GitHubRunner {
id: number;
name: string;
// Only runners with status 'online' are counted as idle.
status: 'online' | 'offline';
// A busy runner is never counted as idle, even when online.
busy: boolean;
// Labels attached to the runner; used to filter by the caller's required labels.
labels: Array<{ name: string }>;
}
/**
 * Outcome of a runner availability check: the fallback decision plus the
 * counts that led to it.
 */
interface RunnerCheckResult {
// True when the build should be routed to the fallback provider.
shouldFallback: boolean;
// Human-readable explanation of the decision, suitable for logging.
reason: string;
// Number of runners fetched for the repository, before any filtering.
totalRunners: number;
// Number of runners left after filtering by the required labels.
matchingRunners: number;
// Number of matching runners that are online and not busy.
idleRunners: number;
}
/**
* Maximum number of pages to fetch when paginating through GitHub API results.
* 100 pages * 100 per page = 10,000 runners maximum.
*/
const MAX_PAGINATION_PAGES = 100;
/**
* Total timeout in milliseconds for the pagination loop.
* Prevents indefinite API calls if GitHub is slow or pagination is unexpectedly deep.
*/
const PAGINATION_TIMEOUT_MS = 30_000;
/**
* Checks GitHub Actions runner availability to support automatic provider fallback.
*
* When a user configures `runnerCheckEnabled: true` with a `fallbackProviderStrategy`,
* this service queries the GitHub API for runner status before the build starts.
* If insufficient runners are available, the orchestrator routes to the fallback provider.
*/
export class RunnerAvailabilityService {
/**
 * Decides whether the build should move to the fallback provider, based on
 * how many idle runners the repository currently has.
 *
 * The check fails open: without a token, or when fetching runners throws, the
 * result never requests a fallback. A fallback is requested when no runners
 * are registered or when fewer than `minAvailable` matching runners are
 * online and not busy.
 *
 * @param owner - GitHub repository owner
 * @param repo - GitHub repository name
 * @param token - GitHub token used to authenticate the API client
 * @param requiredLabels - Labels passed to the label filter
 * @param minAvailable - Minimum idle runners required
 * @returns RunnerCheckResult with decision and diagnostics
 */
static async checkAvailability(
  owner: string,
  repo: string,
  token: string,
  requiredLabels: string[],
  minAvailable: number,
): Promise<RunnerCheckResult> {
  // Result used for every outcome in which no runner counts are reported.
  const withoutCounts = (shouldFallback: boolean, reason: string): RunnerCheckResult => ({
    shouldFallback,
    reason,
    totalRunners: 0,
    matchingRunners: 0,
    idleRunners: 0,
  });

  if (!token) {
    return withoutCounts(false, 'No GitHub token available — skipping runner check');
  }

  try {
    const client = new Octokit({ auth: token });
    const registered = await RunnerAvailabilityService.fetchRunners(client, owner, repo);

    if (registered.length === 0) {
      return withoutCounts(true, 'No runners registered for this repository');
    }

    const labelled = RunnerAvailabilityService.filterByLabels(registered, requiredLabels);

    // Idle means online and not currently running a job.
    const idleCount = labelled.filter((runner) => runner.status === 'online' && !runner.busy).length;

    let reason: string;
    if (idleCount >= minAvailable) {
      reason = `${idleCount} idle runner(s) available (need ${minAvailable})`;
    } else {
      reason = `Only ${idleCount} idle runner(s) available, need ${minAvailable}`;
    }

    return {
      shouldFallback: idleCount < minAvailable,
      reason,
      totalRunners: registered.length,
      matchingRunners: labelled.length,
      idleRunners: idleCount,
    };
  } catch (error: any) {
    // A failed lookup (permissions, rate limit, etc.) must not block the build.
    OrchestratorLogger.log(`Runner availability check failed: ${error.message}`);

    return withoutCounts(false, `Runner check failed (${error.message}) — proceeding with primary provider`);
  }
}
/**
* Fetch all runners for a repository, handling pagination.
*
* Includes defensive limits:
* - Maximum page count (MAX_PAGINATION_PAGES) to prevent infinite loops
* - Total timeout (PAGINATION_TIMEOUT_MS) to prevent indefinite API calls
* - Rate-limit detection (HTTP 403/429 with X-RateLimit-Remaining header)
*/
private static async fetchRunners(octokit: Octokit, owner: string, repo: string): Promise<GitHubRunner[]> {
const allRunners: GitHubRunner[] = [];
let page = 1;
const perPage = 100;
const startTime = Date.now();
while (page <= MAX_PAGINATION_PAGES) {
// Check total timeout
if (Date.now() - startTime > PAGINATION_TIMEOUT_MS) {
OrchestratorLogger.logWarning(
`[RunnerAvailability] Pagination timeout reached after ${page - 1} pages and ${Date.now() - startTime}ms. ` +
`Using ${allRunners.length} runners found so far.`,
);
break;
}
let response: any;
try {
response = await octokit.request('GET /repos/{owner}/{repo}/actions/runners', {
owner,
repo,
per_page: perPage,
page,
});
} catch (requestError: any) {
// Octokit throws for non-2xx responses. Check if this is a rate limit error.
const status = requestError.status ?? requestError.response?.status;
if (status === 403 || status === 429) {
const resetTime =
requestError.response?.headers?.['x-ratelimit-reset'] ?? requestError.headers?.['x-ratelimit-reset'];
const resetMessage = resetTime
? ` Resets at ${new Date(Number.parseInt(String(resetTime), 10) * 1000).toISOString()}`
: '';
OrchestratorLogger.logWarning(
`[RunnerAvailability] GitHub API rate limit reached (HTTP ${status}).${resetMessage} ` +
`Using ${allRunners.length} runners found so far.`,
);
break;
}
// Re-throw non-rate-limit errors to be handled by the outer catch
throw requestError;
}
const runners = (response.data.runners || []) as GitHubRunner[];
allRunners.push(...runners);
if (runners.length < perPage) break;
page++;
}
if (page > MAX_PAGINATION_PAGES) {
OrchestratorLogger.logWarning(
`[RunnerAvailability] Maximum pagination limit reached (${MAX_PAGINATION_PAGES} pages). ` +
`Using ${allRunners.length} runners found so far.`,
);
}
if (allRunners.length === 0) {
OrchestratorLogger.log(
'[RunnerAvailability] No runners found. Possible causes: ' +
'wrong token permissions (needs repo or actions scope), ' +
'no self-hosted runners registered, ' +
'or runners are registered at the organization level instead of the repository.',
);
}
return allRunners;
}
/**
* Filter runners by required labels. A runner matches if it has ALL required labels.
* If requiredLabels is empty, all runners match.
*/
private static filterByLabels(runners: GitHubRunner[], requiredLabels: string[]): GitHubRunner[] {
if (requiredLabels.length === 0) return runners;
return runners.filter((runner) => {
const runnerLabelNames = runner.labels.map((l) => l.name.toLowerCase());
return requiredLabels.every((required) => runnerLabelNames.includes(required.toLowerCase()));
});
}
}

View File

@@ -0,0 +1,159 @@
import OrchestratorLogger from '../core/orchestrator-logger';
import { HotRunnerRegistry } from './hot-runner-registry';
import { HotRunnerJobRequest, HotRunnerJobResult, HotRunnerStatus, HotRunnerTransport } from './hot-runner-types';
/** Delay, in milliseconds, between registry polls while waiting for a runner to become available. */
const POLL_INTERVAL_MS = 1_000;

/** Callback that receives the output text of a job once it has completed on a hot runner. */
// eslint-disable-next-line no-unused-vars
export type OutputCallback = (output: string) => void;
/**
 * Routes jobs to registered hot runners through their transports and keeps the
 * registry's busy/idle bookkeeping in step with each job.
 */
export class HotRunnerDispatcher {
  private transports: Map<string, HotRunnerTransport>;

  /**
   * @param transports - Transport per runner ID. The map is stored by reference, so entries the owner
   *   adds or removes after construction are visible to the dispatcher.
   */
  constructor(transports: Map<string, HotRunnerTransport>) {
    this.transports = transports;
  }

  /**
   * Dispatch a job to an available hot runner matching the request's build target.
   * If no runner is immediately available, waits up to the request timeout.
   *
   * @param request - Job to run; `request.timeout` (milliseconds) bounds both the wait for a runner
   *   and the job execution itself
   * @param registry - Registry used to find the runner and record its state
   * @param unityVersion - Unity version the runner must provide
   * @param onOutput - Optional callback invoked once with the job output when it is non-empty
   * @returns The job result reported by the transport
   * @throws If no runner becomes available in time, the runner has no transport, the job times out,
   *   or the transport rejects
   */
  async dispatchJob(
    request: HotRunnerJobRequest,
    registry: HotRunnerRegistry,
    unityVersion: string,
    onOutput?: OutputCallback,
  ): Promise<HotRunnerJobResult> {
    OrchestratorLogger.log(`[HotRunner] Dispatching job ${request.jobId} (target: ${request.buildTarget})`);

    // Find or wait for an available runner
    let runner = registry.findAvailableRunner({
      unityVersion,
      platform: request.buildTarget,
    });
    if (!runner) {
      OrchestratorLogger.log(
        `[HotRunner] No idle runner available for ${unityVersion}/${request.buildTarget}, waiting...`,
      );
      runner = await this.waitForRunner({ unityVersion, platform: request.buildTarget }, request.timeout, registry);
    }

    // Mark runner as busy
    registry.updateRunner(runner.id, {
      state: 'busy',
      currentJob: request.jobId,
    });

    const transport = this.transports.get(runner.id);
    if (!transport) {
      // Nothing was sent, so hand the runner back before failing.
      registry.updateRunner(runner.id, { state: 'idle', currentJob: undefined });
      throw new Error(`[HotRunner] No transport available for runner ${runner.id}`);
    }

    OrchestratorLogger.log(`[HotRunner] Sending job ${request.jobId} to runner ${runner.id}`);
    const startTime = Date.now();

    try {
      const result = await this.executeWithTimeout(transport, request);
      const duration = Date.now() - startTime;
      OrchestratorLogger.log(
        `[HotRunner] Job ${request.jobId} completed on runner ${runner.id} in ${duration}ms (exit: ${result.exitCode})`,
      );

      if (onOutput && result.output) {
        onOutput(result.output);
      }

      // Mark runner as idle and increment job count (re-read: the status may have changed during the job)
      const currentStatus = registry.getRunner(runner.id);
      registry.updateRunner(runner.id, {
        state: 'idle',
        currentJob: undefined,
        lastJobCompleted: request.jobId,
        jobsCompleted: (currentStatus?.jobsCompleted ?? 0) + 1,
      });

      return result;
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      OrchestratorLogger.logWarning(`[HotRunner] Job ${request.jobId} failed on runner ${runner.id}: ${message}`);

      // Mark runner as idle despite failure -- the health monitor will recycle if needed
      registry.updateRunner(runner.id, {
        state: 'idle',
        currentJob: undefined,
      });
      throw error;
    }
  }

  /**
   * Wait for an available runner matching the requirements.
   * Polls the registry at a fixed interval until one becomes available or timeout expires.
   *
   * @param requirements - Unity version and platform the runner must match
   * @param timeoutMs - Maximum time to wait, in milliseconds
   * @param registry - Registry to poll
   * @returns The first matching idle runner
   * @throws If the timeout expires before a runner becomes available
   */
  async waitForRunner(
    requirements: { unityVersion: string; platform: string },
    timeoutMs: number,
    registry: HotRunnerRegistry,
  ): Promise<HotRunnerStatus> {
    const deadline = Date.now() + timeoutMs;

    while (Date.now() < deadline) {
      const runner = registry.findAvailableRunner(requirements);
      if (runner) {
        OrchestratorLogger.log(`[HotRunner] Runner ${runner.id} became available`);

        return runner;
      }

      // Never sleep past the deadline; clamp at zero because the clock may already have passed it.
      await this.sleep(Math.max(0, Math.min(POLL_INTERVAL_MS, deadline - Date.now())));
    }

    throw new Error(
      `[HotRunner] Timed out waiting for available runner (${requirements.unityVersion}/${requirements.platform}) after ${timeoutMs}ms`,
    );
  }

  /**
   * Execute a job on a transport with a timeout guard.
   * On timeout, disconnects the transport to release the connection
   * and prevent the orphaned sendJob promise from holding resources.
   *
   * The guard timer is always cleared once the race settles; otherwise every finished job would
   * leave a timer of `request.timeout` milliseconds pending and keep the process alive.
   */
  private async executeWithTimeout(
    transport: HotRunnerTransport,
    request: HotRunnerJobRequest,
  ): Promise<HotRunnerJobResult> {
    const TIMEOUT_SENTINEL = Symbol('timeout');
    let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
    const timeoutPromise = new Promise<typeof TIMEOUT_SENTINEL>((resolve) => {
      timeoutHandle = setTimeout(() => {
        resolve(TIMEOUT_SENTINEL);
      }, request.timeout);
    });

    let result: HotRunnerJobResult | typeof TIMEOUT_SENTINEL;
    try {
      result = await Promise.race([transport.sendJob(request), timeoutPromise]);
    } finally {
      clearTimeout(timeoutHandle);
    }

    if (result === TIMEOUT_SENTINEL) {
      // Disconnect the transport to clean up the orphaned sendJob call
      try {
        await transport.disconnect();
      } catch (disconnectError: unknown) {
        const message = disconnectError instanceof Error ? disconnectError.message : String(disconnectError);
        OrchestratorLogger.logWarning(
          `[HotRunner] Error disconnecting transport after timeout for job ${request.jobId}: ${message}`,
        );
      }
      throw new Error(`[HotRunner] Job ${request.jobId} timed out after ${request.timeout}ms`);
    }

    return result;
  }

  /** Resolve after `ms` milliseconds. */
  private sleep(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }
}

View File

@@ -0,0 +1,186 @@
import OrchestratorLogger from '../core/orchestrator-logger';
import { HotRunnerRegistry } from './hot-runner-registry';
import { HotRunnerTransport } from './hot-runner-types';
/**
 * Periodically health-checks registered hot runners and recycles (disconnects and unregisters)
 * those that are unhealthy, have been idle too long, or have completed too many jobs.
 */
export class HotRunnerHealthMonitor {
  private intervalHandle: ReturnType<typeof setInterval> | undefined;
  private registry: HotRunnerRegistry | undefined;
  private transports: Map<string, HotRunnerTransport> = new Map();

  /**
   * When each runner was first observed idle by a monitoring cycle, together with the job count at
   * that moment. `lastHealthCheck` cannot serve as the idle clock inside the cycle because every
   * successful health check resets it to "now" right before the idle check runs.
   */
  private idleObservations: Map<string, { since: number; jobsCompleted: number }> = new Map();

  /**
   * Start periodic health monitoring for all registered runners.
   * Restarts monitoring if it is already running.
   *
   * @param registry - Registry whose runners are checked
   * @param interval - Time between check cycles, in seconds
   * @param transports - Transport per runner ID (kept by reference)
   */
  startMonitoring(registry: HotRunnerRegistry, interval: number, transports: Map<string, HotRunnerTransport>): void {
    if (this.intervalHandle) {
      this.stopMonitoring();
    }

    // Idle observations belong to one registry; do not carry them over to another.
    if (this.registry !== registry) {
      this.idleObservations.clear();
    }
    this.registry = registry;
    this.transports = transports;

    OrchestratorLogger.log(`[HotRunner] Starting health monitoring (interval: ${interval}s)`);
    this.intervalHandle = setInterval(() => {
      this.runHealthChecks().catch((error: unknown) => {
        OrchestratorLogger.logWarning(
          `[HotRunner] Health check cycle failed: ${HotRunnerHealthMonitor.describeError(error)}`,
        );
      });
    }, interval * 1000);
  }

  /**
   * Stop periodic health monitoring. The registry and transports stay attached, so the
   * on-demand methods below keep working.
   */
  stopMonitoring(): void {
    if (this.intervalHandle) {
      clearInterval(this.intervalHandle);
      this.intervalHandle = undefined;
      OrchestratorLogger.log(`[HotRunner] Health monitoring stopped`);
    }
  }

  /**
   * Check health of a specific runner by ID. Returns true if healthy.
   *
   * A healthy runner gets its health timestamp and resource figures refreshed from the transport.
   * A runner without transport, with a failing check, or whose check throws is marked 'unhealthy'.
   */
  async checkHealth(runnerId: string): Promise<boolean> {
    if (!this.registry) {
      return false;
    }

    const transport = this.transports.get(runnerId);
    if (!transport) {
      OrchestratorLogger.logWarning(`[HotRunner] No transport for runner ${runnerId}`);
      this.registry.updateRunner(runnerId, {
        state: 'unhealthy',
        lastHealthCheck: new Date().toISOString(),
      });

      return false;
    }

    try {
      const healthy = await transport.healthCheck();
      if (healthy) {
        const status = await transport.getStatus();
        this.registry.updateRunner(runnerId, {
          lastHealthCheck: new Date().toISOString(),
          memoryUsageMB: status.memoryUsageMB,
          uptime: status.uptime,
          libraryHash: status.libraryHash,
        });

        return true;
      }

      OrchestratorLogger.logWarning(`[HotRunner] Runner ${runnerId} health check returned false`);
      this.registry.updateRunner(runnerId, {
        state: 'unhealthy',
        lastHealthCheck: new Date().toISOString(),
      });

      return false;
    } catch (error: unknown) {
      OrchestratorLogger.logWarning(
        `[HotRunner] Runner ${runnerId} health check failed: ${HotRunnerHealthMonitor.describeError(error)}`,
      );
      this.registry.updateRunner(runnerId, {
        state: 'unhealthy',
        lastHealthCheck: new Date().toISOString(),
      });

      return false;
    }
  }

  /**
   * Mark a runner for cleanup, disconnect its transport and remove it from the registry.
   * Also used for idle and max-jobs recycling. Disconnect errors are logged, not thrown.
   */
  async recycleUnhealthyRunner(runnerId: string): Promise<void> {
    if (!this.registry) {
      return;
    }

    OrchestratorLogger.log(`[HotRunner] Recycling unhealthy runner ${runnerId}`);
    this.registry.updateRunner(runnerId, { state: 'stopping' });
    this.idleObservations.delete(runnerId);

    const transport = this.transports.get(runnerId);
    if (transport) {
      try {
        await transport.disconnect();
      } catch (error: unknown) {
        OrchestratorLogger.logWarning(
          `[HotRunner] Error disconnecting runner ${runnerId}: ${HotRunnerHealthMonitor.describeError(error)}`,
        );
      }
      this.transports.delete(runnerId);
    }

    this.registry.unregisterRunner(runnerId);
    OrchestratorLogger.log(`[HotRunner] Runner ${runnerId} recycled and removed`);
  }

  /**
   * Recycle a runner that has been idle longer than the maximum idle time.
   *
   * The idle duration is the longer of (a) the time since `lastHealthCheck` and (b) the time since a
   * monitoring cycle first observed the runner idle without completing a job in between.
   *
   * @param runnerId - Runner to inspect; ignored unless it exists and is currently 'idle'
   * @param maxIdleTime - Idle limit in seconds
   */
  async recycleIdleRunner(runnerId: string, maxIdleTime: number): Promise<void> {
    if (!this.registry) {
      return;
    }

    const runner = this.registry.getRunner(runnerId);
    if (!runner || runner.state !== 'idle') {
      return;
    }

    const now = Date.now();
    const idleCandidatesMs = [now - new Date(runner.lastHealthCheck).getTime()];
    const observed = this.idleObservations.get(runnerId);
    if (observed && observed.jobsCompleted === runner.jobsCompleted) {
      idleCandidatesMs.push(now - observed.since);
    }

    // An unparseable timestamp yields NaN; ignore it rather than let it poison the comparison.
    const idleSeconds = Math.max(...idleCandidatesMs.filter((value) => Number.isFinite(value))) / 1000;

    if (idleSeconds >= maxIdleTime) {
      OrchestratorLogger.log(
        `[HotRunner] Runner ${runnerId} idle for ${Math.floor(idleSeconds)}s (max: ${maxIdleTime}s), recycling`,
      );
      await this.recycleUnhealthyRunner(runnerId);
    }
  }

  /**
   * Run health checks and idle-recycle checks for all registered runners.
   */
  private async runHealthChecks(): Promise<void> {
    const registry = this.registry;
    if (!registry) {
      return;
    }

    // Forget observations of runners that were unregistered elsewhere.
    for (const trackedId of [...this.idleObservations.keys()]) {
      if (!registry.getRunner(trackedId)) {
        this.idleObservations.delete(trackedId);
      }
    }

    for (const runner of registry.listRunners()) {
      if (runner.state === 'stopping') {
        continue;
      }

      const healthy = await this.checkHealth(runner.id);

      // `runner` is the pre-check snapshot: a runner that was still starting is not recycled in this cycle.
      if (!healthy && runner.state !== 'starting') {
        await this.recycleUnhealthyRunner(runner.id);
        continue;
      }

      // Re-read the status: it may have changed, or the runner may be gone, while the check was awaited.
      const current = registry.getRunner(runner.id);
      if (!current) {
        continue;
      }
      this.observeIdleState(current);

      const config = registry.getConfig(runner.id);
      if (!config) {
        continue;
      }

      // Check for idle timeout
      if (current.state === 'idle') {
        await this.recycleIdleRunner(runner.id, config.maxIdleTime);
        if (!registry.getRunner(runner.id)) {
          // Already recycled for idleness; do not recycle it a second time below.
          continue;
        }
      }

      // Check for max jobs before recycle
      if (config.maxJobsBeforeRecycle > 0 && current.jobsCompleted >= config.maxJobsBeforeRecycle) {
        OrchestratorLogger.log(
          `[HotRunner] Runner ${runner.id} reached max jobs (${current.jobsCompleted}/${config.maxJobsBeforeRecycle}), recycling`,
        );
        await this.recycleUnhealthyRunner(runner.id);
      }
    }
  }

  /**
   * Record when a runner was first seen idle, restarting the clock whenever it stops being idle or
   * its completed-job count changes (which means it worked between two cycles).
   */
  private observeIdleState(status: HotRunnerStatus): void {
    if (status.state !== 'idle') {
      this.idleObservations.delete(status.id);

      return;
    }

    const observed = this.idleObservations.get(status.id);
    if (!observed || observed.jobsCompleted !== status.jobsCompleted) {
      this.idleObservations.set(status.id, { since: Date.now(), jobsCompleted: status.jobsCompleted });
    }
  }

  /** Extract a printable message from an arbitrary thrown value. */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /**
   * Whether health monitoring is currently active.
   */
  get isMonitoring(): boolean {
    return this.intervalHandle !== undefined;
  }
}

View File

@@ -0,0 +1,315 @@
import fs from 'node:fs';
import path from 'node:path';
import { customAlphabet } from 'nanoid';
import OrchestratorLogger from '../core/orchestrator-logger';
import { HotRunnerConfig, HotRunnerStatus } from './hot-runner-types';
/** Generates the 12-character lowercase alphanumeric suffix used when building runner IDs. */
const generateId = customAlphabet('abcdefghijklmnopqrstuvwxyz0123456789', 12);
/** Name of the file, inside the persistence directory, that holds the serialized registry. */
const PERSISTENCE_FILENAME = 'hot-runners.json';
/** Runner state names accepted when validating a status entry. */
const VALID_RUNNER_STATES: ReadonlySet<string> = new Set(['idle', 'busy', 'starting', 'stopping', 'unhealthy']);
/**
 * Optional criteria for listing runners. Each provided (non-empty) field must match the runner's
 * corresponding status field exactly; omitted fields are not filtered on.
 */
export interface HotRunnerFilter {
platform?: string;
state?: string;
unityVersion?: string;
}
/**
 * Type guard for runner status entries restored from persistence or held in memory.
 * An entry qualifies when it is a non-null object with a non-empty string `id`, a known `state`,
 * string `unityVersion` / `platform` / `lastHealthCheck`, and numeric `uptime` / `jobsCompleted` /
 * `memoryUsageMB`. Optional fields are not inspected.
 */
function isValidRunnerStatus(entry: unknown): entry is HotRunnerStatus {
  if (entry === null || typeof entry !== 'object') {
    return false;
  }

  const candidate = entry as Record<string, unknown>;
  const { id, state } = candidate;

  if (typeof id !== 'string' || id.length === 0) {
    return false;
  }
  if (typeof state !== 'string' || !VALID_RUNNER_STATES.has(state)) {
    return false;
  }

  const requiredStrings = ['unityVersion', 'platform', 'lastHealthCheck'];
  const requiredNumbers = ['uptime', 'jobsCompleted', 'memoryUsageMB'];

  return (
    requiredStrings.every((field) => typeof candidate[field] === 'string') &&
    requiredNumbers.every((field) => typeof candidate[field] === 'number')
  );
}
/**
 * Type guard for runner config entries restored from persistence.
 * An entry qualifies when it is a non-null object with a boolean `enabled`, a `transport` that is one
 * of 'websocket' / 'grpc' / 'named-pipe', a string `host`, and numeric `port`, `healthCheckInterval`,
 * `maxIdleTime` and `maxJobsBeforeRecycle`. Optional fields are not inspected.
 */
function isValidRunnerConfig(entry: unknown): entry is HotRunnerConfig {
  if (entry === null || typeof entry !== 'object') {
    return false;
  }

  const candidate = entry as Record<string, unknown>;
  const { enabled, transport, host } = candidate;

  if (typeof enabled !== 'boolean') {
    return false;
  }
  if (typeof transport !== 'string' || !['websocket', 'grpc', 'named-pipe'].includes(transport)) {
    return false;
  }
  if (typeof host !== 'string') {
    return false;
  }

  const requiredNumbers = ['port', 'healthCheckInterval', 'maxIdleTime', 'maxJobsBeforeRecycle'];

  return requiredNumbers.every((field) => typeof candidate[field] === 'number');
}
/**
 * In-memory registry of hot runners (status and config per runner ID) with optional
 * file-based persistence of the whole registry after every change.
 */
export class HotRunnerRegistry {
  private runners: Map<string, HotRunnerStatus> = new Map();
  private configs: Map<string, HotRunnerConfig> = new Map();
  private persistencePath: string;

  /**
   * @param persistenceDirectory - Directory for the persistence file. When omitted or empty,
   *   persistence is disabled and the registry is purely in-memory.
   */
  constructor(persistenceDirectory?: string) {
    this.persistencePath = persistenceDirectory ? path.join(persistenceDirectory, PERSISTENCE_FILENAME) : '';
  }

  /**
   * Register a new hot runner. Returns the generated runner ID.
   * The runner starts in the 'starting' state; a missing Unity version or platform is stored as 'unknown'.
   */
  registerRunner(config: HotRunnerConfig): string {
    const id = `hr-${generateId()}`;
    const status: HotRunnerStatus = {
      id,
      state: 'starting',
      unityVersion: config.unityVersion ?? 'unknown',
      platform: config.platform ?? 'unknown',
      uptime: 0,
      jobsCompleted: 0,
      lastHealthCheck: new Date().toISOString(),
      memoryUsageMB: 0,
    };

    this.runners.set(id, status);
    this.configs.set(id, config);
    OrchestratorLogger.log(`[HotRunner] Registered runner ${id} (${status.unityVersion}/${status.platform})`);
    this.persist();

    return id;
  }

  /**
   * Remove a runner (and its config) from the registry. Unknown IDs are ignored.
   */
  unregisterRunner(id: string): void {
    const existed = this.runners.delete(id);
    this.configs.delete(id);
    if (existed) {
      OrchestratorLogger.log(`[HotRunner] Unregistered runner ${id}`);
      this.persist();
    }
  }

  /**
   * Get a runner's current status by ID.
   */
  getRunner(id: string): HotRunnerStatus | undefined {
    return this.runners.get(id);
  }

  /**
   * Get a runner's config by ID.
   */
  getConfig(id: string): HotRunnerConfig | undefined {
    return this.configs.get(id);
  }

  /**
   * List all runners, optionally filtered by platform, state, or Unity version.
   * Every filter field that is provided (and non-empty) must match exactly.
   */
  listRunners(filter?: HotRunnerFilter): HotRunnerStatus[] {
    let results = [...this.runners.values()];
    if (filter?.platform) {
      results = results.filter((runner) => runner.platform === filter.platform);
    }
    if (filter?.state) {
      results = results.filter((runner) => runner.state === filter.state);
    }
    if (filter?.unityVersion) {
      results = results.filter((runner) => runner.unityVersion === filter.unityVersion);
    }

    return results;
  }

  /**
   * Find an idle runner matching the given Unity version and platform requirements.
   * Returns the first match in registration order, or undefined when there is none.
   */
  findAvailableRunner(requirements: { unityVersion: string; platform: string }): HotRunnerStatus | undefined {
    return this.listRunners({
      state: 'idle',
      unityVersion: requirements.unityVersion,
      platform: requirements.platform,
    })[0];
  }

  /**
   * Update a runner's status fields. Merges partial updates into existing status.
   * The runner's ID can never be changed through an update; unknown IDs are ignored.
   */
  updateRunner(id: string, update: Partial<HotRunnerStatus>): void {
    const existing = this.runners.get(id);
    if (!existing) {
      return;
    }

    this.runners.set(id, { ...existing, ...update, id });
    this.persist();
  }

  /**
   * Get the total number of registered runners.
   */
  get size(): number {
    return this.runners.size;
  }

  /**
   * Validate all runners in the registry and reset invalid ones to 'unhealthy'.
   * Returns the number of runners that were repaired.
   */
  validateAndRepair(): number {
    let repaired = 0;

    for (const [id, status] of this.runners) {
      // Cast to unknown to bypass the type guard narrowing to 'never',
      // since the Map is typed as HotRunnerStatus but entries may have
      // been corrupted via direct deserialization or unsafe casts.
      const entry = status as unknown as Record<string, unknown>;
      if (!isValidRunnerStatus(entry)) {
        OrchestratorLogger.logWarning(`[HotRunner] Runner ${id} has invalid state, marking as unhealthy`);
        // Keep every field that still has the right type and default the rest.
        this.runners.set(id, {
          id,
          state: 'unhealthy',
          unityVersion: typeof entry.unityVersion === 'string' ? entry.unityVersion : 'unknown',
          platform: typeof entry.platform === 'string' ? entry.platform : 'unknown',
          uptime: typeof entry.uptime === 'number' ? entry.uptime : 0,
          jobsCompleted: typeof entry.jobsCompleted === 'number' ? entry.jobsCompleted : 0,
          lastHealthCheck: typeof entry.lastHealthCheck === 'string' ? entry.lastHealthCheck : new Date().toISOString(),
          memoryUsageMB: typeof entry.memoryUsageMB === 'number' ? entry.memoryUsageMB : 0,
        });
        repaired++;
      }
    }

    if (repaired > 0) {
      this.persist();
    }

    return repaired;
  }

  /**
   * Persist current registry state to disk for crash recovery.
   * Validates data before writing to prevent persisting corrupt state.
   * Failures are logged and never thrown, so persistence problems cannot break registry operations.
   */
  private persist(): void {
    if (!this.persistencePath) {
      return;
    }

    try {
      // Validate data before persisting
      for (const [id, status] of this.runners) {
        if (!isValidRunnerStatus(status)) {
          OrchestratorLogger.logWarning(`[HotRunner] Skipping persistence -- runner ${id} has invalid state`);

          return;
        }
      }

      const data = {
        runners: Object.fromEntries(this.runners),
        configs: Object.fromEntries(this.configs),
      };
      const directory = path.dirname(this.persistencePath);
      if (!fs.existsSync(directory)) {
        fs.mkdirSync(directory, { recursive: true });
      }
      fs.writeFileSync(this.persistencePath, JSON.stringify(data, undefined, 2));
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      OrchestratorLogger.logWarning(`[HotRunner] Failed to persist registry: ${message}`);
    }
  }

  /**
   * Load registry state from disk. Returns the number of runners restored by this call.
   * Validates each restored entry and discards corrupt entries with warnings; an entry whose
   * embedded `id` differs from the key it is stored under counts as corrupt, because the rest of
   * the system addresses a runner by `status.id`.
   * If the persistence file itself is corrupt (invalid JSON), starts with
   * an empty registry.
   */
  loadFromDisk(): number {
    if (!this.persistencePath || !fs.existsSync(this.persistencePath)) {
      return 0;
    }

    let data: any;
    try {
      const raw = fs.readFileSync(this.persistencePath, 'utf8');
      data = JSON.parse(raw);
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      OrchestratorLogger.logWarning(
        `[HotRunner] Persistence file is corrupt, starting with empty registry: ${message}`,
      );

      return 0;
    }

    if (typeof data !== 'object' || data === null) {
      OrchestratorLogger.logWarning('[HotRunner] Persistence file has invalid structure, starting with empty registry');

      return 0;
    }

    const restoredIds = new Set<string>();
    let discarded = 0;

    if (data.runners && typeof data.runners === 'object') {
      for (const [id, status] of Object.entries(data.runners)) {
        if (isValidRunnerStatus(status) && status.id === id) {
          this.runners.set(id, status);
          restoredIds.add(id);
        } else {
          OrchestratorLogger.logWarning(`[HotRunner] Discarding invalid runner entry '${id}' from persistence file`);
          discarded++;
        }
      }
    }

    if (data.configs && typeof data.configs === 'object') {
      for (const [id, config] of Object.entries(data.configs)) {
        // Only restore configs for runners that were successfully restored
        if (this.runners.has(id)) {
          if (isValidRunnerConfig(config)) {
            this.configs.set(id, config);
          } else {
            OrchestratorLogger.logWarning(`[HotRunner] Discarding invalid config entry '${id}' from persistence file`);
          }
        }
      }
    }

    if (discarded > 0) {
      OrchestratorLogger.logWarning(`[HotRunner] Discarded ${discarded} invalid runner(s) from persistence file`);
    }

    // Report what this call restored, not the registry size, which may include earlier registrations.
    OrchestratorLogger.log(`[HotRunner] Restored ${restoredIds.size} runner(s) from disk`);

    return restoredIds.size;
  }
}

View File

@@ -0,0 +1,166 @@
import OrchestratorLogger from '../core/orchestrator-logger';
import { HotRunnerRegistry } from './hot-runner-registry';
import { HotRunnerHealthMonitor } from './hot-runner-health-monitor';
import { HotRunnerDispatcher, OutputCallback } from './hot-runner-dispatcher';
import {
HotRunnerConfig,
HotRunnerJobRequest,
HotRunnerJobResult,
HotRunnerStatus,
HotRunnerTransport,
} from './hot-runner-types';
import BuildParameters from '../../../build-parameters';
/**
 * High-level entry point for hot runners: owns the registry, the health monitor, the dispatcher
 * and the shared runner-ID-to-transport map they all work on.
 */
export class HotRunnerService {
  private registry: HotRunnerRegistry;
  private healthMonitor: HotRunnerHealthMonitor;
  private dispatcher: HotRunnerDispatcher;
  private transports: Map<string, HotRunnerTransport> = new Map();
  private config: HotRunnerConfig | undefined;

  /**
   * @param persistenceDirectory - Optional directory handed to the registry for its persistence file
   */
  constructor(persistenceDirectory?: string) {
    this.registry = new HotRunnerRegistry(persistenceDirectory);
    this.healthMonitor = new HotRunnerHealthMonitor();
    this.dispatcher = new HotRunnerDispatcher(this.transports);
  }

  /**
   * Initialize the hot runner service: load persisted state, start health monitoring.
   *
   * @param config - Service configuration; `healthCheckInterval` sets the monitoring period
   */
  async initialize(config: HotRunnerConfig): Promise<void> {
    this.config = config;
    OrchestratorLogger.log(
      `[HotRunner] Initializing service (transport: ${config.transport}, ${config.host}:${config.port})`,
    );

    // Attempt to restore previously registered runners from disk
    const restored = this.registry.loadFromDisk();
    if (restored > 0) {
      OrchestratorLogger.log(`[HotRunner] Restored ${restored} runner(s) from persistence`);
    }

    // Start health monitoring
    this.healthMonitor.startMonitoring(this.registry, config.healthCheckInterval, this.transports);
    OrchestratorLogger.log(`[HotRunner] Service initialized`);
  }

  /**
   * Register a runner with a transport implementation.
   * Returns the runner ID.
   */
  registerRunner(config: HotRunnerConfig, transport: HotRunnerTransport): string {
    const id = this.registry.registerRunner(config);
    this.transports.set(id, transport);

    return id;
  }

  /**
   * Submit a build job to an available hot runner.
   * Converts BuildParameters to a HotRunnerJobRequest and dispatches.
   *
   * @param params - Build parameters; `buildGuid` becomes the job ID (a timestamp-based ID is used when empty)
   * @param onOutput - Optional callback for the job output
   * @returns The job result; rejects when dispatching fails or times out
   */
  async submitBuild(params: BuildParameters, onOutput?: OutputCallback): Promise<HotRunnerJobResult> {
    const request: HotRunnerJobRequest = {
      jobId: params.buildGuid || `build-${Date.now()}`,
      buildMethod: params.buildMethod || undefined,
      buildTarget: params.targetPlatform,
      buildPath: params.buildPath,
      customParameters: params.customParameters ? this.parseCustomParameters(params.customParameters) : undefined,
      timeout: 30 * 60 * 1000, // 30 minutes default
    };

    OrchestratorLogger.log(`[HotRunner] Submitting build: ${request.jobId} (target: ${request.buildTarget})`);

    return this.dispatcher.dispatchJob(request, this.registry, params.editorVersion, onOutput);
  }

  /**
   * Submit a test job to an available hot runner.
   * Converts BuildParameters and optional suite config to a test-mode HotRunnerJobRequest.
   *
   * @param params - Build parameters describing the target platform and editor version
   * @param suiteConfig - Optional test mode (defaults to 'editmode') and test suite path
   * @param onOutput - Optional callback for the job output
   * @returns The job result; rejects when dispatching fails or times out
   */
  async submitTest(
    params: BuildParameters,
    suiteConfig?: { testMode?: 'editmode' | 'playmode'; testSuitePath?: string },
    onOutput?: OutputCallback,
  ): Promise<HotRunnerJobResult> {
    const request: HotRunnerJobRequest = {
      jobId: params.buildGuid || `test-${Date.now()}`,
      buildTarget: params.targetPlatform,
      customParameters: params.customParameters ? this.parseCustomParameters(params.customParameters) : undefined,
      timeout: 30 * 60 * 1000, // 30 minutes default
      testMode: suiteConfig?.testMode ?? 'editmode',
      testSuitePath: suiteConfig?.testSuitePath,
    };

    OrchestratorLogger.log(`[HotRunner] Submitting test: ${request.jobId} (mode: ${request.testMode})`);

    return this.dispatcher.dispatchJob(request, this.registry, params.editorVersion, onOutput);
  }

  /**
   * Shut down the service: stop health monitoring and disconnect all transports.
   * Disconnect errors are logged and do not abort the shutdown. Registry entries are left
   * untouched - this method does not unregister any runner.
   */
  async shutdown(): Promise<void> {
    OrchestratorLogger.log(`[HotRunner] Shutting down service`);
    this.healthMonitor.stopMonitoring();

    const disconnectPromises: Promise<void>[] = [];
    for (const [id, transport] of this.transports.entries()) {
      disconnectPromises.push(
        transport.disconnect().catch((error: unknown) => {
          const message = error instanceof Error ? error.message : String(error);
          OrchestratorLogger.logWarning(`[HotRunner] Error disconnecting runner ${id}: ${message}`);
        }),
      );
    }

    await Promise.all(disconnectPromises);
    this.transports.clear();
    OrchestratorLogger.log(`[HotRunner] Service shut down`);
  }

  /**
   * Get the status of all registered runners.
   */
  getStatus(): HotRunnerStatus[] {
    return this.registry.listRunners();
  }

  /**
   * Get the underlying registry (for testing or advanced use).
   */
  getRegistry(): HotRunnerRegistry {
    return this.registry;
  }

  /**
   * Parse a whitespace-separated custom parameters string into a key-value map.
   * Handles `-key value`, `-key=value` and bare `-flag` (stored as 'true'); any number of leading
   * dashes is accepted. Double-quoted text is kept together and unquoted, so `-name "My Game"` and
   * `-name="My Game"` both yield `My Game`, and a quoted value may start with a dash.
   * Single quotes are not treated as quoting. Tokens that are neither a key nor the value of the
   * preceding key are ignored, as are keys that would be empty.
   */
  private parseCustomParameters(raw: string): Record<string, string> {
    const result: Record<string, string> = {};
    const tokens = HotRunnerService.tokenizeParameters(raw);

    for (let index = 0; index < tokens.length; index++) {
      const token = tokens[index];
      if (!token || token.quoted || !token.text.startsWith('-')) {
        continue;
      }

      const stripped = token.text.replace(/^-+/, '');
      const separator = stripped.indexOf('=');
      if (separator !== -1) {
        const key = stripped.slice(0, separator);
        if (key) {
          result[key] = stripped.slice(separator + 1);
        }
        continue;
      }

      if (!stripped) {
        // A bare "-" or "--" names nothing.
        continue;
      }

      const next = tokens[index + 1];
      if (next && (next.quoted || !next.text.startsWith('-'))) {
        result[stripped] = next.text;
        index++;
      } else {
        result[stripped] = 'true';
      }
    }

    return result;
  }

  /**
   * Split a parameter string on whitespace while keeping double-quoted sections together.
   * Balanced quotes are removed from the token text; an unbalanced quote is kept literally.
   * `quoted` is true when the token starts with a quoted section.
   */
  private static tokenizeParameters(raw: string): { text: string; quoted: boolean }[] {
    const sources = raw.match(/(?:[^\s"]+|"[^"]*"|")+/g) ?? [];

    return sources.map((source) => ({
      text: source.replace(/"([^"]*)"/g, '$1'),
      quoted: /^"[^"]*"/.test(source),
    }));
  }
}

View File

@@ -0,0 +1,54 @@
/**
 * Configuration for the hot runner service and for each registered runner.
 */
export interface HotRunnerConfig {
// NOTE(review): only type-checked in the code visible here; presumably the feature toggle -- confirm.
enabled: boolean;
// Kind of transport used to reach the runner.
transport: 'websocket' | 'grpc' | 'named-pipe';
// Host and port of the runner endpoint (presumably what the transport connects to -- confirm).
host: string;
port: number;
healthCheckInterval: number; // seconds
maxIdleTime: number; // seconds before recycling
// Completed-job count at which a runner is recycled; values <= 0 disable this check.
maxJobsBeforeRecycle: number;
// Unity version and platform the runner serves; recorded as 'unknown' at registration when omitted.
unityVersion?: string;
platform?: string;
}
/**
 * Current state of a hot runner as tracked by the registry.
 */
export interface HotRunnerStatus {
// Unique runner ID; the registry generates it as "hr-" plus a random suffix.
id: string;
// Lifecycle state. Only 'idle' runners are handed out for new jobs.
state: 'idle' | 'busy' | 'starting' | 'stopping' | 'unhealthy';
unityVersion: string;
platform: string;
// ID of the job currently running on this runner, if any.
currentJob?: string;
// ID of the most recent job that completed without throwing.
lastJobCompleted?: string;
// Copied from the transport's reported status on each healthy check (unit not shown here -- TODO confirm).
uptime: number;
// Number of jobs that completed without throwing on this runner.
jobsCompleted: number;
// ISO-8601 timestamp of the last health check (initialized at registration).
lastHealthCheck: string;
// Copied from the transport's reported status on each healthy check.
memoryUsageMB: number;
// Copied from the transport's reported status; presumably a hash of the Unity Library folder -- confirm.
libraryHash?: string;
}
/**
 * A build or test job sent to a hot runner.
 */
export interface HotRunnerJobRequest {
// Identifier used in logs and recorded on the runner while the job runs.
jobId: string;
buildMethod?: string;
// Target platform; also used to select a runner whose platform matches.
buildTarget: string;
buildPath?: string;
// Extra parameters as a key-value map.
customParameters?: Record<string, string>;
// Milliseconds; bounds both the wait for an idle runner and the job execution.
timeout: number;
// Set for test jobs; the service defaults it to 'editmode' when submitting a test.
testMode?: 'editmode' | 'playmode';
testSuitePath?: string;
}
/**
 * Outcome of a job as reported by a hot runner transport.
 */
export interface HotRunnerJobResult {
jobId: string;
success: boolean;
// Exit code reported for the job; included in the dispatcher's completion log.
exitCode: number;
// Duration reported by the runner (presumably milliseconds -- TODO confirm).
duration: number;
// Job output; forwarded to the caller's output callback when non-empty.
output: string;
// NOTE(review): presumably paths of produced artifacts -- confirm.
artifacts?: string[];
testResults?: string; // path to test result file
}
/**
 * Communication channel to a single hot runner.
 */
export interface HotRunnerTransport {
// NOTE(review): not called by the code visible here; presumably opens the connection -- confirm.
connect(config: HotRunnerConfig): Promise<void>;
// Called when a job times out, when the runner is recycled, and on service shutdown.
disconnect(): Promise<void>;
// Runs the job and resolves with its result; the dispatcher races this against the request timeout.
sendJob(request: HotRunnerJobRequest): Promise<HotRunnerJobResult>;
// Queried after a successful health check; memoryUsageMB, uptime and libraryHash are copied into the registry.
getStatus(): Promise<HotRunnerStatus>;
// Resolves true when the runner is healthy; false or a rejection marks the runner 'unhealthy'.
healthCheck(): Promise<boolean>;
}

View File

@@ -0,0 +1,740 @@
import fs from 'node:fs';
import { HotRunnerRegistry } from './hot-runner-registry';
import { HotRunnerHealthMonitor } from './hot-runner-health-monitor';
import { HotRunnerDispatcher } from './hot-runner-dispatcher';
import { HotRunnerService } from './hot-runner-service';
import {
HotRunnerConfig,
HotRunnerJobRequest,
HotRunnerJobResult,
HotRunnerStatus,
HotRunnerTransport,
} from './hot-runner-types';
// Mock dependencies
jest.mock('node:fs');
jest.mock('../core/orchestrator-logger');
const mockFs = fs as jest.Mocked<typeof fs>;
/**
 * Build a complete HotRunnerConfig for tests: a websocket runner on localhost:9090 serving
 * Unity 2022.3.0f1 / StandaloneWindows64. Fields in `overrides` replace the defaults.
 */
function createMockConfig(overrides?: Partial<HotRunnerConfig>): HotRunnerConfig {
  const defaults: HotRunnerConfig = {
    enabled: true,
    transport: 'websocket',
    host: 'localhost',
    port: 9090,
    healthCheckInterval: 30,
    maxIdleTime: 3600,
    maxJobsBeforeRecycle: 100,
    unityVersion: '2022.3.0f1',
    platform: 'StandaloneWindows64',
  };

  return { ...defaults, ...overrides };
}
/**
 * Build a HotRunnerTransport made of jest mocks for tests. By default every call resolves:
 * connect/disconnect with undefined, sendJob with a successful result for 'test-job',
 * getStatus with an idle runner status and healthCheck with true.
 * Members in `overrides` replace the default mocks.
 */
function createMockTransport(overrides?: Partial<HotRunnerTransport>): HotRunnerTransport {
  const successfulResult = {
    jobId: 'test-job',
    success: true,
    exitCode: 0,
    duration: 5000,
    output: 'Build succeeded',
    artifacts: ['build/output.exe'],
  } as HotRunnerJobResult;

  const idleStatus = {
    id: 'mock-runner',
    state: 'idle',
    unityVersion: '2022.3.0f1',
    platform: 'StandaloneWindows64',
    uptime: 3600,
    jobsCompleted: 5,
    lastHealthCheck: new Date().toISOString(),
    memoryUsageMB: 1024,
  } as HotRunnerStatus;

  const defaults: HotRunnerTransport = {
    // eslint-disable-next-line unicorn/no-useless-undefined
    connect: jest.fn().mockResolvedValue(undefined),
    // eslint-disable-next-line unicorn/no-useless-undefined
    disconnect: jest.fn().mockResolvedValue(undefined),
    sendJob: jest.fn().mockResolvedValue(successfulResult),
    getStatus: jest.fn().mockResolvedValue(idleStatus),
    healthCheck: jest.fn().mockResolvedValue(true),
  };

  return { ...defaults, ...overrides };
}
/**
 * Build a HotRunnerJobRequest for tests: job 'job-001' targeting StandaloneWindows64 with a
 * 60-second timeout. Fields in `overrides` replace the defaults.
 */
function createMockJobRequest(overrides?: Partial<HotRunnerJobRequest>): HotRunnerJobRequest {
  const defaults: HotRunnerJobRequest = {
    jobId: 'job-001',
    buildTarget: 'StandaloneWindows64',
    timeout: 60000,
  };

  return { ...defaults, ...overrides };
}
// --- Registry Tests ---
/**
 * HotRunnerRegistry suite.
 *
 * Covers registration/lookup/removal, filtered listing, idle-runner matching,
 * status updates, and the file-backed persistence path (writing on register,
 * loading, discarding malformed entries, and validateAndRepair).
 */
describe('HotRunnerRegistry', () => {
  const PERSISTENCE_DIR = '/tmp/test';

  let registry: HotRunnerRegistry;

  /** Makes the mocked fs accept directory creation and file writes. */
  const stubWritableDisk = (): void => {
    mockFs.existsSync.mockReturnValue(true);
    mockFs.writeFileSync.mockImplementation(() => {});
    mockFs.mkdirSync.mockImplementation(() => '' as any);
  };

  /** Makes the mocked fs report an existing file with the given raw contents. */
  const stubPersistenceFile = (contents: string): void => {
    mockFs.existsSync.mockReturnValue(true);
    mockFs.readFileSync.mockReturnValue(contents);
  };

  /** Builds a well-formed persisted status entry; `overrides` replaces individual fields. */
  const persistedStatus = (id: string, overrides: Record<string, unknown> = {}) => ({
    id,
    state: 'idle',
    unityVersion: '2022.3.0f1',
    platform: 'StandaloneWindows64',
    uptime: 100,
    jobsCompleted: 3,
    lastHealthCheck: new Date().toISOString(),
    memoryUsageMB: 512,
    ...overrides,
  });

  /** Registers a runner on the shared in-memory registry and flips it to idle. */
  const registerIdle = (overrides: Parameters<typeof createMockConfig>[0]): string => {
    const runnerId = registry.registerRunner(createMockConfig(overrides));
    registry.updateRunner(runnerId, { state: 'idle' });

    return runnerId;
  };

  beforeEach(() => {
    jest.clearAllMocks();
    registry = new HotRunnerRegistry();
  });

  it('should register a runner and return an ID', () => {
    const runnerId = registry.registerRunner(createMockConfig());

    expect(runnerId).toMatch(/^hr-/);
    expect(registry.size).toBe(1);
  });

  it('should retrieve a registered runner by ID', () => {
    const runnerId = registry.registerRunner(createMockConfig());

    const status = registry.getRunner(runnerId);

    expect(status).toBeDefined();
    // A freshly registered runner reports the config's version/platform and the 'starting' state.
    expect(status).toMatchObject({
      id: runnerId,
      state: 'starting',
      unityVersion: '2022.3.0f1',
      platform: 'StandaloneWindows64',
    });
  });

  it('should return undefined for unknown runner ID', () => {
    expect(registry.getRunner('nonexistent')).toBeUndefined();
  });

  it('should unregister a runner', () => {
    const runnerId = registry.registerRunner(createMockConfig());
    expect(registry.size).toBe(1);

    registry.unregisterRunner(runnerId);

    expect(registry.size).toBe(0);
    expect(registry.getRunner(runnerId)).toBeUndefined();
  });

  it('should handle unregistering a nonexistent runner gracefully', () => {
    registry.unregisterRunner('nonexistent');

    expect(registry.size).toBe(0);
  });

  it('should list all runners without filter', () => {
    const platforms = ['StandaloneWindows64', 'StandaloneLinux64', 'StandaloneOSX'] as const;
    for (const platform of platforms) {
      registry.registerRunner(createMockConfig({ platform }));
    }

    expect(registry.listRunners()).toHaveLength(3);
  });

  it('should filter runners by platform', () => {
    const platforms = ['StandaloneWindows64', 'StandaloneLinux64', 'StandaloneWindows64'] as const;
    for (const platform of platforms) {
      registry.registerRunner(createMockConfig({ platform }));
    }

    expect(registry.listRunners({ platform: 'StandaloneWindows64' })).toHaveLength(2);
    expect(registry.listRunners({ platform: 'StandaloneLinux64' })).toHaveLength(1);
  });

  it('should filter runners by state', () => {
    const promotedId = registry.registerRunner(createMockConfig());
    registry.registerRunner(createMockConfig());
    // Only the first runner is promoted; the other keeps its initial 'starting' state.
    registry.updateRunner(promotedId, { state: 'idle' });

    const idleRunners = registry.listRunners({ state: 'idle' });

    expect(idleRunners).toHaveLength(1);
    expect(idleRunners[0].id).toBe(promotedId);
  });

  it('should filter runners by Unity version', () => {
    for (const unityVersion of ['2022.3.0f1', '2023.1.0f1', '2022.3.0f1']) {
      registry.registerRunner(createMockConfig({ unityVersion }));
    }

    expect(registry.listRunners({ unityVersion: '2022.3.0f1' })).toHaveLength(2);
  });

  it('should find an available idle runner matching requirements', () => {
    const windowsId = registerIdle({ unityVersion: '2022.3.0f1', platform: 'StandaloneWindows64' });
    registerIdle({ unityVersion: '2023.1.0f1', platform: 'StandaloneLinux64' });

    const match = registry.findAvailableRunner({
      unityVersion: '2022.3.0f1',
      platform: 'StandaloneWindows64',
    });

    expect(match).toBeDefined();
    expect(match!.id).toBe(windowsId);
  });

  it('should return undefined when no runner matches requirements', () => {
    registerIdle({ unityVersion: '2022.3.0f1', platform: 'StandaloneWindows64' });

    const match = registry.findAvailableRunner({
      unityVersion: '2023.1.0f1',
      platform: 'StandaloneLinux64',
    });

    expect(match).toBeUndefined();
  });

  it('should update runner status fields', () => {
    const runnerId = registry.registerRunner(createMockConfig());

    registry.updateRunner(runnerId, { state: 'idle', memoryUsageMB: 2048 });

    const status = registry.getRunner(runnerId);
    expect(status!.state).toBe('idle');
    expect(status!.memoryUsageMB).toBe(2048);
    // The update must leave the runner's identity untouched.
    expect(status!.id).toBe(runnerId);
  });

  it('should persist and load registry from disk', () => {
    const diskRegistry = new HotRunnerRegistry(PERSISTENCE_DIR);
    stubWritableDisk();

    const runnerId = diskRegistry.registerRunner(createMockConfig());

    // Registering must have written a payload that contains the new runner.
    expect(mockFs.writeFileSync).toHaveBeenCalled();
    const [[, rawPayload]] = (mockFs.writeFileSync as jest.Mock).mock.calls;
    const payload = JSON.parse(rawPayload as string);
    expect(payload.runners).toBeDefined();
    expect(payload.runners[runnerId]).toBeDefined();
  });

  it('should load runners from disk on loadFromDisk', () => {
    const diskRegistry = new HotRunnerRegistry(PERSISTENCE_DIR);
    stubPersistenceFile(
      JSON.stringify({
        runners: {
          'hr-restored': persistedStatus('hr-restored'),
        },
        configs: {
          'hr-restored': createMockConfig(),
        },
      }),
    );

    const loaded = diskRegistry.loadFromDisk();

    expect(loaded).toBe(1);
    expect(diskRegistry.getRunner('hr-restored')).toBeDefined();
  });

  it('should discard invalid runner entries when loading from disk', () => {
    const diskRegistry = new HotRunnerRegistry(PERSISTENCE_DIR);
    stubPersistenceFile(
      JSON.stringify({
        runners: {
          'hr-valid': persistedStatus('hr-valid'),
          // Entry lacking required fields such as state and unityVersion.
          'hr-invalid': {
            id: 'hr-invalid',
          },
          // Entry that is complete but carries a state the registry does not know.
          'hr-bad-state': persistedStatus('hr-bad-state', {
            state: 'nonexistent-state',
            uptime: 0,
            jobsCompleted: 0,
            memoryUsageMB: 0,
          }),
        },
        configs: {
          'hr-valid': createMockConfig(),
          'hr-invalid': createMockConfig(),
          'hr-bad-state': createMockConfig(),
        },
      }),
    );

    const loaded = diskRegistry.loadFromDisk();

    expect(loaded).toBe(1);
    expect(diskRegistry.getRunner('hr-valid')).toBeDefined();
    expect(diskRegistry.getRunner('hr-invalid')).toBeUndefined();
    expect(diskRegistry.getRunner('hr-bad-state')).toBeUndefined();
  });

  it('should handle corrupt JSON persistence file gracefully', () => {
    const diskRegistry = new HotRunnerRegistry(PERSISTENCE_DIR);
    stubPersistenceFile('{ invalid json !!!');

    expect(diskRegistry.loadFromDisk()).toBe(0);
    expect(diskRegistry.size).toBe(0);
  });

  it('should handle persistence file with invalid top-level structure', () => {
    const diskRegistry = new HotRunnerRegistry(PERSISTENCE_DIR);
    stubPersistenceFile('"just a string"');

    expect(diskRegistry.loadFromDisk()).toBe(0);
  });

  it('should handle persistence file with null runners', () => {
    const diskRegistry = new HotRunnerRegistry(PERSISTENCE_DIR);
    stubPersistenceFile('{"runners": null, "configs": null}');

    expect(diskRegistry.loadFromDisk()).toBe(0);
  });

  it('should validate and repair invalid runners', () => {
    const diskRegistry = new HotRunnerRegistry(PERSISTENCE_DIR);
    stubWritableDisk();

    // Start from a healthy, idle runner...
    const runnerId = diskRegistry.registerRunner(createMockConfig());
    diskRegistry.updateRunner(runnerId, { state: 'idle' });
    // ...then corrupt it through the public API by forcing an unknown state past the type checker.
    diskRegistry.updateRunner(runnerId, { state: 'invalid-state' as any });

    const repairedCount = diskRegistry.validateAndRepair();

    expect(repairedCount).toBe(1);
    expect(diskRegistry.getRunner(runnerId)!.state).toBe('unhealthy');
  });

  it('should not discard configs for valid runners when loading from disk', () => {
    const diskRegistry = new HotRunnerRegistry(PERSISTENCE_DIR);
    stubPersistenceFile(
      JSON.stringify({
        runners: {
          'hr-valid': persistedStatus('hr-valid'),
        },
        configs: {
          'hr-valid': createMockConfig(),
        },
      }),
    );

    diskRegistry.loadFromDisk();

    expect(diskRegistry.getConfig('hr-valid')).toBeDefined();
  });
});
// --- Health Monitor Tests ---
/**
 * HotRunnerHealthMonitor suite.
 *
 * Runs under Jest fake timers and exercises health checks (pass, fail, throw,
 * missing transport) plus recycling of unhealthy and idle runners.
 */
describe('HotRunnerHealthMonitor', () => {
  // Interval argument handed to startMonitoring in every test.
  const CHECK_INTERVAL = 30;

  let monitor: HotRunnerHealthMonitor;
  let registry: HotRunnerRegistry;
  let transports: Map<string, HotRunnerTransport>;

  /** Attaches `transport` to the runner and starts monitoring the shared registry. */
  const watch = (runnerId: string, transport: HotRunnerTransport): void => {
    transports.set(runnerId, transport);
    monitor.startMonitoring(registry, CHECK_INTERVAL, transports);
  };

  beforeEach(() => {
    jest.clearAllMocks();
    jest.useFakeTimers();
    monitor = new HotRunnerHealthMonitor();
    registry = new HotRunnerRegistry();
    transports = new Map();
  });

  afterEach(() => {
    // Stop the monitor before real timers are restored.
    monitor.stopMonitoring();
    jest.useRealTimers();
  });

  it('should start and stop monitoring', () => {
    monitor.startMonitoring(registry, CHECK_INTERVAL, transports);
    expect(monitor.isMonitoring).toBe(true);

    monitor.stopMonitoring();
    expect(monitor.isMonitoring).toBe(false);
  });

  it('should report healthy when transport health check passes', async () => {
    const runnerId = registry.registerRunner(createMockConfig());
    registry.updateRunner(runnerId, { state: 'idle' });
    const link = createMockTransport();
    watch(runnerId, link);

    const isHealthy = await monitor.checkHealth(runnerId);

    expect(isHealthy).toBe(true);
    expect(link.healthCheck).toHaveBeenCalled();
  });

  it('should mark runner as unhealthy when health check fails', async () => {
    const runnerId = registry.registerRunner(createMockConfig());
    registry.updateRunner(runnerId, { state: 'idle' });
    watch(
      runnerId,
      createMockTransport({
        healthCheck: jest.fn().mockResolvedValue(false),
      }),
    );

    const isHealthy = await monitor.checkHealth(runnerId);

    expect(isHealthy).toBe(false);
    expect(registry.getRunner(runnerId)!.state).toBe('unhealthy');
  });

  it('should mark runner as unhealthy when health check throws', async () => {
    const runnerId = registry.registerRunner(createMockConfig());
    registry.updateRunner(runnerId, { state: 'idle' });
    watch(
      runnerId,
      createMockTransport({
        healthCheck: jest.fn().mockRejectedValue(new Error('Connection refused')),
      }),
    );

    const isHealthy = await monitor.checkHealth(runnerId);

    expect(isHealthy).toBe(false);
  });

  it('should recycle unhealthy runner and remove from registry', async () => {
    const runnerId = registry.registerRunner(createMockConfig());
    const link = createMockTransport();
    watch(runnerId, link);

    await monitor.recycleUnhealthyRunner(runnerId);

    // Recycling drops the runner, disconnects its transport and forgets the transport entry.
    expect(registry.getRunner(runnerId)).toBeUndefined();
    expect(link.disconnect).toHaveBeenCalled();
    expect(transports.has(runnerId)).toBe(false);
  });

  it('should recycle idle runner when max idle time exceeded', async () => {
    const runnerId = registry.registerRunner(createMockConfig({ maxIdleTime: 60 }));
    // Backdate the last health check by 120 s, twice the 60 passed to recycleIdleRunner.
    const staleTimestamp = new Date(Date.now() - 120 * 1000).toISOString();
    registry.updateRunner(runnerId, { state: 'idle', lastHealthCheck: staleTimestamp });
    watch(runnerId, createMockTransport());

    await monitor.recycleIdleRunner(runnerId, 60);

    expect(registry.getRunner(runnerId)).toBeUndefined();
  });

  it('should not recycle idle runner when within max idle time', async () => {
    const runnerId = registry.registerRunner(createMockConfig({ maxIdleTime: 3600 }));
    registry.updateRunner(runnerId, {
      state: 'idle',
      lastHealthCheck: new Date().toISOString(),
    });
    watch(runnerId, createMockTransport());

    await monitor.recycleIdleRunner(runnerId, 3600);

    // A just-checked runner must survive the idle sweep.
    expect(registry.getRunner(runnerId)).toBeDefined();
  });

  it('should return false when no transport exists for runner', async () => {
    const runnerId = registry.registerRunner(createMockConfig());
    // Deliberately start monitoring with an empty transport map.
    monitor.startMonitoring(registry, CHECK_INTERVAL, transports);

    const isHealthy = await monitor.checkHealth(runnerId);

    expect(isHealthy).toBe(false);
  });
});
// --- Dispatcher Tests ---
/**
 * HotRunnerDispatcher suite.
 *
 * Exercises job routing against an in-memory registry: the happy path, runner
 * state transitions around a job, failure and timeout handling, output
 * streaming, and waiting for a runner to become idle.
 *
 * These tests run on real timers, so every timer or pending promise created
 * here must not outlive its test.
 */
describe('HotRunnerDispatcher', () => {
  let registry: HotRunnerRegistry;
  let transports: Map<string, HotRunnerTransport>;
  let dispatcher: HotRunnerDispatcher;

  /**
   * Builds a transport whose sendJob never settles.
   *
   * A never-settling promise is used instead of a long real `setTimeout`
   * (previously 60 s): the dispatcher's own timeout still wins the race, but no
   * timer is left behind to keep the Jest worker's event loop alive after the
   * test has finished.
   */
  const createHangingTransport = () =>
    createMockTransport({
      sendJob: jest.fn().mockImplementation(() => new Promise(() => {})),
    });

  beforeEach(() => {
    jest.clearAllMocks();
    registry = new HotRunnerRegistry();
    transports = new Map();
    dispatcher = new HotRunnerDispatcher(transports);
  });

  it('should dispatch a job to an available runner', async () => {
    const id = registry.registerRunner(createMockConfig());
    registry.updateRunner(id, { state: 'idle' });
    const transport = createMockTransport();
    transports.set(id, transport);
    const request = createMockJobRequest();

    const result = await dispatcher.dispatchJob(request, registry, '2022.3.0f1');

    expect(result.success).toBe(true);
    expect(result.exitCode).toBe(0);
    expect(transport.sendJob).toHaveBeenCalledWith(request);
  });

  it('should mark runner as busy during job execution', async () => {
    const id = registry.registerRunner(createMockConfig());
    registry.updateRunner(id, { state: 'idle' });
    // Records the runner state as observed from inside the transport while the job runs.
    const statesDuringJob: string[] = [];
    const transport = createMockTransport({
      sendJob: jest.fn().mockImplementation(async () => {
        const runner = registry.getRunner(id);
        if (runner) statesDuringJob.push(runner.state);

        return {
          jobId: 'job-001',
          success: true,
          exitCode: 0,
          duration: 1000,
          output: 'ok',
        };
      }),
    });
    transports.set(id, transport);

    await dispatcher.dispatchJob(createMockJobRequest(), registry, '2022.3.0f1');

    expect(statesDuringJob).toContain('busy');
    // After completion, should be idle again
    const runner = registry.getRunner(id);
    expect(runner!.state).toBe('idle');
  });

  it('should increment jobsCompleted after successful dispatch', async () => {
    const id = registry.registerRunner(createMockConfig());
    registry.updateRunner(id, { state: 'idle', jobsCompleted: 5 });
    const transport = createMockTransport();
    transports.set(id, transport);

    await dispatcher.dispatchJob(createMockJobRequest(), registry, '2022.3.0f1');

    const runner = registry.getRunner(id);
    expect(runner!.jobsCompleted).toBe(6);
  });

  it('should throw when no runner is available and wait times out', async () => {
    // No runners registered at all
    const request = createMockJobRequest({ timeout: 100 });

    await expect(dispatcher.dispatchJob(request, registry, '2022.3.0f1')).rejects.toThrow(/Timed out waiting/);
  });

  it('should throw when runner has no transport', async () => {
    const id = registry.registerRunner(createMockConfig());
    registry.updateRunner(id, { state: 'idle' });
    // No transport set for this runner
    const request = createMockJobRequest();

    await expect(dispatcher.dispatchJob(request, registry, '2022.3.0f1')).rejects.toThrow(/No transport available/);
  });

  it('should handle job failure and return runner to idle', async () => {
    const id = registry.registerRunner(createMockConfig());
    registry.updateRunner(id, { state: 'idle' });
    const transport = createMockTransport({
      sendJob: jest.fn().mockRejectedValue(new Error('Unity crashed')),
    });
    transports.set(id, transport);

    await expect(dispatcher.dispatchJob(createMockJobRequest(), registry, '2022.3.0f1')).rejects.toThrow(
      'Unity crashed',
    );

    // Runner should be back to idle despite failure
    const runner = registry.getRunner(id);
    expect(runner!.state).toBe('idle');
  });

  it('should handle job timeout', async () => {
    const id = registry.registerRunner(createMockConfig());
    registry.updateRunner(id, { state: 'idle' });
    transports.set(id, createHangingTransport());
    const request = createMockJobRequest({ timeout: 50 });

    await expect(dispatcher.dispatchJob(request, registry, '2022.3.0f1')).rejects.toThrow(/timed out/);
  });

  it('should disconnect transport on job timeout', async () => {
    const id = registry.registerRunner(createMockConfig());
    registry.updateRunner(id, { state: 'idle' });
    const transport = createHangingTransport();
    transports.set(id, transport);
    const request = createMockJobRequest({ timeout: 50 });

    await expect(dispatcher.dispatchJob(request, registry, '2022.3.0f1')).rejects.toThrow(/timed out/);

    // Transport should have been disconnected to clean up orphaned connection
    expect(transport.disconnect).toHaveBeenCalled();
  });

  it('should call output callback with job output', async () => {
    const id = registry.registerRunner(createMockConfig());
    registry.updateRunner(id, { state: 'idle' });
    const transport = createMockTransport();
    transports.set(id, transport);
    const outputCallback = jest.fn();

    await dispatcher.dispatchJob(createMockJobRequest(), registry, '2022.3.0f1', outputCallback);

    expect(outputCallback).toHaveBeenCalledWith('Build succeeded');
  });

  it('should wait for runner to become available', async () => {
    const id = registry.registerRunner(createMockConfig());
    // Runner starts in 'starting' state, not idle
    const transport = createMockTransport();
    transports.set(id, transport);
    // Simulate runner becoming idle after a short delay
    const becomeIdleTimer = setTimeout(() => {
      registry.updateRunner(id, { state: 'idle' });
    }, 50);
    const request = createMockJobRequest({ timeout: 5000 });

    try {
      const result = await dispatcher.dispatchJob(request, registry, '2022.3.0f1');
      expect(result.success).toBe(true);
    } finally {
      // If dispatch settles early, do not leave the timer pending after the test.
      clearTimeout(becomeIdleTimer);
    }
  });
});
// --- Service Integration Tests ---
/**
 * HotRunnerService integration suite.
 *
 * Each test gets a fresh service with the mocked fs reporting that no file
 * exists; the service is shut down again after every test.
 */
describe('HotRunnerService', () => {
  let service: HotRunnerService;

  beforeEach(() => {
    jest.clearAllMocks();
    mockFs.existsSync.mockReturnValue(false);
    service = new HotRunnerService();
  });

  afterEach(async () => {
    await service.shutdown();
  });

  it('should initialize and shut down cleanly', async () => {
    await service.initialize(createMockConfig());

    // Nothing has been registered yet, so the status list is empty.
    const runnerStatuses = service.getStatus();
    expect(runnerStatuses).toEqual([]);

    await service.shutdown();
  });

  it('should register a runner with transport', async () => {
    await service.initialize(createMockConfig());
    const link = createMockTransport();

    const runnerId = service.registerRunner(createMockConfig(), link);

    expect(runnerId).toMatch(/^hr-/);
    expect(service.getStatus()).toHaveLength(1);
  });

  it('should disconnect all transports on shutdown', async () => {
    await service.initialize(createMockConfig());
    const links = [createMockTransport(), createMockTransport()];
    for (const link of links) {
      service.registerRunner(createMockConfig(), link);
    }

    await service.shutdown();

    for (const link of links) {
      expect(link.disconnect).toHaveBeenCalled();
    }
  });

  it('should expose the underlying registry', async () => {
    await service.initialize(createMockConfig());

    expect(service.getRegistry()).toBeInstanceOf(HotRunnerRegistry);
  });
});

View File

@@ -0,0 +1,11 @@
/**
 * Barrel module for the hot runner feature.
 *
 * Re-exports the service, registry, health monitor and dispatcher classes from
 * their sibling modules, together with the shared type-only declarations from
 * `hot-runner-types`, so they can all be imported from this one module.
 */
export { HotRunnerService } from './hot-runner-service';
export { HotRunnerRegistry } from './hot-runner-registry';
export { HotRunnerHealthMonitor } from './hot-runner-health-monitor';
export { HotRunnerDispatcher } from './hot-runner-dispatcher';
// Type-only re-export: these names carry no runtime value.
export type {
HotRunnerConfig,
HotRunnerStatus,
HotRunnerJobRequest,
HotRunnerJobResult,
HotRunnerTransport,
} from './hot-runner-types';