mirror of
https://github.com/game-ci/unity-builder.git
synced 2026-05-31 22:06:16 -07:00
Compare commits
7 Commits
feature/ho
...
feature/pr
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dcb0894d0e | ||
|
|
90b9b0c7b0 | ||
|
|
e9c247f04f | ||
|
|
cff759721a | ||
|
|
7e9d0bf53e | ||
|
|
8194790728 | ||
|
|
786ee3799c |
69
action.yml
69
action.yml
@@ -182,8 +182,8 @@ inputs:
|
||||
required: false
|
||||
default: ''
|
||||
description:
|
||||
'[Orchestrator] Run a custom job instead of the standard build automation for orchestrator (in yaml format with
|
||||
the keys image, secrets (name, value object array), command line string)'
|
||||
'[Orchestrator] Run a custom job instead of the standard build automation for orchestrator (in yaml format with the
|
||||
keys image, secrets (name, value object array), command line string)'
|
||||
awsStackName:
|
||||
default: 'game-ci'
|
||||
required: false
|
||||
@@ -194,6 +194,42 @@ inputs:
|
||||
description:
|
||||
'[Orchestrator] Either local, k8s or aws can be used to run builds on a remote cluster. Additional parameters must
|
||||
be configured.'
|
||||
fallbackProviderStrategy:
|
||||
default: ''
|
||||
required: false
|
||||
description:
|
||||
'[Orchestrator] Fallback provider when the primary is unavailable. Used with runnerCheckEnabled for automatic
|
||||
failover, or as a catch-all if the primary provider fails to initialize.'
|
||||
runnerCheckEnabled:
|
||||
default: 'false'
|
||||
required: false
|
||||
description:
|
||||
'[Orchestrator] Check GitHub Actions runner availability before starting a build. When no suitable runners are
|
||||
available and fallbackProviderStrategy is set, automatically routes to the fallback provider.'
|
||||
runnerCheckLabels:
|
||||
default: ''
|
||||
required: false
|
||||
description:
|
||||
'[Orchestrator] Comma-separated runner labels to filter when checking availability (e.g. self-hosted,linux).
|
||||
When empty, checks all runners in the repository.'
|
||||
runnerCheckMinAvailable:
|
||||
default: '1'
|
||||
required: false
|
||||
description:
|
||||
'[Orchestrator] Minimum number of idle runners required for the primary provider. If fewer are available,
|
||||
routes to fallbackProviderStrategy.'
|
||||
retryOnFallback:
|
||||
default: 'false'
|
||||
required: false
|
||||
description:
|
||||
'[Orchestrator] When true and fallbackProviderStrategy is set, automatically retry the build on the fallback
|
||||
provider if the primary provider fails. Useful for long builds where transient cloud failures are common.'
|
||||
providerInitTimeout:
|
||||
default: '0'
|
||||
required: false
|
||||
description:
|
||||
'[Orchestrator] Maximum seconds to wait for the primary provider to initialize (setupWorkflow). If exceeded
|
||||
and fallbackProviderStrategy is set, switches to the fallback. Set to 0 to disable (default).'
|
||||
resourceTracking:
|
||||
default: 'false'
|
||||
required: false
|
||||
@@ -280,35 +316,6 @@ inputs:
|
||||
'[Orchestrator] Specifies the repo for the unity builder. Useful if you forked the repo for testing, features, or
|
||||
fixes.'
|
||||
|
||||
hotRunnerEnabled:
|
||||
description: '[HotRunner] Use persistent hot runner for builds (requires pre-registered runners)'
|
||||
required: false
|
||||
default: 'false'
|
||||
hotRunnerTransport:
|
||||
description: '[HotRunner] Transport protocol for hot runner communication: websocket, grpc, named-pipe'
|
||||
required: false
|
||||
default: 'websocket'
|
||||
hotRunnerHost:
|
||||
description: '[HotRunner] Hot runner host address'
|
||||
required: false
|
||||
default: 'localhost'
|
||||
hotRunnerPort:
|
||||
description: '[HotRunner] Hot runner port number'
|
||||
required: false
|
||||
default: '9090'
|
||||
hotRunnerHealthInterval:
|
||||
description: '[HotRunner] Health check interval in seconds'
|
||||
required: false
|
||||
default: '30'
|
||||
hotRunnerMaxIdle:
|
||||
description: '[HotRunner] Maximum idle time in seconds before recycling runner'
|
||||
required: false
|
||||
default: '3600'
|
||||
hotRunnerFallbackToCold:
|
||||
description: '[HotRunner] Fall back to cold build if no hot runner available'
|
||||
required: false
|
||||
default: 'true'
|
||||
|
||||
outputs:
|
||||
volume:
|
||||
description: 'The Persistent Volume (PV) where the build artifacts have been stored by Kubernetes'
|
||||
|
||||
1063
dist/index.js
generated
vendored
1063
dist/index.js
generated
vendored
File diff suppressed because it is too large
Load Diff
2
dist/index.js.map
generated
vendored
2
dist/index.js.map
generated
vendored
File diff suppressed because one or more lines are too long
75
src/index.ts
75
src/index.ts
@@ -3,8 +3,6 @@ import { Action, BuildParameters, Cache, Orchestrator, Docker, ImageTag, Output
|
||||
import { Cli } from './model/cli/cli';
|
||||
import MacBuilder from './model/mac-builder';
|
||||
import PlatformSetup from './model/platform-setup';
|
||||
import { HotRunnerService } from './model/orchestrator/services/hot-runner';
|
||||
import { HotRunnerConfig } from './model/orchestrator/services/hot-runner/hot-runner-types';
|
||||
|
||||
async function runMain() {
|
||||
try {
|
||||
@@ -23,46 +21,17 @@ async function runMain() {
|
||||
|
||||
let exitCode = -1;
|
||||
|
||||
// Hot runner path: attempt to use a persistent Unity editor instance
|
||||
if (buildParameters.hotRunnerEnabled) {
|
||||
core.info('[HotRunner] Hot runner mode enabled, attempting hot build...');
|
||||
|
||||
const hotRunnerConfig: HotRunnerConfig = {
|
||||
enabled: true,
|
||||
transport: buildParameters.hotRunnerTransport,
|
||||
host: buildParameters.hotRunnerHost,
|
||||
port: buildParameters.hotRunnerPort,
|
||||
healthCheckInterval: buildParameters.hotRunnerHealthInterval,
|
||||
maxIdleTime: buildParameters.hotRunnerMaxIdle,
|
||||
maxJobsBeforeRecycle: 0, // no automatic recycle by job count
|
||||
};
|
||||
|
||||
const hotRunnerService = new HotRunnerService();
|
||||
|
||||
try {
|
||||
await hotRunnerService.initialize(hotRunnerConfig);
|
||||
const result = await hotRunnerService.submitBuild(buildParameters, (output) => {
|
||||
core.info(output);
|
||||
});
|
||||
|
||||
exitCode = result.exitCode;
|
||||
core.info(`[HotRunner] Build completed with exit code ${exitCode}`);
|
||||
await hotRunnerService.shutdown();
|
||||
} catch (hotRunnerError) {
|
||||
await hotRunnerService.shutdown();
|
||||
|
||||
if (buildParameters.hotRunnerFallbackToCold) {
|
||||
core.warning(
|
||||
`[HotRunner] Hot runner failed: ${(hotRunnerError as Error).message}. Falling back to cold build.`,
|
||||
);
|
||||
exitCode = await runColdBuild(buildParameters, baseImage, workspace, actionFolder);
|
||||
} else {
|
||||
throw hotRunnerError;
|
||||
}
|
||||
}
|
||||
} else if (buildParameters.providerStrategy === 'local') {
|
||||
if (buildParameters.providerStrategy === 'local') {
|
||||
core.info('Building locally');
|
||||
exitCode = await runColdBuild(buildParameters, baseImage, workspace, actionFolder);
|
||||
await PlatformSetup.setup(buildParameters, actionFolder);
|
||||
exitCode =
|
||||
process.platform === 'darwin'
|
||||
? await MacBuilder.run(actionFolder)
|
||||
: await Docker.run(baseImage.toString(), {
|
||||
workspace,
|
||||
actionFolder,
|
||||
...buildParameters,
|
||||
});
|
||||
} else {
|
||||
await Orchestrator.run(buildParameters, baseImage.toString());
|
||||
exitCode = 0;
|
||||
@@ -81,28 +50,4 @@ async function runMain() {
|
||||
}
|
||||
}
|
||||
|
||||
async function runColdBuild(
|
||||
buildParameters: BuildParameters,
|
||||
baseImage: ImageTag,
|
||||
workspace: string,
|
||||
actionFolder: string,
|
||||
): Promise<number> {
|
||||
if (buildParameters.providerStrategy === 'local') {
|
||||
core.info('Building locally');
|
||||
await PlatformSetup.setup(buildParameters, actionFolder);
|
||||
|
||||
return process.platform === 'darwin'
|
||||
? await MacBuilder.run(actionFolder)
|
||||
: await Docker.run(baseImage.toString(), {
|
||||
workspace,
|
||||
actionFolder,
|
||||
...buildParameters,
|
||||
});
|
||||
} else {
|
||||
await Orchestrator.run(buildParameters, baseImage.toString());
|
||||
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
runMain();
|
||||
|
||||
@@ -54,6 +54,12 @@ class BuildParameters {
|
||||
public sshAgent!: string;
|
||||
public sshPublicKeysDirectoryPath!: string;
|
||||
public providerStrategy!: string;
|
||||
public fallbackProviderStrategy!: string;
|
||||
public runnerCheckEnabled!: boolean;
|
||||
public runnerCheckLabels!: string[];
|
||||
public runnerCheckMinAvailable!: number;
|
||||
public retryOnFallback!: boolean;
|
||||
public providerInitTimeout!: number;
|
||||
public gitPrivateToken!: string;
|
||||
public awsStackName!: string;
|
||||
public awsEndpoint?: string;
|
||||
@@ -106,13 +112,6 @@ class BuildParameters {
|
||||
public cacheUnityInstallationOnMac!: boolean;
|
||||
public unityHubVersionOnMac!: string;
|
||||
public dockerWorkspacePath!: string;
|
||||
public hotRunnerEnabled!: boolean;
|
||||
public hotRunnerTransport!: 'websocket' | 'grpc' | 'named-pipe';
|
||||
public hotRunnerHost!: string;
|
||||
public hotRunnerPort!: number;
|
||||
public hotRunnerHealthInterval!: number;
|
||||
public hotRunnerMaxIdle!: number;
|
||||
public hotRunnerFallbackToCold!: boolean;
|
||||
|
||||
public static shouldUseRetainedWorkspaceMode(buildParameters: BuildParameters) {
|
||||
return buildParameters.maxRetainedWorkspaces > 0 && Orchestrator.lockedWorkspace !== ``;
|
||||
@@ -201,6 +200,12 @@ class BuildParameters {
|
||||
containerRegistryRepository: Input.containerRegistryRepository,
|
||||
containerRegistryImageVersion: Input.containerRegistryImageVersion,
|
||||
providerStrategy: OrchestratorOptions.providerStrategy,
|
||||
fallbackProviderStrategy: OrchestratorOptions.fallbackProviderStrategy,
|
||||
runnerCheckEnabled: OrchestratorOptions.runnerCheckEnabled,
|
||||
runnerCheckLabels: OrchestratorOptions.runnerCheckLabels,
|
||||
runnerCheckMinAvailable: OrchestratorOptions.runnerCheckMinAvailable,
|
||||
retryOnFallback: OrchestratorOptions.retryOnFallback,
|
||||
providerInitTimeout: OrchestratorOptions.providerInitTimeout,
|
||||
buildPlatform: OrchestratorOptions.buildPlatform,
|
||||
kubeConfig: OrchestratorOptions.kubeConfig,
|
||||
containerMemory: OrchestratorOptions.containerMemory,
|
||||
@@ -249,13 +254,6 @@ class BuildParameters {
|
||||
cacheUnityInstallationOnMac: Input.cacheUnityInstallationOnMac,
|
||||
unityHubVersionOnMac: Input.unityHubVersionOnMac,
|
||||
dockerWorkspacePath: Input.dockerWorkspacePath,
|
||||
hotRunnerEnabled: Input.hotRunnerEnabled,
|
||||
hotRunnerTransport: Input.hotRunnerTransport,
|
||||
hotRunnerHost: Input.hotRunnerHost,
|
||||
hotRunnerPort: Input.hotRunnerPort,
|
||||
hotRunnerHealthInterval: Input.hotRunnerHealthInterval,
|
||||
hotRunnerMaxIdle: Input.hotRunnerMaxIdle,
|
||||
hotRunnerFallbackToCold: Input.hotRunnerFallbackToCold,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -282,38 +282,6 @@ class Input {
|
||||
return Input.getInput('skipActivation')?.toLowerCase() ?? 'false';
|
||||
}
|
||||
|
||||
static get hotRunnerEnabled(): boolean {
|
||||
const input = Input.getInput('hotRunnerEnabled') ?? false;
|
||||
|
||||
return input === 'true';
|
||||
}
|
||||
|
||||
static get hotRunnerTransport(): 'websocket' | 'grpc' | 'named-pipe' {
|
||||
return (Input.getInput('hotRunnerTransport') ?? 'websocket') as 'websocket' | 'grpc' | 'named-pipe';
|
||||
}
|
||||
|
||||
static get hotRunnerHost(): string {
|
||||
return Input.getInput('hotRunnerHost') ?? 'localhost';
|
||||
}
|
||||
|
||||
static get hotRunnerPort(): number {
|
||||
return Number.parseInt(Input.getInput('hotRunnerPort') ?? '9090', 10);
|
||||
}
|
||||
|
||||
static get hotRunnerHealthInterval(): number {
|
||||
return Number.parseInt(Input.getInput('hotRunnerHealthInterval') ?? '30', 10);
|
||||
}
|
||||
|
||||
static get hotRunnerMaxIdle(): number {
|
||||
return Number.parseInt(Input.getInput('hotRunnerMaxIdle') ?? '3600', 10);
|
||||
}
|
||||
|
||||
static get hotRunnerFallbackToCold(): boolean {
|
||||
const input = Input.getInput('hotRunnerFallbackToCold') ?? 'true';
|
||||
|
||||
return input === 'true';
|
||||
}
|
||||
|
||||
public static ToEnvVarFormat(input: string) {
|
||||
if (input.toUpperCase() === input) {
|
||||
return input;
|
||||
|
||||
@@ -138,6 +138,32 @@ class OrchestratorOptions {
|
||||
return provider || 'local';
|
||||
}
|
||||
|
||||
static get fallbackProviderStrategy(): string {
|
||||
return OrchestratorOptions.getInput('fallbackProviderStrategy') || '';
|
||||
}
|
||||
|
||||
static get runnerCheckEnabled(): boolean {
|
||||
return OrchestratorOptions.getInput('runnerCheckEnabled') === 'true';
|
||||
}
|
||||
|
||||
static get runnerCheckLabels(): string[] {
|
||||
const labels = OrchestratorOptions.getInput('runnerCheckLabels');
|
||||
|
||||
return labels ? labels.split(',').map((l) => l.trim()) : [];
|
||||
}
|
||||
|
||||
static get runnerCheckMinAvailable(): number {
|
||||
return Number(OrchestratorOptions.getInput('runnerCheckMinAvailable')) || 1;
|
||||
}
|
||||
|
||||
static get retryOnFallback(): boolean {
|
||||
return OrchestratorOptions.getInput('retryOnFallback') === 'true';
|
||||
}
|
||||
|
||||
static get providerInitTimeout(): number {
|
||||
return Number(OrchestratorOptions.getInput('providerInitTimeout')) || 0;
|
||||
}
|
||||
|
||||
static get containerCpu(): string {
|
||||
return OrchestratorOptions.getInput('containerCpu') || `1024`;
|
||||
}
|
||||
|
||||
@@ -20,6 +20,7 @@ import { FollowLogStreamService } from './services/core/follow-log-stream-servic
|
||||
import OrchestratorResult from './services/core/orchestrator-result';
|
||||
import OrchestratorOptions from './options/orchestrator-options';
|
||||
import ResourceTracking from './services/core/resource-tracking';
|
||||
import { RunnerAvailabilityService } from './services/core/runner-availability-service';
|
||||
|
||||
class Orchestrator {
|
||||
public static Provider: ProviderInterface;
|
||||
@@ -76,6 +77,42 @@ class Orchestrator {
|
||||
private static async setupSelectedBuildPlatform() {
|
||||
OrchestratorLogger.log(`Orchestrator platform selected ${Orchestrator.buildParameters.providerStrategy}`);
|
||||
|
||||
// Check runner availability and apply fallback if needed
|
||||
if (Orchestrator.buildParameters.runnerCheckEnabled && Orchestrator.buildParameters.fallbackProviderStrategy) {
|
||||
const owner = OrchestratorOptions.githubOwner;
|
||||
const repo = OrchestratorOptions.githubRepoName;
|
||||
const token = Orchestrator.buildParameters.gitPrivateToken || process.env.GITHUB_TOKEN || '';
|
||||
|
||||
OrchestratorLogger.log(
|
||||
`Checking runner availability (labels: [${Orchestrator.buildParameters.runnerCheckLabels.join(', ')}], min: ${
|
||||
Orchestrator.buildParameters.runnerCheckMinAvailable
|
||||
})`,
|
||||
);
|
||||
|
||||
const result = await RunnerAvailabilityService.checkAvailability(
|
||||
owner,
|
||||
repo,
|
||||
token,
|
||||
Orchestrator.buildParameters.runnerCheckLabels,
|
||||
Orchestrator.buildParameters.runnerCheckMinAvailable,
|
||||
);
|
||||
|
||||
OrchestratorLogger.log(
|
||||
`Runner check: ${result.totalRunners} total, ${result.matchingRunners} matching, ${result.idleRunners} idle — ${result.reason}`,
|
||||
);
|
||||
|
||||
if (result.shouldFallback) {
|
||||
const original = Orchestrator.buildParameters.providerStrategy;
|
||||
const fallback = Orchestrator.buildParameters.fallbackProviderStrategy;
|
||||
OrchestratorLogger.log(`Falling back from '${original}' to '${fallback}' — ${result.reason}`);
|
||||
Orchestrator.buildParameters.providerStrategy = fallback;
|
||||
core.setOutput('providerFallbackUsed', 'true');
|
||||
core.setOutput('providerFallbackReason', result.reason);
|
||||
} else {
|
||||
core.setOutput('providerFallbackUsed', 'false');
|
||||
}
|
||||
}
|
||||
|
||||
// Detect LocalStack endpoints and handle AWS provider appropriately
|
||||
// AWS_FORCE_PROVIDER options:
|
||||
// - 'aws': Force AWS provider (requires LocalStack Pro with ECS support)
|
||||
@@ -182,6 +219,30 @@ class Orchestrator {
|
||||
if (baseImage.includes(`undefined`)) {
|
||||
throw new Error(`baseImage is undefined`);
|
||||
}
|
||||
|
||||
try {
|
||||
return await Orchestrator.runWithProvider(buildParameters, baseImage);
|
||||
} catch (primaryError: any) {
|
||||
// Retry on fallback provider if enabled and a fallback is configured
|
||||
const fallback = buildParameters.fallbackProviderStrategy;
|
||||
const alreadyOnFallback = buildParameters.providerStrategy === fallback;
|
||||
if (buildParameters.retryOnFallback && fallback && !alreadyOnFallback) {
|
||||
OrchestratorLogger.log(
|
||||
`Primary provider '${buildParameters.providerStrategy}' failed: ${primaryError.message}`,
|
||||
);
|
||||
OrchestratorLogger.log(`Retrying build on fallback provider '${fallback}'...`);
|
||||
buildParameters.providerStrategy = fallback;
|
||||
core.setOutput('providerFallbackUsed', 'true');
|
||||
core.setOutput('providerFallbackReason', `Primary provider failed: ${primaryError.message}`);
|
||||
|
||||
return await Orchestrator.runWithProvider(buildParameters, baseImage);
|
||||
}
|
||||
|
||||
throw primaryError;
|
||||
}
|
||||
}
|
||||
|
||||
private static async runWithProvider(buildParameters: BuildParameters, baseImage: string) {
|
||||
await Orchestrator.setup(buildParameters);
|
||||
|
||||
// When aws-local mode is enabled, validate AWS CloudFormation templates
|
||||
@@ -189,12 +250,10 @@ class Orchestrator {
|
||||
if (Orchestrator.validateAwsTemplates) {
|
||||
await Orchestrator.validateAwsCloudFormationTemplates();
|
||||
}
|
||||
await Orchestrator.Provider.setupWorkflow(
|
||||
Orchestrator.buildParameters.buildGuid,
|
||||
Orchestrator.buildParameters,
|
||||
Orchestrator.buildParameters.branch,
|
||||
Orchestrator.defaultSecrets,
|
||||
);
|
||||
|
||||
// Setup workflow with optional init timeout
|
||||
await Orchestrator.setupWorkflowWithTimeout();
|
||||
|
||||
try {
|
||||
if (buildParameters.maxRetainedWorkspaces > 0) {
|
||||
Orchestrator.lockedWorkspace = SharedWorkspaceLocking.NewWorkspaceName();
|
||||
@@ -275,6 +334,39 @@ class Orchestrator {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs setupWorkflow with an optional timeout. If providerInitTimeout is set and the
|
||||
* provider takes longer than that to initialize, throws an error that triggers
|
||||
* retry-on-fallback (if enabled).
|
||||
*/
|
||||
private static async setupWorkflowWithTimeout() {
|
||||
const timeoutSeconds = Orchestrator.buildParameters.providerInitTimeout;
|
||||
|
||||
const setupPromise = Orchestrator.Provider.setupWorkflow(
|
||||
Orchestrator.buildParameters.buildGuid,
|
||||
Orchestrator.buildParameters,
|
||||
Orchestrator.buildParameters.branch,
|
||||
Orchestrator.defaultSecrets,
|
||||
);
|
||||
|
||||
if (timeoutSeconds <= 0) {
|
||||
await setupPromise;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
OrchestratorLogger.log(`Provider init timeout: ${timeoutSeconds}s`);
|
||||
|
||||
const timeoutPromise = new Promise<never>((_, reject) => {
|
||||
setTimeout(
|
||||
() => reject(new Error(`Provider initialization timed out after ${timeoutSeconds}s`)),
|
||||
timeoutSeconds * 1000,
|
||||
);
|
||||
});
|
||||
|
||||
await Promise.race([setupPromise, timeoutPromise]);
|
||||
}
|
||||
|
||||
private static async updateStatusWithBuildParameters() {
|
||||
const content = { ...Orchestrator.buildParameters };
|
||||
content.gitPrivateToken = ``;
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
# Hot Runner Protocol
|
||||
|
||||
Extensible runner registration and persistent Unity editor provider protocol.
|
||||
|
||||
See GitHub Issue for full specification.
|
||||
@@ -0,0 +1,318 @@
|
||||
import { RunnerAvailabilityService } from './runner-availability-service';
|
||||
|
||||
// Mock @octokit/core
|
||||
jest.mock('@octokit/core', () => ({
|
||||
Octokit: jest.fn().mockImplementation(() => ({
|
||||
request: jest.fn(),
|
||||
})),
|
||||
}));
|
||||
|
||||
jest.mock('./orchestrator-logger', () => ({
|
||||
__esModule: true,
|
||||
default: {
|
||||
log: jest.fn(),
|
||||
logWarning: jest.fn(),
|
||||
error: jest.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
import { Octokit } from '@octokit/core';
|
||||
|
||||
const MockedOctokit = Octokit as jest.MockedClass<typeof Octokit>;
|
||||
|
||||
function createMockRunners(runners: Array<{ name: string; status: string; busy: boolean; labels: string[] }>) {
|
||||
return runners.map((r, i) => ({
|
||||
id: i + 1,
|
||||
name: r.name,
|
||||
status: r.status,
|
||||
busy: r.busy,
|
||||
labels: r.labels.map((l) => ({ name: l })),
|
||||
}));
|
||||
}
|
||||
|
||||
describe('RunnerAvailabilityService', () => {
|
||||
beforeEach(() => {
|
||||
jest.clearAllMocks();
|
||||
});
|
||||
|
||||
describe('checkAvailability', () => {
|
||||
it('should skip check and not fallback when no token is provided', async () => {
|
||||
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', '', [], 1);
|
||||
expect(result.shouldFallback).toBe(false);
|
||||
expect(result.reason).toContain('No GitHub token');
|
||||
});
|
||||
|
||||
it('should fallback when no runners are registered', async () => {
|
||||
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners: [] } });
|
||||
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
|
||||
|
||||
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
|
||||
expect(result.shouldFallback).toBe(true);
|
||||
expect(result.reason).toContain('No runners registered');
|
||||
expect(result.totalRunners).toBe(0);
|
||||
});
|
||||
|
||||
it('should not fallback when enough idle runners are available', async () => {
|
||||
const runners = createMockRunners([
|
||||
{ name: 'runner-1', status: 'online', busy: false, labels: ['self-hosted', 'linux'] },
|
||||
{ name: 'runner-2', status: 'online', busy: false, labels: ['self-hosted', 'linux'] },
|
||||
]);
|
||||
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
|
||||
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
|
||||
|
||||
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
|
||||
expect(result.shouldFallback).toBe(false);
|
||||
expect(result.idleRunners).toBe(2);
|
||||
expect(result.totalRunners).toBe(2);
|
||||
});
|
||||
|
||||
it('should fallback when all runners are busy', async () => {
|
||||
const runners = createMockRunners([
|
||||
{ name: 'runner-1', status: 'online', busy: true, labels: ['self-hosted'] },
|
||||
{ name: 'runner-2', status: 'online', busy: true, labels: ['self-hosted'] },
|
||||
]);
|
||||
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
|
||||
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
|
||||
|
||||
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
|
||||
expect(result.shouldFallback).toBe(true);
|
||||
expect(result.idleRunners).toBe(0);
|
||||
expect(result.matchingRunners).toBe(2);
|
||||
});
|
||||
|
||||
it('should fallback when all runners are offline', async () => {
|
||||
const runners = createMockRunners([
|
||||
{ name: 'runner-1', status: 'offline', busy: false, labels: ['self-hosted'] },
|
||||
]);
|
||||
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
|
||||
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
|
||||
|
||||
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
|
||||
expect(result.shouldFallback).toBe(true);
|
||||
expect(result.idleRunners).toBe(0);
|
||||
});
|
||||
|
||||
it('should filter runners by required labels', async () => {
|
||||
const runners = createMockRunners([
|
||||
{ name: 'linux-runner', status: 'online', busy: false, labels: ['self-hosted', 'linux'] },
|
||||
{ name: 'windows-runner', status: 'online', busy: false, labels: ['self-hosted', 'windows'] },
|
||||
]);
|
||||
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
|
||||
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
|
||||
|
||||
const result = await RunnerAvailabilityService.checkAvailability(
|
||||
'owner',
|
||||
'repo',
|
||||
'token',
|
||||
['self-hosted', 'linux'],
|
||||
1,
|
||||
);
|
||||
|
||||
expect(result.shouldFallback).toBe(false);
|
||||
expect(result.matchingRunners).toBe(1);
|
||||
expect(result.idleRunners).toBe(1);
|
||||
expect(result.totalRunners).toBe(2);
|
||||
});
|
||||
|
||||
it('should fallback when no runners match required labels', async () => {
|
||||
const runners = createMockRunners([
|
||||
{ name: 'windows-runner', status: 'online', busy: false, labels: ['self-hosted', 'windows'] },
|
||||
]);
|
||||
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
|
||||
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
|
||||
|
||||
const result = await RunnerAvailabilityService.checkAvailability(
|
||||
'owner',
|
||||
'repo',
|
||||
'token',
|
||||
['self-hosted', 'linux'],
|
||||
1,
|
||||
);
|
||||
|
||||
expect(result.shouldFallback).toBe(true);
|
||||
expect(result.matchingRunners).toBe(0);
|
||||
expect(result.idleRunners).toBe(0);
|
||||
});
|
||||
|
||||
it('should respect minAvailable threshold', async () => {
|
||||
const runners = createMockRunners([{ name: 'runner-1', status: 'online', busy: false, labels: ['self-hosted'] }]);
|
||||
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
|
||||
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
|
||||
|
||||
// Need 2, have 1 — should fallback
|
||||
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 2);
|
||||
expect(result.shouldFallback).toBe(true);
|
||||
expect(result.idleRunners).toBe(1);
|
||||
});
|
||||
|
||||
it('should be case-insensitive for label matching', async () => {
|
||||
const runners = createMockRunners([
|
||||
{ name: 'runner-1', status: 'online', busy: false, labels: ['Self-Hosted', 'Linux'] },
|
||||
]);
|
||||
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
|
||||
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
|
||||
|
||||
const result = await RunnerAvailabilityService.checkAvailability(
|
||||
'owner',
|
||||
'repo',
|
||||
'token',
|
||||
['self-hosted', 'linux'],
|
||||
1,
|
||||
);
|
||||
expect(result.shouldFallback).toBe(false);
|
||||
expect(result.matchingRunners).toBe(1);
|
||||
});
|
||||
|
||||
it('should not fallback on API error (fail-open)', async () => {
|
||||
const mockRequest = jest.fn().mockRejectedValue(new Error('403 Forbidden'));
|
||||
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
|
||||
|
||||
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
|
||||
expect(result.shouldFallback).toBe(false);
|
||||
expect(result.reason).toContain('Runner check failed');
|
||||
});
|
||||
|
||||
it('should count only online+idle runners', async () => {
|
||||
const runners = createMockRunners([
|
||||
{ name: 'idle', status: 'online', busy: false, labels: ['self-hosted'] },
|
||||
{ name: 'busy', status: 'online', busy: true, labels: ['self-hosted'] },
|
||||
{ name: 'offline', status: 'offline', busy: false, labels: ['self-hosted'] },
|
||||
]);
|
||||
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
|
||||
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
|
||||
|
||||
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
|
||||
expect(result.shouldFallback).toBe(false);
|
||||
expect(result.totalRunners).toBe(3);
|
||||
expect(result.matchingRunners).toBe(3);
|
||||
expect(result.idleRunners).toBe(1);
|
||||
});
|
||||
});
|
||||
|
||||
describe('pagination limits', () => {
|
||||
it('should stop paginating after reaching the page limit', async () => {
|
||||
// Return full pages (100 runners each) to force continued pagination
|
||||
let callCount = 0;
|
||||
const mockRequest = jest.fn().mockImplementation(() => {
|
||||
callCount++;
|
||||
const runners = createMockRunners(
|
||||
Array.from({ length: 100 }, (_, i) => ({
|
||||
name: `runner-${callCount}-${i}`,
|
||||
status: 'online' as const,
|
||||
busy: false,
|
||||
labels: ['self-hosted'],
|
||||
})),
|
||||
);
|
||||
|
||||
return Promise.resolve({ status: 200, data: { runners } });
|
||||
});
|
||||
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
|
||||
|
||||
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
|
||||
|
||||
// Should have called at most 100 pages (the MAX_PAGINATION_PAGES limit)
|
||||
expect(mockRequest).toHaveBeenCalledTimes(100);
|
||||
// Should still have runners from the pages it did fetch
|
||||
expect(result.totalRunners).toBe(10000);
|
||||
expect(result.shouldFallback).toBe(false);
|
||||
});
|
||||
|
||||
it('should stop paginating on rate limit (HTTP 403)', async () => {
|
||||
let callCount = 0;
|
||||
const mockRequest = jest.fn().mockImplementation(() => {
|
||||
callCount++;
|
||||
if (callCount === 2) {
|
||||
// Octokit throws for non-2xx responses
|
||||
const error: any = new Error('API rate limit exceeded');
|
||||
error.status = 403;
|
||||
error.response = {
|
||||
status: 403,
|
||||
headers: { 'x-ratelimit-reset': String(Math.floor(Date.now() / 1000) + 3600) },
|
||||
};
|
||||
|
||||
return Promise.reject(error);
|
||||
}
|
||||
const runners = createMockRunners(
|
||||
Array.from({ length: 100 }, (_, i) => ({
|
||||
name: `runner-${i}`,
|
||||
status: 'online' as const,
|
||||
busy: false,
|
||||
labels: ['self-hosted'],
|
||||
})),
|
||||
);
|
||||
|
||||
return Promise.resolve({ status: 200, data: { runners } });
|
||||
});
|
||||
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
|
||||
|
||||
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
|
||||
|
||||
// Should have stopped at page 2 (rate limited)
|
||||
expect(mockRequest).toHaveBeenCalledTimes(2);
|
||||
// Should use the 100 runners from the first page
|
||||
expect(result.totalRunners).toBe(100);
|
||||
expect(result.shouldFallback).toBe(false);
|
||||
});
|
||||
|
||||
it('should stop paginating on rate limit (HTTP 429)', async () => {
|
||||
let callCount = 0;
|
||||
const mockRequest = jest.fn().mockImplementation(() => {
|
||||
callCount++;
|
||||
if (callCount === 1) {
|
||||
// Octokit throws for non-2xx responses
|
||||
const error: any = new Error('Too Many Requests');
|
||||
error.status = 429;
|
||||
error.response = { status: 429, headers: {} };
|
||||
|
||||
return Promise.reject(error);
|
||||
}
|
||||
|
||||
return Promise.resolve({ status: 200, data: { runners: [] } });
|
||||
});
|
||||
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
|
||||
|
||||
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
|
||||
|
||||
// Should have stopped at first page (rate limited immediately)
|
||||
expect(mockRequest).toHaveBeenCalledTimes(1);
|
||||
// No runners found — should fallback
|
||||
expect(result.totalRunners).toBe(0);
|
||||
expect(result.shouldFallback).toBe(true);
|
||||
});
|
||||
|
||||
it('should handle pagination timeout gracefully', async () => {
|
||||
// Mock Date.now to simulate timeout
|
||||
const originalDateNow = Date.now;
|
||||
let callCount = 0;
|
||||
|
||||
const mockRequest = jest.fn().mockImplementation(() => {
|
||||
callCount++;
|
||||
// After first call, advance time past the timeout
|
||||
if (callCount >= 2) {
|
||||
Date.now = jest.fn(() => originalDateNow() + 31_000);
|
||||
}
|
||||
const runners = createMockRunners(
|
||||
Array.from({ length: 100 }, (_, i) => ({
|
||||
name: `runner-${callCount}-${i}`,
|
||||
status: 'online' as const,
|
||||
busy: false,
|
||||
labels: ['self-hosted'],
|
||||
})),
|
||||
);
|
||||
|
||||
return Promise.resolve({ status: 200, data: { runners } });
|
||||
});
|
||||
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
|
||||
|
||||
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
|
||||
|
||||
// Should have stopped after timeout was detected (2 pages: first succeeds, second triggers timeout check)
|
||||
expect(mockRequest.mock.calls.length).toBeLessThanOrEqual(3);
|
||||
// Should have runners from pages fetched before timeout
|
||||
expect(result.totalRunners).toBeGreaterThan(0);
|
||||
|
||||
// Restore
|
||||
Date.now = originalDateNow;
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,205 @@
|
||||
import { Octokit } from '@octokit/core';
|
||||
import OrchestratorLogger from './orchestrator-logger';
|
||||
|
||||
interface GitHubRunner {
|
||||
id: number;
|
||||
name: string;
|
||||
status: 'online' | 'offline';
|
||||
busy: boolean;
|
||||
labels: Array<{ name: string }>;
|
||||
}
|
||||
|
||||
interface RunnerCheckResult {
|
||||
shouldFallback: boolean;
|
||||
reason: string;
|
||||
totalRunners: number;
|
||||
matchingRunners: number;
|
||||
idleRunners: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Maximum number of pages to fetch when paginating through GitHub API results.
|
||||
* 100 pages * 100 per page = 10,000 runners maximum.
|
||||
*/
|
||||
const MAX_PAGINATION_PAGES = 100;
|
||||
|
||||
/**
|
||||
* Total timeout in milliseconds for the pagination loop.
|
||||
* Prevents indefinite API calls if GitHub is slow or pagination is unexpectedly deep.
|
||||
*/
|
||||
const PAGINATION_TIMEOUT_MS = 30_000;
|
||||
|
||||
/**
|
||||
* Checks GitHub Actions runner availability to support automatic provider fallback.
|
||||
*
|
||||
* When a user configures `runnerCheckEnabled: true` with a `fallbackProviderStrategy`,
|
||||
* this service queries the GitHub API for runner status before the build starts.
|
||||
* If insufficient runners are available, the orchestrator routes to the fallback provider.
|
||||
*/
|
||||
export class RunnerAvailabilityService {
|
||||
/**
|
||||
* Check if enough runners are available to handle the build.
|
||||
*
|
||||
* @param owner - GitHub repository owner
|
||||
* @param repo - GitHub repository name
|
||||
* @param token - GitHub token with repo/actions scope
|
||||
* @param requiredLabels - Labels runners must have (empty = any runner)
|
||||
* @param minAvailable - Minimum idle runners required
|
||||
* @returns RunnerCheckResult with decision and diagnostics
|
||||
*/
|
||||
static async checkAvailability(
|
||||
owner: string,
|
||||
repo: string,
|
||||
token: string,
|
||||
requiredLabels: string[],
|
||||
minAvailable: number,
|
||||
): Promise<RunnerCheckResult> {
|
||||
if (!token) {
|
||||
return {
|
||||
shouldFallback: false,
|
||||
reason: 'No GitHub token available — skipping runner check',
|
||||
totalRunners: 0,
|
||||
matchingRunners: 0,
|
||||
idleRunners: 0,
|
||||
};
|
||||
}
|
||||
|
||||
try {
|
||||
const octokit = new Octokit({ auth: token });
|
||||
|
||||
// Fetch all runners for the repository
|
||||
const runners = await RunnerAvailabilityService.fetchRunners(octokit, owner, repo);
|
||||
|
||||
if (runners.length === 0) {
|
||||
return {
|
||||
shouldFallback: true,
|
||||
reason: 'No runners registered for this repository',
|
||||
totalRunners: 0,
|
||||
matchingRunners: 0,
|
||||
idleRunners: 0,
|
||||
};
|
||||
}
|
||||
|
||||
// Filter by required labels
|
||||
const matching = RunnerAvailabilityService.filterByLabels(runners, requiredLabels);
|
||||
|
||||
// Count idle (online + not busy)
|
||||
const idle = matching.filter((r) => r.status === 'online' && !r.busy);
|
||||
|
||||
const result: RunnerCheckResult = {
|
||||
shouldFallback: idle.length < minAvailable,
|
||||
reason:
|
||||
idle.length >= minAvailable
|
||||
? `${idle.length} idle runner(s) available (need ${minAvailable})`
|
||||
: `Only ${idle.length} idle runner(s) available, need ${minAvailable}`,
|
||||
totalRunners: runners.length,
|
||||
matchingRunners: matching.length,
|
||||
idleRunners: idle.length,
|
||||
};
|
||||
|
||||
return result;
|
||||
} catch (error: any) {
|
||||
// If the API call fails (permissions, rate limit, etc.), don't block the build
|
||||
OrchestratorLogger.log(`Runner availability check failed: ${error.message}`);
|
||||
|
||||
return {
|
||||
shouldFallback: false,
|
||||
reason: `Runner check failed (${error.message}) — proceeding with primary provider`,
|
||||
totalRunners: 0,
|
||||
matchingRunners: 0,
|
||||
idleRunners: 0,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch all runners for a repository, handling pagination.
|
||||
*
|
||||
* Includes defensive limits:
|
||||
* - Maximum page count (MAX_PAGINATION_PAGES) to prevent infinite loops
|
||||
* - Total timeout (PAGINATION_TIMEOUT_MS) to prevent indefinite API calls
|
||||
* - Rate-limit detection (HTTP 403/429 with X-RateLimit-Remaining header)
|
||||
*/
|
||||
private static async fetchRunners(octokit: Octokit, owner: string, repo: string): Promise<GitHubRunner[]> {
|
||||
const allRunners: GitHubRunner[] = [];
|
||||
let page = 1;
|
||||
const perPage = 100;
|
||||
const startTime = Date.now();
|
||||
|
||||
while (page <= MAX_PAGINATION_PAGES) {
|
||||
// Check total timeout
|
||||
if (Date.now() - startTime > PAGINATION_TIMEOUT_MS) {
|
||||
OrchestratorLogger.logWarning(
|
||||
`[RunnerAvailability] Pagination timeout reached after ${page - 1} pages and ${Date.now() - startTime}ms. ` +
|
||||
`Using ${allRunners.length} runners found so far.`,
|
||||
);
|
||||
break;
|
||||
}
|
||||
|
||||
let response: any;
|
||||
try {
|
||||
response = await octokit.request('GET /repos/{owner}/{repo}/actions/runners', {
|
||||
owner,
|
||||
repo,
|
||||
per_page: perPage,
|
||||
page,
|
||||
});
|
||||
} catch (requestError: any) {
|
||||
// Octokit throws for non-2xx responses. Check if this is a rate limit error.
|
||||
const status = requestError.status ?? requestError.response?.status;
|
||||
if (status === 403 || status === 429) {
|
||||
const resetTime =
|
||||
requestError.response?.headers?.['x-ratelimit-reset'] ?? requestError.headers?.['x-ratelimit-reset'];
|
||||
const resetMessage = resetTime
|
||||
? ` Resets at ${new Date(Number.parseInt(String(resetTime), 10) * 1000).toISOString()}`
|
||||
: '';
|
||||
OrchestratorLogger.logWarning(
|
||||
`[RunnerAvailability] GitHub API rate limit reached (HTTP ${status}).${resetMessage} ` +
|
||||
`Using ${allRunners.length} runners found so far.`,
|
||||
);
|
||||
break;
|
||||
}
|
||||
// Re-throw non-rate-limit errors to be handled by the outer catch
|
||||
throw requestError;
|
||||
}
|
||||
|
||||
const runners = (response.data.runners || []) as GitHubRunner[];
|
||||
allRunners.push(...runners);
|
||||
|
||||
if (runners.length < perPage) break;
|
||||
page++;
|
||||
}
|
||||
|
||||
if (page > MAX_PAGINATION_PAGES) {
|
||||
OrchestratorLogger.logWarning(
|
||||
`[RunnerAvailability] Maximum pagination limit reached (${MAX_PAGINATION_PAGES} pages). ` +
|
||||
`Using ${allRunners.length} runners found so far.`,
|
||||
);
|
||||
}
|
||||
|
||||
if (allRunners.length === 0) {
|
||||
OrchestratorLogger.log(
|
||||
'[RunnerAvailability] No runners found. Possible causes: ' +
|
||||
'wrong token permissions (needs repo or actions scope), ' +
|
||||
'no self-hosted runners registered, ' +
|
||||
'or runners are registered at the organization level instead of the repository.',
|
||||
);
|
||||
}
|
||||
|
||||
return allRunners;
|
||||
}
|
||||
|
||||
/**
|
||||
* Filter runners by required labels. A runner matches if it has ALL required labels.
|
||||
* If requiredLabels is empty, all runners match.
|
||||
*/
|
||||
private static filterByLabels(runners: GitHubRunner[], requiredLabels: string[]): GitHubRunner[] {
|
||||
if (requiredLabels.length === 0) return runners;
|
||||
|
||||
return runners.filter((runner) => {
|
||||
const runnerLabelNames = runner.labels.map((l) => l.name.toLowerCase());
|
||||
|
||||
return requiredLabels.every((required) => runnerLabelNames.includes(required.toLowerCase()));
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -1,159 +0,0 @@
|
||||
import OrchestratorLogger from '../core/orchestrator-logger';
|
||||
import { HotRunnerRegistry } from './hot-runner-registry';
|
||||
import { HotRunnerJobRequest, HotRunnerJobResult, HotRunnerStatus, HotRunnerTransport } from './hot-runner-types';
|
||||
|
||||
const POLL_INTERVAL_MS = 1000;
|
||||
|
||||
// eslint-disable-next-line no-unused-vars
|
||||
export type OutputCallback = (output: string) => void;
|
||||
|
||||
export class HotRunnerDispatcher {
|
||||
private transports: Map<string, HotRunnerTransport>;
|
||||
|
||||
constructor(transports: Map<string, HotRunnerTransport>) {
|
||||
this.transports = transports;
|
||||
}
|
||||
|
||||
/**
|
||||
* Dispatch a job to an available hot runner matching the request's build target.
|
||||
* If no runner is immediately available, waits up to the request timeout.
|
||||
* Returns the job result, or throws if no runner becomes available in time.
|
||||
*/
|
||||
async dispatchJob(
|
||||
request: HotRunnerJobRequest,
|
||||
registry: HotRunnerRegistry,
|
||||
unityVersion: string,
|
||||
onOutput?: OutputCallback,
|
||||
): Promise<HotRunnerJobResult> {
|
||||
OrchestratorLogger.log(`[HotRunner] Dispatching job ${request.jobId} (target: ${request.buildTarget})`);
|
||||
|
||||
// Find or wait for an available runner
|
||||
let runner = registry.findAvailableRunner({
|
||||
unityVersion,
|
||||
platform: request.buildTarget,
|
||||
});
|
||||
|
||||
if (!runner) {
|
||||
OrchestratorLogger.log(
|
||||
`[HotRunner] No idle runner available for ${unityVersion}/${request.buildTarget}, waiting...`,
|
||||
);
|
||||
runner = await this.waitForRunner({ unityVersion, platform: request.buildTarget }, request.timeout, registry);
|
||||
}
|
||||
|
||||
// Mark runner as busy
|
||||
registry.updateRunner(runner.id, {
|
||||
state: 'busy',
|
||||
currentJob: request.jobId,
|
||||
});
|
||||
|
||||
const transport = this.transports.get(runner.id);
|
||||
if (!transport) {
|
||||
registry.updateRunner(runner.id, { state: 'idle', currentJob: undefined });
|
||||
throw new Error(`[HotRunner] No transport available for runner ${runner.id}`);
|
||||
}
|
||||
|
||||
OrchestratorLogger.log(`[HotRunner] Sending job ${request.jobId} to runner ${runner.id}`);
|
||||
|
||||
const startTime = Date.now();
|
||||
|
||||
try {
|
||||
const result = await this.executeWithTimeout(transport, request);
|
||||
|
||||
const duration = Date.now() - startTime;
|
||||
OrchestratorLogger.log(
|
||||
`[HotRunner] Job ${request.jobId} completed on runner ${runner.id} in ${duration}ms (exit: ${result.exitCode})`,
|
||||
);
|
||||
|
||||
if (onOutput && result.output) {
|
||||
onOutput(result.output);
|
||||
}
|
||||
|
||||
// Mark runner as idle and increment job count
|
||||
const currentStatus = registry.getRunner(runner.id);
|
||||
registry.updateRunner(runner.id, {
|
||||
state: 'idle',
|
||||
currentJob: undefined,
|
||||
lastJobCompleted: request.jobId,
|
||||
jobsCompleted: (currentStatus?.jobsCompleted ?? 0) + 1,
|
||||
});
|
||||
|
||||
return result;
|
||||
} catch (error: any) {
|
||||
OrchestratorLogger.logWarning(`[HotRunner] Job ${request.jobId} failed on runner ${runner.id}: ${error.message}`);
|
||||
|
||||
// Mark runner as idle despite failure -- the health monitor will recycle if needed
|
||||
registry.updateRunner(runner.id, {
|
||||
state: 'idle',
|
||||
currentJob: undefined,
|
||||
});
|
||||
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Wait for an available runner matching the requirements.
|
||||
* Polls the registry at a fixed interval until one becomes available or timeout expires.
|
||||
*/
|
||||
async waitForRunner(
|
||||
requirements: { unityVersion: string; platform: string },
|
||||
timeoutMs: number,
|
||||
registry: HotRunnerRegistry,
|
||||
): Promise<HotRunnerStatus> {
|
||||
const deadline = Date.now() + timeoutMs;
|
||||
|
||||
while (Date.now() < deadline) {
|
||||
const runner = registry.findAvailableRunner(requirements);
|
||||
if (runner) {
|
||||
OrchestratorLogger.log(`[HotRunner] Runner ${runner.id} became available`);
|
||||
|
||||
return runner;
|
||||
}
|
||||
|
||||
await this.sleep(Math.min(POLL_INTERVAL_MS, deadline - Date.now()));
|
||||
}
|
||||
|
||||
throw new Error(
|
||||
`[HotRunner] Timed out waiting for available runner (${requirements.unityVersion}/${requirements.platform}) after ${timeoutMs}ms`,
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute a job on a transport with a timeout guard.
|
||||
* On timeout, disconnects the transport to release the connection
|
||||
* and prevent the orphaned sendJob promise from holding resources.
|
||||
*/
|
||||
private async executeWithTimeout(
|
||||
transport: HotRunnerTransport,
|
||||
request: HotRunnerJobRequest,
|
||||
): Promise<HotRunnerJobResult> {
|
||||
const TIMEOUT_SENTINEL = Symbol('timeout');
|
||||
|
||||
const timeoutPromise = new Promise<typeof TIMEOUT_SENTINEL>((resolve) => {
|
||||
setTimeout(() => {
|
||||
resolve(TIMEOUT_SENTINEL);
|
||||
}, request.timeout);
|
||||
});
|
||||
|
||||
const result = await Promise.race([transport.sendJob(request), timeoutPromise]);
|
||||
|
||||
if (result === TIMEOUT_SENTINEL) {
|
||||
// Disconnect the transport to clean up the orphaned sendJob call
|
||||
try {
|
||||
await transport.disconnect();
|
||||
} catch (disconnectError: any) {
|
||||
OrchestratorLogger.logWarning(
|
||||
`[HotRunner] Error disconnecting transport after timeout for job ${request.jobId}: ${disconnectError.message}`,
|
||||
);
|
||||
}
|
||||
|
||||
throw new Error(`[HotRunner] Job ${request.jobId} timed out after ${request.timeout}ms`);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private sleep(ms: number): Promise<void> {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
}
|
||||
@@ -1,186 +0,0 @@
|
||||
import OrchestratorLogger from '../core/orchestrator-logger';
|
||||
import { HotRunnerRegistry } from './hot-runner-registry';
|
||||
import { HotRunnerTransport } from './hot-runner-types';
|
||||
|
||||
export class HotRunnerHealthMonitor {
|
||||
private intervalHandle: ReturnType<typeof setInterval> | undefined;
|
||||
private registry: HotRunnerRegistry | undefined;
|
||||
private transports: Map<string, HotRunnerTransport> = new Map();
|
||||
|
||||
/**
|
||||
* Start periodic health monitoring for all registered runners.
|
||||
*/
|
||||
startMonitoring(registry: HotRunnerRegistry, interval: number, transports: Map<string, HotRunnerTransport>): void {
|
||||
if (this.intervalHandle) {
|
||||
this.stopMonitoring();
|
||||
}
|
||||
|
||||
this.registry = registry;
|
||||
this.transports = transports;
|
||||
|
||||
OrchestratorLogger.log(`[HotRunner] Starting health monitoring (interval: ${interval}s)`);
|
||||
|
||||
this.intervalHandle = setInterval(() => {
|
||||
this.runHealthChecks().catch((error: any) => {
|
||||
OrchestratorLogger.logWarning(`[HotRunner] Health check cycle failed: ${error.message}`);
|
||||
});
|
||||
}, interval * 1000);
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop periodic health monitoring.
|
||||
*/
|
||||
stopMonitoring(): void {
|
||||
if (this.intervalHandle) {
|
||||
clearInterval(this.intervalHandle);
|
||||
this.intervalHandle = undefined;
|
||||
OrchestratorLogger.log(`[HotRunner] Health monitoring stopped`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check health of a specific runner by ID. Returns true if healthy.
|
||||
*/
|
||||
async checkHealth(runnerId: string): Promise<boolean> {
|
||||
if (!this.registry) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const transport = this.transports.get(runnerId);
|
||||
if (!transport) {
|
||||
OrchestratorLogger.logWarning(`[HotRunner] No transport for runner ${runnerId}`);
|
||||
this.registry.updateRunner(runnerId, {
|
||||
state: 'unhealthy',
|
||||
lastHealthCheck: new Date().toISOString(),
|
||||
});
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
try {
|
||||
const healthy = await transport.healthCheck();
|
||||
if (healthy) {
|
||||
const status = await transport.getStatus();
|
||||
this.registry.updateRunner(runnerId, {
|
||||
lastHealthCheck: new Date().toISOString(),
|
||||
memoryUsageMB: status.memoryUsageMB,
|
||||
uptime: status.uptime,
|
||||
libraryHash: status.libraryHash,
|
||||
});
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
OrchestratorLogger.logWarning(`[HotRunner] Runner ${runnerId} health check returned false`);
|
||||
this.registry.updateRunner(runnerId, {
|
||||
state: 'unhealthy',
|
||||
lastHealthCheck: new Date().toISOString(),
|
||||
});
|
||||
|
||||
return false;
|
||||
} catch (error: any) {
|
||||
OrchestratorLogger.logWarning(`[HotRunner] Runner ${runnerId} health check failed: ${error.message}`);
|
||||
this.registry.updateRunner(runnerId, {
|
||||
state: 'unhealthy',
|
||||
lastHealthCheck: new Date().toISOString(),
|
||||
});
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark an unhealthy runner for cleanup and disconnect its transport.
|
||||
*/
|
||||
async recycleUnhealthyRunner(runnerId: string): Promise<void> {
|
||||
if (!this.registry) {
|
||||
return;
|
||||
}
|
||||
|
||||
OrchestratorLogger.log(`[HotRunner] Recycling unhealthy runner ${runnerId}`);
|
||||
this.registry.updateRunner(runnerId, { state: 'stopping' });
|
||||
|
||||
const transport = this.transports.get(runnerId);
|
||||
if (transport) {
|
||||
try {
|
||||
await transport.disconnect();
|
||||
} catch (error: any) {
|
||||
OrchestratorLogger.logWarning(`[HotRunner] Error disconnecting runner ${runnerId}: ${error.message}`);
|
||||
}
|
||||
this.transports.delete(runnerId);
|
||||
}
|
||||
|
||||
this.registry.unregisterRunner(runnerId);
|
||||
OrchestratorLogger.log(`[HotRunner] Runner ${runnerId} recycled and removed`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Recycle a runner that has been idle longer than the maximum idle time.
|
||||
*/
|
||||
async recycleIdleRunner(runnerId: string, maxIdleTime: number): Promise<void> {
|
||||
if (!this.registry) {
|
||||
return;
|
||||
}
|
||||
|
||||
const runner = this.registry.getRunner(runnerId);
|
||||
if (!runner || runner.state !== 'idle') {
|
||||
return;
|
||||
}
|
||||
|
||||
const lastCheckTime = new Date(runner.lastHealthCheck).getTime();
|
||||
const now = Date.now();
|
||||
const idleSeconds = (now - lastCheckTime) / 1000;
|
||||
|
||||
if (idleSeconds >= maxIdleTime) {
|
||||
OrchestratorLogger.log(
|
||||
`[HotRunner] Runner ${runnerId} idle for ${Math.floor(idleSeconds)}s (max: ${maxIdleTime}s), recycling`,
|
||||
);
|
||||
await this.recycleUnhealthyRunner(runnerId);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Run health checks and idle-recycle checks for all registered runners.
|
||||
*/
|
||||
private async runHealthChecks(): Promise<void> {
|
||||
if (!this.registry) {
|
||||
return;
|
||||
}
|
||||
|
||||
const runners = this.registry.listRunners();
|
||||
|
||||
for (const runner of runners) {
|
||||
if (runner.state === 'stopping') {
|
||||
continue;
|
||||
}
|
||||
|
||||
const healthy = await this.checkHealth(runner.id);
|
||||
|
||||
if (!healthy && runner.state !== 'starting') {
|
||||
await this.recycleUnhealthyRunner(runner.id);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check for idle timeout
|
||||
const config = this.registry.getConfig(runner.id);
|
||||
if (config && runner.state === 'idle') {
|
||||
await this.recycleIdleRunner(runner.id, config.maxIdleTime);
|
||||
}
|
||||
|
||||
// Check for max jobs before recycle
|
||||
if (config && config.maxJobsBeforeRecycle > 0 && runner.jobsCompleted >= config.maxJobsBeforeRecycle) {
|
||||
OrchestratorLogger.log(
|
||||
`[HotRunner] Runner ${runner.id} reached max jobs (${runner.jobsCompleted}/${config.maxJobsBeforeRecycle}), recycling`,
|
||||
);
|
||||
await this.recycleUnhealthyRunner(runner.id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Whether health monitoring is currently active.
|
||||
*/
|
||||
get isMonitoring(): boolean {
|
||||
return this.intervalHandle !== undefined;
|
||||
}
|
||||
}
|
||||
@@ -1,315 +0,0 @@
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { customAlphabet } from 'nanoid';
|
||||
import OrchestratorLogger from '../core/orchestrator-logger';
|
||||
import { HotRunnerConfig, HotRunnerStatus } from './hot-runner-types';
|
||||
|
||||
const generateId = customAlphabet('abcdefghijklmnopqrstuvwxyz0123456789', 12);
|
||||
|
||||
const PERSISTENCE_FILENAME = 'hot-runners.json';
|
||||
|
||||
const VALID_RUNNER_STATES: ReadonlySet<string> = new Set(['idle', 'busy', 'starting', 'stopping', 'unhealthy']);
|
||||
|
||||
export interface HotRunnerFilter {
|
||||
platform?: string;
|
||||
state?: string;
|
||||
unityVersion?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate that a restored runner entry has all required fields with correct types.
|
||||
* Returns true if the entry is a valid HotRunnerStatus, false otherwise.
|
||||
*/
|
||||
function isValidRunnerStatus(entry: unknown): entry is HotRunnerStatus {
|
||||
if (typeof entry !== 'object' || entry === null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const record = entry as Record<string, unknown>;
|
||||
|
||||
return (
|
||||
typeof record.id === 'string' &&
|
||||
record.id.length > 0 &&
|
||||
typeof record.state === 'string' &&
|
||||
VALID_RUNNER_STATES.has(record.state) &&
|
||||
typeof record.unityVersion === 'string' &&
|
||||
typeof record.platform === 'string' &&
|
||||
typeof record.uptime === 'number' &&
|
||||
typeof record.jobsCompleted === 'number' &&
|
||||
typeof record.lastHealthCheck === 'string' &&
|
||||
typeof record.memoryUsageMB === 'number'
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate that a restored config entry has all required fields with correct types.
|
||||
* Returns true if the entry is a valid HotRunnerConfig, false otherwise.
|
||||
*/
|
||||
function isValidRunnerConfig(entry: unknown): entry is HotRunnerConfig {
|
||||
if (typeof entry !== 'object' || entry === null) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const record = entry as Record<string, unknown>;
|
||||
|
||||
return (
|
||||
typeof record.enabled === 'boolean' &&
|
||||
typeof record.transport === 'string' &&
|
||||
['websocket', 'grpc', 'named-pipe'].includes(record.transport) &&
|
||||
typeof record.host === 'string' &&
|
||||
typeof record.port === 'number' &&
|
||||
typeof record.healthCheckInterval === 'number' &&
|
||||
typeof record.maxIdleTime === 'number' &&
|
||||
typeof record.maxJobsBeforeRecycle === 'number'
|
||||
);
|
||||
}
|
||||
|
||||
export class HotRunnerRegistry {
|
||||
private runners: Map<string, HotRunnerStatus> = new Map();
|
||||
private configs: Map<string, HotRunnerConfig> = new Map();
|
||||
private persistencePath: string;
|
||||
|
||||
constructor(persistenceDirectory?: string) {
|
||||
this.persistencePath = persistenceDirectory ? path.join(persistenceDirectory, PERSISTENCE_FILENAME) : '';
|
||||
}
|
||||
|
||||
/**
|
||||
* Register a new hot runner. Returns the generated runner ID.
|
||||
*/
|
||||
registerRunner(config: HotRunnerConfig): string {
|
||||
const id = `hr-${generateId()}`;
|
||||
|
||||
const status: HotRunnerStatus = {
|
||||
id,
|
||||
state: 'starting',
|
||||
unityVersion: config.unityVersion ?? 'unknown',
|
||||
platform: config.platform ?? 'unknown',
|
||||
uptime: 0,
|
||||
jobsCompleted: 0,
|
||||
lastHealthCheck: new Date().toISOString(),
|
||||
memoryUsageMB: 0,
|
||||
};
|
||||
|
||||
this.runners.set(id, status);
|
||||
this.configs.set(id, config);
|
||||
OrchestratorLogger.log(`[HotRunner] Registered runner ${id} (${status.unityVersion}/${status.platform})`);
|
||||
|
||||
this.persist();
|
||||
|
||||
return id;
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove a runner from the registry.
|
||||
*/
|
||||
unregisterRunner(id: string): void {
|
||||
const existed = this.runners.delete(id);
|
||||
this.configs.delete(id);
|
||||
|
||||
if (existed) {
|
||||
OrchestratorLogger.log(`[HotRunner] Unregistered runner ${id}`);
|
||||
this.persist();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a runner's current status by ID.
|
||||
*/
|
||||
getRunner(id: string): HotRunnerStatus | undefined {
|
||||
return this.runners.get(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a runner's config by ID.
|
||||
*/
|
||||
getConfig(id: string): HotRunnerConfig | undefined {
|
||||
return this.configs.get(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* List all runners, optionally filtered by platform, state, or Unity version.
|
||||
*/
|
||||
listRunners(filter?: HotRunnerFilter): HotRunnerStatus[] {
|
||||
let results = [...this.runners.values()];
|
||||
|
||||
if (filter?.platform) {
|
||||
results = results.filter((runner) => runner.platform === filter.platform);
|
||||
}
|
||||
|
||||
if (filter?.state) {
|
||||
results = results.filter((runner) => runner.state === filter.state);
|
||||
}
|
||||
|
||||
if (filter?.unityVersion) {
|
||||
results = results.filter((runner) => runner.unityVersion === filter.unityVersion);
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find an idle runner matching the given Unity version and platform requirements.
|
||||
*/
|
||||
findAvailableRunner(requirements: { unityVersion: string; platform: string }): HotRunnerStatus | undefined {
|
||||
return this.listRunners({
|
||||
state: 'idle',
|
||||
unityVersion: requirements.unityVersion,
|
||||
platform: requirements.platform,
|
||||
})[0];
|
||||
}
|
||||
|
||||
/**
|
||||
* Update a runner's status fields. Merges partial updates into existing status.
|
||||
*/
|
||||
updateRunner(id: string, update: Partial<HotRunnerStatus>): void {
|
||||
const existing = this.runners.get(id);
|
||||
if (!existing) {
|
||||
return;
|
||||
}
|
||||
|
||||
this.runners.set(id, { ...existing, ...update, id });
|
||||
this.persist();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the total number of registered runners.
|
||||
*/
|
||||
get size(): number {
|
||||
return this.runners.size;
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate all runners in the registry and reset invalid ones to 'unhealthy'.
|
||||
* Returns the number of runners that were repaired.
|
||||
*/
|
||||
validateAndRepair(): number {
|
||||
let repaired = 0;
|
||||
|
||||
for (const [id, status] of this.runners) {
|
||||
// Cast to unknown to bypass the type guard narrowing to 'never',
|
||||
// since the Map is typed as HotRunnerStatus but entries may have
|
||||
// been corrupted via direct deserialization or unsafe casts.
|
||||
const entry = status as unknown as Record<string, unknown>;
|
||||
|
||||
if (!isValidRunnerStatus(entry)) {
|
||||
OrchestratorLogger.logWarning(`[HotRunner] Runner ${id} has invalid state, marking as unhealthy`);
|
||||
this.runners.set(id, {
|
||||
id,
|
||||
state: 'unhealthy',
|
||||
unityVersion: typeof entry.unityVersion === 'string' ? entry.unityVersion : 'unknown',
|
||||
platform: typeof entry.platform === 'string' ? entry.platform : 'unknown',
|
||||
uptime: typeof entry.uptime === 'number' ? entry.uptime : 0,
|
||||
jobsCompleted: typeof entry.jobsCompleted === 'number' ? entry.jobsCompleted : 0,
|
||||
lastHealthCheck: typeof entry.lastHealthCheck === 'string' ? entry.lastHealthCheck : new Date().toISOString(),
|
||||
memoryUsageMB: typeof entry.memoryUsageMB === 'number' ? entry.memoryUsageMB : 0,
|
||||
});
|
||||
repaired++;
|
||||
}
|
||||
}
|
||||
|
||||
if (repaired > 0) {
|
||||
this.persist();
|
||||
}
|
||||
|
||||
return repaired;
|
||||
}
|
||||
|
||||
/**
|
||||
* Persist current registry state to disk for crash recovery.
|
||||
* Validates data before writing to prevent persisting corrupt state.
|
||||
*/
|
||||
private persist(): void {
|
||||
if (!this.persistencePath) {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
// Validate data before persisting
|
||||
for (const [id, status] of this.runners) {
|
||||
if (!isValidRunnerStatus(status)) {
|
||||
OrchestratorLogger.logWarning(`[HotRunner] Skipping persistence -- runner ${id} has invalid state`);
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
const data = {
|
||||
runners: Object.fromEntries(this.runners),
|
||||
configs: Object.fromEntries(this.configs),
|
||||
};
|
||||
const directory = path.dirname(this.persistencePath);
|
||||
if (!fs.existsSync(directory)) {
|
||||
fs.mkdirSync(directory, { recursive: true });
|
||||
}
|
||||
|
||||
fs.writeFileSync(this.persistencePath, JSON.stringify(data, undefined, 2));
|
||||
} catch (error: any) {
|
||||
OrchestratorLogger.logWarning(`[HotRunner] Failed to persist registry: ${error.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Load registry state from disk. Returns the number of runners restored.
|
||||
* Validates each restored entry and discards corrupt entries with warnings.
|
||||
* If the persistence file itself is corrupt (invalid JSON), starts with
|
||||
* an empty registry.
|
||||
*/
|
||||
loadFromDisk(): number {
|
||||
if (!this.persistencePath || !fs.existsSync(this.persistencePath)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
let data: any;
|
||||
|
||||
try {
|
||||
const raw = fs.readFileSync(this.persistencePath, 'utf8');
|
||||
data = JSON.parse(raw);
|
||||
} catch (error: any) {
|
||||
OrchestratorLogger.logWarning(
|
||||
`[HotRunner] Persistence file is corrupt, starting with empty registry: ${error.message}`,
|
||||
);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (typeof data !== 'object' || data === null) {
|
||||
OrchestratorLogger.logWarning('[HotRunner] Persistence file has invalid structure, starting with empty registry');
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
let discarded = 0;
|
||||
|
||||
if (data.runners && typeof data.runners === 'object') {
|
||||
for (const [id, status] of Object.entries(data.runners)) {
|
||||
if (isValidRunnerStatus(status)) {
|
||||
this.runners.set(id, status);
|
||||
} else {
|
||||
OrchestratorLogger.logWarning(`[HotRunner] Discarding invalid runner entry '${id}' from persistence file`);
|
||||
discarded++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (data.configs && typeof data.configs === 'object') {
|
||||
for (const [id, config] of Object.entries(data.configs)) {
|
||||
// Only restore configs for runners that were successfully restored
|
||||
if (this.runners.has(id)) {
|
||||
if (isValidRunnerConfig(config)) {
|
||||
this.configs.set(id, config);
|
||||
} else {
|
||||
OrchestratorLogger.logWarning(`[HotRunner] Discarding invalid config entry '${id}' from persistence file`);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (discarded > 0) {
|
||||
OrchestratorLogger.logWarning(`[HotRunner] Discarded ${discarded} invalid runner(s) from persistence file`);
|
||||
}
|
||||
|
||||
OrchestratorLogger.log(`[HotRunner] Restored ${this.runners.size} runner(s) from disk`);
|
||||
|
||||
return this.runners.size;
|
||||
}
|
||||
}
|
||||
@@ -1,166 +0,0 @@
|
||||
import OrchestratorLogger from '../core/orchestrator-logger';
|
||||
import { HotRunnerRegistry } from './hot-runner-registry';
|
||||
import { HotRunnerHealthMonitor } from './hot-runner-health-monitor';
|
||||
import { HotRunnerDispatcher, OutputCallback } from './hot-runner-dispatcher';
|
||||
import {
|
||||
HotRunnerConfig,
|
||||
HotRunnerJobRequest,
|
||||
HotRunnerJobResult,
|
||||
HotRunnerStatus,
|
||||
HotRunnerTransport,
|
||||
} from './hot-runner-types';
|
||||
import BuildParameters from '../../../build-parameters';
|
||||
|
||||
export class HotRunnerService {
|
||||
private registry: HotRunnerRegistry;
|
||||
private healthMonitor: HotRunnerHealthMonitor;
|
||||
private dispatcher: HotRunnerDispatcher;
|
||||
private transports: Map<string, HotRunnerTransport> = new Map();
|
||||
private config: HotRunnerConfig | undefined;
|
||||
|
||||
constructor(persistenceDirectory?: string) {
|
||||
this.registry = new HotRunnerRegistry(persistenceDirectory);
|
||||
this.healthMonitor = new HotRunnerHealthMonitor();
|
||||
this.dispatcher = new HotRunnerDispatcher(this.transports);
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the hot runner service: load persisted state, start health monitoring.
|
||||
*/
|
||||
async initialize(config: HotRunnerConfig): Promise<void> {
|
||||
this.config = config;
|
||||
|
||||
OrchestratorLogger.log(
|
||||
`[HotRunner] Initializing service (transport: ${config.transport}, ${config.host}:${config.port})`,
|
||||
);
|
||||
|
||||
// Attempt to restore previously registered runners from disk
|
||||
const restored = this.registry.loadFromDisk();
|
||||
if (restored > 0) {
|
||||
OrchestratorLogger.log(`[HotRunner] Restored ${restored} runner(s) from persistence`);
|
||||
}
|
||||
|
||||
// Start health monitoring
|
||||
this.healthMonitor.startMonitoring(this.registry, config.healthCheckInterval, this.transports);
|
||||
|
||||
OrchestratorLogger.log(`[HotRunner] Service initialized`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Register a runner with a transport implementation.
|
||||
* Returns the runner ID.
|
||||
*/
|
||||
registerRunner(config: HotRunnerConfig, transport: HotRunnerTransport): string {
|
||||
const id = this.registry.registerRunner(config);
|
||||
this.transports.set(id, transport);
|
||||
|
||||
return id;
|
||||
}
|
||||
|
||||
/**
|
||||
* Submit a build job to an available hot runner.
|
||||
* Converts BuildParameters to a HotRunnerJobRequest and dispatches.
|
||||
*/
|
||||
async submitBuild(params: BuildParameters, onOutput?: OutputCallback): Promise<HotRunnerJobResult> {
|
||||
const request: HotRunnerJobRequest = {
|
||||
jobId: params.buildGuid || `build-${Date.now()}`,
|
||||
buildMethod: params.buildMethod || undefined,
|
||||
buildTarget: params.targetPlatform,
|
||||
buildPath: params.buildPath,
|
||||
customParameters: params.customParameters ? this.parseCustomParameters(params.customParameters) : undefined,
|
||||
timeout: 30 * 60 * 1000, // 30 minutes default
|
||||
};
|
||||
|
||||
OrchestratorLogger.log(`[HotRunner] Submitting build: ${request.jobId} (target: ${request.buildTarget})`);
|
||||
|
||||
return this.dispatcher.dispatchJob(request, this.registry, params.editorVersion, onOutput);
|
||||
}
|
||||
|
||||
/**
|
||||
* Submit a test job to an available hot runner.
|
||||
* Converts BuildParameters and optional suite config to a test-mode HotRunnerJobRequest.
|
||||
*/
|
||||
async submitTest(
|
||||
params: BuildParameters,
|
||||
suiteConfig?: { testMode?: 'editmode' | 'playmode'; testSuitePath?: string },
|
||||
onOutput?: OutputCallback,
|
||||
): Promise<HotRunnerJobResult> {
|
||||
const request: HotRunnerJobRequest = {
|
||||
jobId: params.buildGuid || `test-${Date.now()}`,
|
||||
buildTarget: params.targetPlatform,
|
||||
customParameters: params.customParameters ? this.parseCustomParameters(params.customParameters) : undefined,
|
||||
timeout: 30 * 60 * 1000, // 30 minutes default
|
||||
testMode: suiteConfig?.testMode ?? 'editmode',
|
||||
testSuitePath: suiteConfig?.testSuitePath,
|
||||
};
|
||||
|
||||
OrchestratorLogger.log(`[HotRunner] Submitting test: ${request.jobId} (mode: ${request.testMode})`);
|
||||
|
||||
return this.dispatcher.dispatchJob(request, this.registry, params.editorVersion, onOutput);
|
||||
}
|
||||
|
||||
/**
|
||||
* Shut down the service: stop health monitoring, disconnect all transports,
|
||||
* and unregister all runners.
|
||||
*/
|
||||
async shutdown(): Promise<void> {
|
||||
OrchestratorLogger.log(`[HotRunner] Shutting down service`);
|
||||
|
||||
this.healthMonitor.stopMonitoring();
|
||||
|
||||
const disconnectPromises: Promise<void>[] = [];
|
||||
for (const [id, transport] of this.transports.entries()) {
|
||||
disconnectPromises.push(
|
||||
transport.disconnect().catch((error: any) => {
|
||||
OrchestratorLogger.logWarning(`[HotRunner] Error disconnecting runner ${id}: ${error.message}`);
|
||||
}),
|
||||
);
|
||||
}
|
||||
await Promise.all(disconnectPromises);
|
||||
|
||||
this.transports.clear();
|
||||
|
||||
OrchestratorLogger.log(`[HotRunner] Service shut down`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the status of all registered runners.
|
||||
*/
|
||||
getStatus(): HotRunnerStatus[] {
|
||||
return this.registry.listRunners();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the underlying registry (for testing or advanced use).
|
||||
*/
|
||||
getRegistry(): HotRunnerRegistry {
|
||||
return this.registry;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a space-separated custom parameters string into a key-value map.
|
||||
* Handles `-key value` and `-key=value` formats.
|
||||
*/
|
||||
private parseCustomParameters(raw: string): Record<string, string> {
|
||||
const result: Record<string, string> = {};
|
||||
const parts = raw.trim().split(/\s+/);
|
||||
|
||||
for (let i = 0; i < parts.length; i++) {
|
||||
const part = parts[i];
|
||||
if (part.startsWith('-')) {
|
||||
const key = part.replace(/^-+/, '');
|
||||
if (key.includes('=')) {
|
||||
const [k, ...v] = key.split('=');
|
||||
result[k] = v.join('=');
|
||||
} else if (i + 1 < parts.length && !parts[i + 1].startsWith('-')) {
|
||||
result[key] = parts[i + 1];
|
||||
i++;
|
||||
} else {
|
||||
result[key] = 'true';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
@@ -1,54 +0,0 @@
|
||||
export interface HotRunnerConfig {
|
||||
enabled: boolean;
|
||||
transport: 'websocket' | 'grpc' | 'named-pipe';
|
||||
host: string;
|
||||
port: number;
|
||||
healthCheckInterval: number; // seconds
|
||||
maxIdleTime: number; // seconds before recycling
|
||||
maxJobsBeforeRecycle: number;
|
||||
unityVersion?: string;
|
||||
platform?: string;
|
||||
}
|
||||
|
||||
export interface HotRunnerStatus {
|
||||
id: string;
|
||||
state: 'idle' | 'busy' | 'starting' | 'stopping' | 'unhealthy';
|
||||
unityVersion: string;
|
||||
platform: string;
|
||||
currentJob?: string;
|
||||
lastJobCompleted?: string;
|
||||
uptime: number;
|
||||
jobsCompleted: number;
|
||||
lastHealthCheck: string;
|
||||
memoryUsageMB: number;
|
||||
libraryHash?: string;
|
||||
}
|
||||
|
||||
export interface HotRunnerJobRequest {
|
||||
jobId: string;
|
||||
buildMethod?: string;
|
||||
buildTarget: string;
|
||||
buildPath?: string;
|
||||
customParameters?: Record<string, string>;
|
||||
timeout: number;
|
||||
testMode?: 'editmode' | 'playmode';
|
||||
testSuitePath?: string;
|
||||
}
|
||||
|
||||
export interface HotRunnerJobResult {
|
||||
jobId: string;
|
||||
success: boolean;
|
||||
exitCode: number;
|
||||
duration: number;
|
||||
output: string;
|
||||
artifacts?: string[];
|
||||
testResults?: string; // path to test result file
|
||||
}
|
||||
|
||||
export interface HotRunnerTransport {
|
||||
connect(config: HotRunnerConfig): Promise<void>;
|
||||
disconnect(): Promise<void>;
|
||||
sendJob(request: HotRunnerJobRequest): Promise<HotRunnerJobResult>;
|
||||
getStatus(): Promise<HotRunnerStatus>;
|
||||
healthCheck(): Promise<boolean>;
|
||||
}
|
||||
@@ -1,740 +0,0 @@
|
||||
import fs from 'node:fs';
|
||||
import { HotRunnerRegistry } from './hot-runner-registry';
|
||||
import { HotRunnerHealthMonitor } from './hot-runner-health-monitor';
|
||||
import { HotRunnerDispatcher } from './hot-runner-dispatcher';
|
||||
import { HotRunnerService } from './hot-runner-service';
|
||||
import {
|
||||
HotRunnerConfig,
|
||||
HotRunnerJobRequest,
|
||||
HotRunnerJobResult,
|
||||
HotRunnerStatus,
|
||||
HotRunnerTransport,
|
||||
} from './hot-runner-types';
|
||||
|
||||
// Mock dependencies
|
||||
jest.mock('node:fs');
|
||||
jest.mock('../core/orchestrator-logger');
|
||||
|
||||
const mockFs = fs as jest.Mocked<typeof fs>;
|
||||
|
||||
function createMockConfig(overrides?: Partial<HotRunnerConfig>): HotRunnerConfig {
|
||||
return {
|
||||
enabled: true,
|
||||
transport: 'websocket',
|
||||
host: 'localhost',
|
||||
port: 9090,
|
||||
healthCheckInterval: 30,
|
||||
maxIdleTime: 3600,
|
||||
maxJobsBeforeRecycle: 100,
|
||||
unityVersion: '2022.3.0f1',
|
||||
platform: 'StandaloneWindows64',
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
function createMockTransport(overrides?: Partial<HotRunnerTransport>): HotRunnerTransport {
|
||||
return {
|
||||
// eslint-disable-next-line unicorn/no-useless-undefined
|
||||
connect: jest.fn().mockResolvedValue(undefined),
|
||||
// eslint-disable-next-line unicorn/no-useless-undefined
|
||||
disconnect: jest.fn().mockResolvedValue(undefined),
|
||||
sendJob: jest.fn().mockResolvedValue({
|
||||
jobId: 'test-job',
|
||||
success: true,
|
||||
exitCode: 0,
|
||||
duration: 5000,
|
||||
output: 'Build succeeded',
|
||||
artifacts: ['build/output.exe'],
|
||||
} as HotRunnerJobResult),
|
||||
getStatus: jest.fn().mockResolvedValue({
|
||||
id: 'mock-runner',
|
||||
state: 'idle',
|
||||
unityVersion: '2022.3.0f1',
|
||||
platform: 'StandaloneWindows64',
|
||||
uptime: 3600,
|
||||
jobsCompleted: 5,
|
||||
lastHealthCheck: new Date().toISOString(),
|
||||
memoryUsageMB: 1024,
|
||||
} as HotRunnerStatus),
|
||||
healthCheck: jest.fn().mockResolvedValue(true),
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
function createMockJobRequest(overrides?: Partial<HotRunnerJobRequest>): HotRunnerJobRequest {
|
||||
return {
|
||||
jobId: 'job-001',
|
||||
buildTarget: 'StandaloneWindows64',
|
||||
timeout: 60000,
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
// --- Registry Tests ---
|
||||
|
||||
describe('HotRunnerRegistry', () => {
|
||||
let registry: HotRunnerRegistry;
|
||||
|
||||
beforeEach(() => {
|
||||
jest.clearAllMocks();
|
||||
registry = new HotRunnerRegistry();
|
||||
});
|
||||
|
||||
it('should register a runner and return an ID', () => {
|
||||
const config = createMockConfig();
|
||||
const id = registry.registerRunner(config);
|
||||
|
||||
expect(id).toMatch(/^hr-/);
|
||||
expect(registry.size).toBe(1);
|
||||
});
|
||||
|
||||
it('should retrieve a registered runner by ID', () => {
|
||||
const config = createMockConfig();
|
||||
const id = registry.registerRunner(config);
|
||||
const runner = registry.getRunner(id);
|
||||
|
||||
expect(runner).toBeDefined();
|
||||
expect(runner!.id).toBe(id);
|
||||
expect(runner!.state).toBe('starting');
|
||||
expect(runner!.unityVersion).toBe('2022.3.0f1');
|
||||
expect(runner!.platform).toBe('StandaloneWindows64');
|
||||
});
|
||||
|
||||
it('should return undefined for unknown runner ID', () => {
|
||||
const runner = registry.getRunner('nonexistent');
|
||||
expect(runner).toBeUndefined();
|
||||
});
|
||||
|
||||
it('should unregister a runner', () => {
|
||||
const id = registry.registerRunner(createMockConfig());
|
||||
expect(registry.size).toBe(1);
|
||||
|
||||
registry.unregisterRunner(id);
|
||||
expect(registry.size).toBe(0);
|
||||
expect(registry.getRunner(id)).toBeUndefined();
|
||||
});
|
||||
|
||||
it('should handle unregistering a nonexistent runner gracefully', () => {
|
||||
registry.unregisterRunner('nonexistent');
|
||||
expect(registry.size).toBe(0);
|
||||
});
|
||||
|
||||
it('should list all runners without filter', () => {
|
||||
registry.registerRunner(createMockConfig({ platform: 'StandaloneWindows64' }));
|
||||
registry.registerRunner(createMockConfig({ platform: 'StandaloneLinux64' }));
|
||||
registry.registerRunner(createMockConfig({ platform: 'StandaloneOSX' }));
|
||||
|
||||
const all = registry.listRunners();
|
||||
expect(all).toHaveLength(3);
|
||||
});
|
||||
|
||||
it('should filter runners by platform', () => {
|
||||
registry.registerRunner(createMockConfig({ platform: 'StandaloneWindows64' }));
|
||||
registry.registerRunner(createMockConfig({ platform: 'StandaloneLinux64' }));
|
||||
registry.registerRunner(createMockConfig({ platform: 'StandaloneWindows64' }));
|
||||
|
||||
const windows = registry.listRunners({ platform: 'StandaloneWindows64' });
|
||||
expect(windows).toHaveLength(2);
|
||||
|
||||
const linux = registry.listRunners({ platform: 'StandaloneLinux64' });
|
||||
expect(linux).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('should filter runners by state', () => {
|
||||
const id1 = registry.registerRunner(createMockConfig());
|
||||
registry.registerRunner(createMockConfig());
|
||||
|
||||
registry.updateRunner(id1, { state: 'idle' });
|
||||
|
||||
// second runner remains in 'starting' state
|
||||
|
||||
const idle = registry.listRunners({ state: 'idle' });
|
||||
expect(idle).toHaveLength(1);
|
||||
expect(idle[0].id).toBe(id1);
|
||||
});
|
||||
|
||||
it('should filter runners by Unity version', () => {
|
||||
registry.registerRunner(createMockConfig({ unityVersion: '2022.3.0f1' }));
|
||||
registry.registerRunner(createMockConfig({ unityVersion: '2023.1.0f1' }));
|
||||
registry.registerRunner(createMockConfig({ unityVersion: '2022.3.0f1' }));
|
||||
|
||||
const v2022 = registry.listRunners({ unityVersion: '2022.3.0f1' });
|
||||
expect(v2022).toHaveLength(2);
|
||||
});
|
||||
|
||||
it('should find an available idle runner matching requirements', () => {
|
||||
const id1 = registry.registerRunner(
|
||||
createMockConfig({ unityVersion: '2022.3.0f1', platform: 'StandaloneWindows64' }),
|
||||
);
|
||||
registry.updateRunner(id1, { state: 'idle' });
|
||||
|
||||
const id2 = registry.registerRunner(
|
||||
createMockConfig({ unityVersion: '2023.1.0f1', platform: 'StandaloneLinux64' }),
|
||||
);
|
||||
registry.updateRunner(id2, { state: 'idle' });
|
||||
|
||||
const found = registry.findAvailableRunner({
|
||||
unityVersion: '2022.3.0f1',
|
||||
platform: 'StandaloneWindows64',
|
||||
});
|
||||
|
||||
expect(found).toBeDefined();
|
||||
expect(found!.id).toBe(id1);
|
||||
});
|
||||
|
||||
it('should return undefined when no runner matches requirements', () => {
|
||||
const id = registry.registerRunner(
|
||||
createMockConfig({ unityVersion: '2022.3.0f1', platform: 'StandaloneWindows64' }),
|
||||
);
|
||||
registry.updateRunner(id, { state: 'idle' });
|
||||
|
||||
const found = registry.findAvailableRunner({
|
||||
unityVersion: '2023.1.0f1',
|
||||
platform: 'StandaloneLinux64',
|
||||
});
|
||||
|
||||
expect(found).toBeUndefined();
|
||||
});
|
||||
|
||||
it('should update runner status fields', () => {
|
||||
const id = registry.registerRunner(createMockConfig());
|
||||
registry.updateRunner(id, { state: 'idle', memoryUsageMB: 2048 });
|
||||
|
||||
const runner = registry.getRunner(id);
|
||||
expect(runner!.state).toBe('idle');
|
||||
expect(runner!.memoryUsageMB).toBe(2048);
|
||||
|
||||
// ID should not be overridden by the update
|
||||
expect(runner!.id).toBe(id);
|
||||
});
|
||||
|
||||
it('should persist and load registry from disk', () => {
|
||||
const persistenceRegistry = new HotRunnerRegistry('/tmp/test');
|
||||
mockFs.existsSync.mockReturnValue(true);
|
||||
mockFs.writeFileSync.mockImplementation(() => {});
|
||||
mockFs.mkdirSync.mockImplementation(() => '' as any);
|
||||
|
||||
const id = persistenceRegistry.registerRunner(createMockConfig());
|
||||
|
||||
// Verify writeFileSync was called for persistence
|
||||
expect(mockFs.writeFileSync).toHaveBeenCalled();
|
||||
const writtenData = JSON.parse((mockFs.writeFileSync as jest.Mock).mock.calls[0][1] as string);
|
||||
expect(writtenData.runners).toBeDefined();
|
||||
expect(writtenData.runners[id]).toBeDefined();
|
||||
});
|
||||
|
||||
it('should load runners from disk on loadFromDisk', () => {
|
||||
const persistenceRegistry = new HotRunnerRegistry('/tmp/test');
|
||||
const storedData = {
|
||||
runners: {
|
||||
'hr-restored': {
|
||||
id: 'hr-restored',
|
||||
state: 'idle',
|
||||
unityVersion: '2022.3.0f1',
|
||||
platform: 'StandaloneWindows64',
|
||||
uptime: 100,
|
||||
jobsCompleted: 3,
|
||||
lastHealthCheck: new Date().toISOString(),
|
||||
memoryUsageMB: 512,
|
||||
},
|
||||
},
|
||||
configs: {
|
||||
'hr-restored': createMockConfig(),
|
||||
},
|
||||
};
|
||||
|
||||
mockFs.existsSync.mockReturnValue(true);
|
||||
mockFs.readFileSync.mockReturnValue(JSON.stringify(storedData));
|
||||
|
||||
const count = persistenceRegistry.loadFromDisk();
|
||||
expect(count).toBe(1);
|
||||
expect(persistenceRegistry.getRunner('hr-restored')).toBeDefined();
|
||||
});
|
||||
|
||||
it('should discard invalid runner entries when loading from disk', () => {
|
||||
const persistenceRegistry = new HotRunnerRegistry('/tmp/test');
|
||||
const storedData = {
|
||||
runners: {
|
||||
'hr-valid': {
|
||||
id: 'hr-valid',
|
||||
state: 'idle',
|
||||
unityVersion: '2022.3.0f1',
|
||||
platform: 'StandaloneWindows64',
|
||||
uptime: 100,
|
||||
jobsCompleted: 3,
|
||||
lastHealthCheck: new Date().toISOString(),
|
||||
memoryUsageMB: 512,
|
||||
},
|
||||
'hr-invalid': {
|
||||
// Missing required fields like state, unityVersion
|
||||
id: 'hr-invalid',
|
||||
},
|
||||
'hr-bad-state': {
|
||||
id: 'hr-bad-state',
|
||||
state: 'nonexistent-state',
|
||||
unityVersion: '2022.3.0f1',
|
||||
platform: 'StandaloneWindows64',
|
||||
uptime: 0,
|
||||
jobsCompleted: 0,
|
||||
lastHealthCheck: new Date().toISOString(),
|
||||
memoryUsageMB: 0,
|
||||
},
|
||||
},
|
||||
configs: {
|
||||
'hr-valid': createMockConfig(),
|
||||
'hr-invalid': createMockConfig(),
|
||||
'hr-bad-state': createMockConfig(),
|
||||
},
|
||||
};
|
||||
|
||||
mockFs.existsSync.mockReturnValue(true);
|
||||
mockFs.readFileSync.mockReturnValue(JSON.stringify(storedData));
|
||||
|
||||
const count = persistenceRegistry.loadFromDisk();
|
||||
expect(count).toBe(1);
|
||||
expect(persistenceRegistry.getRunner('hr-valid')).toBeDefined();
|
||||
expect(persistenceRegistry.getRunner('hr-invalid')).toBeUndefined();
|
||||
expect(persistenceRegistry.getRunner('hr-bad-state')).toBeUndefined();
|
||||
});
|
||||
|
||||
it('should handle corrupt JSON persistence file gracefully', () => {
|
||||
const persistenceRegistry = new HotRunnerRegistry('/tmp/test');
|
||||
|
||||
mockFs.existsSync.mockReturnValue(true);
|
||||
mockFs.readFileSync.mockReturnValue('{ invalid json !!!');
|
||||
|
||||
const count = persistenceRegistry.loadFromDisk();
|
||||
expect(count).toBe(0);
|
||||
expect(persistenceRegistry.size).toBe(0);
|
||||
});
|
||||
|
||||
it('should handle persistence file with invalid top-level structure', () => {
|
||||
const persistenceRegistry = new HotRunnerRegistry('/tmp/test');
|
||||
|
||||
mockFs.existsSync.mockReturnValue(true);
|
||||
mockFs.readFileSync.mockReturnValue('"just a string"');
|
||||
|
||||
const count = persistenceRegistry.loadFromDisk();
|
||||
expect(count).toBe(0);
|
||||
});
|
||||
|
||||
it('should handle persistence file with null runners', () => {
|
||||
const persistenceRegistry = new HotRunnerRegistry('/tmp/test');
|
||||
|
||||
mockFs.existsSync.mockReturnValue(true);
|
||||
mockFs.readFileSync.mockReturnValue('{"runners": null, "configs": null}');
|
||||
|
||||
const count = persistenceRegistry.loadFromDisk();
|
||||
expect(count).toBe(0);
|
||||
});
|
||||
|
||||
it('should validate and repair invalid runners', () => {
|
||||
const persistenceRegistry = new HotRunnerRegistry('/tmp/test');
|
||||
mockFs.existsSync.mockReturnValue(true);
|
||||
mockFs.writeFileSync.mockImplementation(() => {});
|
||||
mockFs.mkdirSync.mockImplementation(() => '' as any);
|
||||
|
||||
// Register a valid runner first
|
||||
const id = persistenceRegistry.registerRunner(createMockConfig());
|
||||
persistenceRegistry.updateRunner(id, { state: 'idle' });
|
||||
|
||||
// Manually corrupt the runner's state by setting an invalid state
|
||||
// (we access via the public API -- updateRunner with a cast)
|
||||
persistenceRegistry.updateRunner(id, { state: 'invalid-state' as any });
|
||||
|
||||
const repaired = persistenceRegistry.validateAndRepair();
|
||||
expect(repaired).toBe(1);
|
||||
|
||||
const runner = persistenceRegistry.getRunner(id);
|
||||
expect(runner!.state).toBe('unhealthy');
|
||||
});
|
||||
|
||||
it('should not discard configs for valid runners when loading from disk', () => {
|
||||
const persistenceRegistry = new HotRunnerRegistry('/tmp/test');
|
||||
const storedData = {
|
||||
runners: {
|
||||
'hr-valid': {
|
||||
id: 'hr-valid',
|
||||
state: 'idle',
|
||||
unityVersion: '2022.3.0f1',
|
||||
platform: 'StandaloneWindows64',
|
||||
uptime: 100,
|
||||
jobsCompleted: 3,
|
||||
lastHealthCheck: new Date().toISOString(),
|
||||
memoryUsageMB: 512,
|
||||
},
|
||||
},
|
||||
configs: {
|
||||
'hr-valid': createMockConfig(),
|
||||
},
|
||||
};
|
||||
|
||||
mockFs.existsSync.mockReturnValue(true);
|
||||
mockFs.readFileSync.mockReturnValue(JSON.stringify(storedData));
|
||||
|
||||
persistenceRegistry.loadFromDisk();
|
||||
expect(persistenceRegistry.getConfig('hr-valid')).toBeDefined();
|
||||
});
|
||||
});
|
||||
|
||||
// --- Health Monitor Tests ---
|
||||
|
||||
describe('HotRunnerHealthMonitor', () => {
|
||||
let monitor: HotRunnerHealthMonitor;
|
||||
let registry: HotRunnerRegistry;
|
||||
let transports: Map<string, HotRunnerTransport>;
|
||||
|
||||
beforeEach(() => {
|
||||
jest.clearAllMocks();
|
||||
jest.useFakeTimers();
|
||||
monitor = new HotRunnerHealthMonitor();
|
||||
registry = new HotRunnerRegistry();
|
||||
transports = new Map();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
monitor.stopMonitoring();
|
||||
jest.useRealTimers();
|
||||
});
|
||||
|
||||
it('should start and stop monitoring', () => {
|
||||
monitor.startMonitoring(registry, 30, transports);
|
||||
expect(monitor.isMonitoring).toBe(true);
|
||||
|
||||
monitor.stopMonitoring();
|
||||
expect(monitor.isMonitoring).toBe(false);
|
||||
});
|
||||
|
||||
it('should report healthy when transport health check passes', async () => {
|
||||
const id = registry.registerRunner(createMockConfig());
|
||||
registry.updateRunner(id, { state: 'idle' });
|
||||
|
||||
const transport = createMockTransport();
|
||||
transports.set(id, transport);
|
||||
monitor.startMonitoring(registry, 30, transports);
|
||||
|
||||
const healthy = await monitor.checkHealth(id);
|
||||
expect(healthy).toBe(true);
|
||||
expect(transport.healthCheck).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should mark runner as unhealthy when health check fails', async () => {
|
||||
const id = registry.registerRunner(createMockConfig());
|
||||
registry.updateRunner(id, { state: 'idle' });
|
||||
|
||||
const transport = createMockTransport({
|
||||
healthCheck: jest.fn().mockResolvedValue(false),
|
||||
});
|
||||
transports.set(id, transport);
|
||||
monitor.startMonitoring(registry, 30, transports);
|
||||
|
||||
const healthy = await monitor.checkHealth(id);
|
||||
expect(healthy).toBe(false);
|
||||
|
||||
const runner = registry.getRunner(id);
|
||||
expect(runner!.state).toBe('unhealthy');
|
||||
});
|
||||
|
||||
it('should mark runner as unhealthy when health check throws', async () => {
|
||||
const id = registry.registerRunner(createMockConfig());
|
||||
registry.updateRunner(id, { state: 'idle' });
|
||||
|
||||
const transport = createMockTransport({
|
||||
healthCheck: jest.fn().mockRejectedValue(new Error('Connection refused')),
|
||||
});
|
||||
transports.set(id, transport);
|
||||
monitor.startMonitoring(registry, 30, transports);
|
||||
|
||||
const healthy = await monitor.checkHealth(id);
|
||||
expect(healthy).toBe(false);
|
||||
});
|
||||
|
||||
it('should recycle unhealthy runner and remove from registry', async () => {
|
||||
const id = registry.registerRunner(createMockConfig());
|
||||
const transport = createMockTransport();
|
||||
transports.set(id, transport);
|
||||
monitor.startMonitoring(registry, 30, transports);
|
||||
|
||||
await monitor.recycleUnhealthyRunner(id);
|
||||
|
||||
expect(registry.getRunner(id)).toBeUndefined();
|
||||
expect(transport.disconnect).toHaveBeenCalled();
|
||||
expect(transports.has(id)).toBe(false);
|
||||
});
|
||||
|
||||
it('should recycle idle runner when max idle time exceeded', async () => {
|
||||
const id = registry.registerRunner(createMockConfig({ maxIdleTime: 60 }));
|
||||
|
||||
// Set lastHealthCheck to 120 seconds ago
|
||||
const oldDate = new Date(Date.now() - 120 * 1000).toISOString();
|
||||
registry.updateRunner(id, { state: 'idle', lastHealthCheck: oldDate });
|
||||
|
||||
const transport = createMockTransport();
|
||||
transports.set(id, transport);
|
||||
monitor.startMonitoring(registry, 30, transports);
|
||||
|
||||
await monitor.recycleIdleRunner(id, 60);
|
||||
|
||||
expect(registry.getRunner(id)).toBeUndefined();
|
||||
});
|
||||
|
||||
it('should not recycle idle runner when within max idle time', async () => {
|
||||
const id = registry.registerRunner(createMockConfig({ maxIdleTime: 3600 }));
|
||||
registry.updateRunner(id, {
|
||||
state: 'idle',
|
||||
lastHealthCheck: new Date().toISOString(),
|
||||
});
|
||||
|
||||
const transport = createMockTransport();
|
||||
transports.set(id, transport);
|
||||
monitor.startMonitoring(registry, 30, transports);
|
||||
|
||||
await monitor.recycleIdleRunner(id, 3600);
|
||||
|
||||
// Runner should still exist
|
||||
expect(registry.getRunner(id)).toBeDefined();
|
||||
});
|
||||
|
||||
it('should return false when no transport exists for runner', async () => {
|
||||
const id = registry.registerRunner(createMockConfig());
|
||||
|
||||
// Do not set any transport for this runner
|
||||
monitor.startMonitoring(registry, 30, transports);
|
||||
|
||||
const healthy = await monitor.checkHealth(id);
|
||||
expect(healthy).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
// --- Dispatcher Tests ---
|
||||
|
||||
describe('HotRunnerDispatcher', () => {
|
||||
let registry: HotRunnerRegistry;
|
||||
let transports: Map<string, HotRunnerTransport>;
|
||||
let dispatcher: HotRunnerDispatcher;
|
||||
|
||||
beforeEach(() => {
|
||||
jest.clearAllMocks();
|
||||
registry = new HotRunnerRegistry();
|
||||
transports = new Map();
|
||||
dispatcher = new HotRunnerDispatcher(transports);
|
||||
});
|
||||
|
||||
it('should dispatch a job to an available runner', async () => {
|
||||
const id = registry.registerRunner(createMockConfig());
|
||||
registry.updateRunner(id, { state: 'idle' });
|
||||
|
||||
const transport = createMockTransport();
|
||||
transports.set(id, transport);
|
||||
|
||||
const request = createMockJobRequest();
|
||||
const result = await dispatcher.dispatchJob(request, registry, '2022.3.0f1');
|
||||
|
||||
expect(result.success).toBe(true);
|
||||
expect(result.exitCode).toBe(0);
|
||||
expect(transport.sendJob).toHaveBeenCalledWith(request);
|
||||
});
|
||||
|
||||
it('should mark runner as busy during job execution', async () => {
|
||||
const id = registry.registerRunner(createMockConfig());
|
||||
registry.updateRunner(id, { state: 'idle' });
|
||||
|
||||
const statesDuringJob: string[] = [];
|
||||
const transport = createMockTransport({
|
||||
sendJob: jest.fn().mockImplementation(async () => {
|
||||
const runner = registry.getRunner(id);
|
||||
if (runner) statesDuringJob.push(runner.state);
|
||||
|
||||
return {
|
||||
jobId: 'job-001',
|
||||
success: true,
|
||||
exitCode: 0,
|
||||
duration: 1000,
|
||||
output: 'ok',
|
||||
};
|
||||
}),
|
||||
});
|
||||
transports.set(id, transport);
|
||||
|
||||
await dispatcher.dispatchJob(createMockJobRequest(), registry, '2022.3.0f1');
|
||||
|
||||
expect(statesDuringJob).toContain('busy');
|
||||
|
||||
// After completion, should be idle again
|
||||
const runner = registry.getRunner(id);
|
||||
expect(runner!.state).toBe('idle');
|
||||
});
|
||||
|
||||
it('should increment jobsCompleted after successful dispatch', async () => {
|
||||
const id = registry.registerRunner(createMockConfig());
|
||||
registry.updateRunner(id, { state: 'idle', jobsCompleted: 5 });
|
||||
|
||||
const transport = createMockTransport();
|
||||
transports.set(id, transport);
|
||||
|
||||
await dispatcher.dispatchJob(createMockJobRequest(), registry, '2022.3.0f1');
|
||||
|
||||
const runner = registry.getRunner(id);
|
||||
expect(runner!.jobsCompleted).toBe(6);
|
||||
});
|
||||
|
||||
it('should throw when no runner is available and wait times out', async () => {
|
||||
// No runners registered at all
|
||||
const request = createMockJobRequest({ timeout: 100 });
|
||||
|
||||
await expect(dispatcher.dispatchJob(request, registry, '2022.3.0f1')).rejects.toThrow(/Timed out waiting/);
|
||||
});
|
||||
|
||||
it('should throw when runner has no transport', async () => {
|
||||
const id = registry.registerRunner(createMockConfig());
|
||||
registry.updateRunner(id, { state: 'idle' });
|
||||
|
||||
// No transport set for this runner
|
||||
|
||||
const request = createMockJobRequest();
|
||||
|
||||
await expect(dispatcher.dispatchJob(request, registry, '2022.3.0f1')).rejects.toThrow(/No transport available/);
|
||||
});
|
||||
|
||||
it('should handle job failure and return runner to idle', async () => {
|
||||
const id = registry.registerRunner(createMockConfig());
|
||||
registry.updateRunner(id, { state: 'idle' });
|
||||
|
||||
const transport = createMockTransport({
|
||||
sendJob: jest.fn().mockRejectedValue(new Error('Unity crashed')),
|
||||
});
|
||||
transports.set(id, transport);
|
||||
|
||||
await expect(dispatcher.dispatchJob(createMockJobRequest(), registry, '2022.3.0f1')).rejects.toThrow(
|
||||
'Unity crashed',
|
||||
);
|
||||
|
||||
// Runner should be back to idle despite failure
|
||||
const runner = registry.getRunner(id);
|
||||
expect(runner!.state).toBe('idle');
|
||||
});
|
||||
|
||||
it('should handle job timeout', async () => {
|
||||
const id = registry.registerRunner(createMockConfig());
|
||||
registry.updateRunner(id, { state: 'idle' });
|
||||
|
||||
const transport = createMockTransport({
|
||||
sendJob: jest.fn().mockImplementation(
|
||||
() => new Promise((resolve) => setTimeout(resolve, 60000)), // never resolves within timeout
|
||||
),
|
||||
});
|
||||
transports.set(id, transport);
|
||||
|
||||
const request = createMockJobRequest({ timeout: 50 });
|
||||
|
||||
await expect(dispatcher.dispatchJob(request, registry, '2022.3.0f1')).rejects.toThrow(/timed out/);
|
||||
});
|
||||
|
||||
it('should disconnect transport on job timeout', async () => {
|
||||
const id = registry.registerRunner(createMockConfig());
|
||||
registry.updateRunner(id, { state: 'idle' });
|
||||
|
||||
const transport = createMockTransport({
|
||||
sendJob: jest.fn().mockImplementation(
|
||||
() => new Promise((resolve) => setTimeout(resolve, 60000)), // never resolves within timeout
|
||||
),
|
||||
});
|
||||
transports.set(id, transport);
|
||||
|
||||
const request = createMockJobRequest({ timeout: 50 });
|
||||
|
||||
await expect(dispatcher.dispatchJob(request, registry, '2022.3.0f1')).rejects.toThrow(/timed out/);
|
||||
|
||||
// Transport should have been disconnected to clean up orphaned connection
|
||||
expect(transport.disconnect).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should call output callback with job output', async () => {
|
||||
const id = registry.registerRunner(createMockConfig());
|
||||
registry.updateRunner(id, { state: 'idle' });
|
||||
|
||||
const transport = createMockTransport();
|
||||
transports.set(id, transport);
|
||||
|
||||
const outputCallback = jest.fn();
|
||||
await dispatcher.dispatchJob(createMockJobRequest(), registry, '2022.3.0f1', outputCallback);
|
||||
|
||||
expect(outputCallback).toHaveBeenCalledWith('Build succeeded');
|
||||
});
|
||||
|
||||
it('should wait for runner to become available', async () => {
|
||||
const id = registry.registerRunner(createMockConfig());
|
||||
|
||||
// Runner starts in 'starting' state, not idle
|
||||
|
||||
const transport = createMockTransport();
|
||||
transports.set(id, transport);
|
||||
|
||||
// Simulate runner becoming idle after a short delay
|
||||
setTimeout(() => {
|
||||
registry.updateRunner(id, { state: 'idle' });
|
||||
}, 50);
|
||||
|
||||
const request = createMockJobRequest({ timeout: 5000 });
|
||||
const result = await dispatcher.dispatchJob(request, registry, '2022.3.0f1');
|
||||
|
||||
expect(result.success).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
// --- Service Integration Tests ---
|
||||
|
||||
describe('HotRunnerService', () => {
|
||||
let service: HotRunnerService;
|
||||
|
||||
beforeEach(() => {
|
||||
jest.clearAllMocks();
|
||||
mockFs.existsSync.mockReturnValue(false);
|
||||
service = new HotRunnerService();
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await service.shutdown();
|
||||
});
|
||||
|
||||
it('should initialize and shut down cleanly', async () => {
|
||||
const config = createMockConfig();
|
||||
await service.initialize(config);
|
||||
|
||||
const status = service.getStatus();
|
||||
expect(status).toEqual([]);
|
||||
|
||||
await service.shutdown();
|
||||
});
|
||||
|
||||
it('should register a runner with transport', async () => {
|
||||
await service.initialize(createMockConfig());
|
||||
|
||||
const transport = createMockTransport();
|
||||
const id = service.registerRunner(createMockConfig(), transport);
|
||||
|
||||
expect(id).toMatch(/^hr-/);
|
||||
expect(service.getStatus()).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('should disconnect all transports on shutdown', async () => {
|
||||
await service.initialize(createMockConfig());
|
||||
|
||||
const transport1 = createMockTransport();
|
||||
const transport2 = createMockTransport();
|
||||
service.registerRunner(createMockConfig(), transport1);
|
||||
service.registerRunner(createMockConfig(), transport2);
|
||||
|
||||
await service.shutdown();
|
||||
|
||||
expect(transport1.disconnect).toHaveBeenCalled();
|
||||
expect(transport2.disconnect).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should expose the underlying registry', async () => {
|
||||
await service.initialize(createMockConfig());
|
||||
const registry = service.getRegistry();
|
||||
|
||||
expect(registry).toBeInstanceOf(HotRunnerRegistry);
|
||||
});
|
||||
});
|
||||
@@ -1,11 +0,0 @@
|
||||
export { HotRunnerService } from './hot-runner-service';
|
||||
export { HotRunnerRegistry } from './hot-runner-registry';
|
||||
export { HotRunnerHealthMonitor } from './hot-runner-health-monitor';
|
||||
export { HotRunnerDispatcher } from './hot-runner-dispatcher';
|
||||
export type {
|
||||
HotRunnerConfig,
|
||||
HotRunnerStatus,
|
||||
HotRunnerJobRequest,
|
||||
HotRunnerJobResult,
|
||||
HotRunnerTransport,
|
||||
} from './hot-runner-types';
|
||||
Reference in New Issue
Block a user