Compare commits

..

3 Commits

Author SHA1 Message Date
frostebite
9579230886 fix: replace orchestrator-develop branch references with main
The orchestrator-develop branch no longer exists. Update all fallback
clone commands and test fixtures to use main instead.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-09 19:56:36 +00:00
frostebite
d21188eb1f ci: set macOS builds to continue-on-error
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 23:32:49 +00:00
frostebite
9789eb5c3b ci: split orchestrator integrity into parallel jobs for faster validation
Rewrite the monolith orchestrator-integrity.yml (1110 lines, single job,
3+ hour sequential execution) into 4 parallel jobs that run on separate
runners:

- k8s-tests: k3d cluster + LocalStack, 5 tests
- aws-provider-tests: LocalStack only, 10 tests
- local-docker-tests: Docker + LocalStack for S3 tests, 9 tests
- rclone-tests: rclone + LocalStack, 1 test

Key improvements:
- Wall-clock time drops from ~3h to ~1h (longest single job)
- Disk exhaustion eliminated: each job gets its own fresh 14GB runner
- Cleanup logic deduplicated via sourced shell functions instead of
  15 copy-pasted 30-line blocks
- K3d node image cleanup only runs in the k8s job (where it matters)
- Light cleanup (cache + docker prune -f) between tests; heavy cleanup
  (prune -af --volumes) only at job boundaries
- workflow_call interface unchanged; integrity-check.yml needs no changes

Ref: #794

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 13:40:48 +00:00
9 changed files with 714 additions and 1534 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -194,42 +194,6 @@ inputs:
description:
'[Orchestrator] Either local, k8s or aws can be used to run builds on a remote cluster. Additional parameters must
be configured.'
fallbackProviderStrategy:
default: ''
required: false
description:
'[Orchestrator] Fallback provider when the primary is unavailable. Used with runnerCheckEnabled for automatic
failover, or as a catch-all if the primary provider fails to initialize.'
runnerCheckEnabled:
default: 'false'
required: false
description:
'[Orchestrator] Check GitHub Actions runner availability before starting a build. When no suitable runners are
available and fallbackProviderStrategy is set, automatically routes to the fallback provider.'
runnerCheckLabels:
default: ''
required: false
description:
'[Orchestrator] Comma-separated runner labels to filter when checking availability (e.g. self-hosted,linux).
When empty, checks all runners in the repository.'
runnerCheckMinAvailable:
default: '1'
required: false
description:
'[Orchestrator] Minimum number of idle runners required for the primary provider. If fewer are available,
routes to fallbackProviderStrategy.'
retryOnFallback:
default: 'false'
required: false
description:
'[Orchestrator] When true and fallbackProviderStrategy is set, automatically retry the build on the fallback
provider if the primary provider fails. Useful for long builds where transient cloud failures are common.'
providerInitTimeout:
default: '0'
required: false
description:
'[Orchestrator] Maximum seconds to wait for the primary provider to initialize (setupWorkflow). If exceeded
and fallbackProviderStrategy is set, switches to the fallback. Set to 0 to disable (default).'
resourceTracking:
default: 'false'
required: false

256
dist/index.js generated vendored
View File

@@ -327,12 +327,6 @@ class BuildParameters {
containerRegistryRepository: input_1.default.containerRegistryRepository,
containerRegistryImageVersion: input_1.default.containerRegistryImageVersion,
providerStrategy: orchestrator_options_1.default.providerStrategy,
fallbackProviderStrategy: orchestrator_options_1.default.fallbackProviderStrategy,
runnerCheckEnabled: orchestrator_options_1.default.runnerCheckEnabled,
runnerCheckLabels: orchestrator_options_1.default.runnerCheckLabels,
runnerCheckMinAvailable: orchestrator_options_1.default.runnerCheckMinAvailable,
retryOnFallback: orchestrator_options_1.default.retryOnFallback,
providerInitTimeout: orchestrator_options_1.default.providerInitTimeout,
buildPlatform: orchestrator_options_1.default.buildPlatform,
kubeConfig: orchestrator_options_1.default.kubeConfig,
containerMemory: orchestrator_options_1.default.containerMemory,
@@ -2210,25 +2204,6 @@ class OrchestratorOptions {
}
return provider || 'local';
}
static get fallbackProviderStrategy() {
return OrchestratorOptions.getInput('fallbackProviderStrategy') || '';
}
static get runnerCheckEnabled() {
return OrchestratorOptions.getInput('runnerCheckEnabled') === 'true';
}
static get runnerCheckLabels() {
const labels = OrchestratorOptions.getInput('runnerCheckLabels');
return labels ? labels.split(',').map((l) => l.trim()) : [];
}
static get runnerCheckMinAvailable() {
return Number(OrchestratorOptions.getInput('runnerCheckMinAvailable')) || 1;
}
static get retryOnFallback() {
return OrchestratorOptions.getInput('retryOnFallback') === 'true';
}
static get providerInitTimeout() {
return Number(OrchestratorOptions.getInput('providerInitTimeout')) || 0;
}
static get containerCpu() {
return OrchestratorOptions.getInput('containerCpu') || `1024`;
}
@@ -2533,7 +2508,6 @@ const follow_log_stream_service_1 = __nccwpck_require__(36149);
const orchestrator_result_1 = __importDefault(__nccwpck_require__(86819));
const orchestrator_options_1 = __importDefault(__nccwpck_require__(82473));
const resource_tracking_1 = __importDefault(__nccwpck_require__(42604));
const runner_availability_service_1 = __nccwpck_require__(18876);
class Orchestrator {
static get isOrchestratorEnvironment() {
return process.env[`GITHUB_ACTIONS`] !== `true`;
@@ -2570,26 +2544,6 @@ class Orchestrator {
}
static async setupSelectedBuildPlatform() {
orchestrator_logger_1.default.log(`Orchestrator platform selected ${Orchestrator.buildParameters.providerStrategy}`);
// Check runner availability and apply fallback if needed
if (Orchestrator.buildParameters.runnerCheckEnabled && Orchestrator.buildParameters.fallbackProviderStrategy) {
const owner = orchestrator_options_1.default.githubOwner;
const repo = orchestrator_options_1.default.githubRepoName;
const token = Orchestrator.buildParameters.gitPrivateToken || process.env.GITHUB_TOKEN || '';
orchestrator_logger_1.default.log(`Checking runner availability (labels: [${Orchestrator.buildParameters.runnerCheckLabels.join(', ')}], min: ${Orchestrator.buildParameters.runnerCheckMinAvailable})`);
const result = await runner_availability_service_1.RunnerAvailabilityService.checkAvailability(owner, repo, token, Orchestrator.buildParameters.runnerCheckLabels, Orchestrator.buildParameters.runnerCheckMinAvailable);
orchestrator_logger_1.default.log(`Runner check: ${result.totalRunners} total, ${result.matchingRunners} matching, ${result.idleRunners} idle — ${result.reason}`);
if (result.shouldFallback) {
const original = Orchestrator.buildParameters.providerStrategy;
const fallback = Orchestrator.buildParameters.fallbackProviderStrategy;
orchestrator_logger_1.default.log(`Falling back from '${original}' to '${fallback}' — ${result.reason}`);
Orchestrator.buildParameters.providerStrategy = fallback;
core.setOutput('providerFallbackUsed', 'true');
core.setOutput('providerFallbackReason', result.reason);
}
else {
core.setOutput('providerFallbackUsed', 'false');
}
}
// Detect LocalStack endpoints and handle AWS provider appropriately
// AWS_FORCE_PROVIDER options:
// - 'aws': Force AWS provider (requires LocalStack Pro with ECS support)
@@ -2688,33 +2642,13 @@ class Orchestrator {
if (baseImage.includes(`undefined`)) {
throw new Error(`baseImage is undefined`);
}
try {
return await Orchestrator.runWithProvider(buildParameters, baseImage);
}
catch (primaryError) {
// Retry on fallback provider if enabled and a fallback is configured
const fallback = buildParameters.fallbackProviderStrategy;
const alreadyOnFallback = buildParameters.providerStrategy === fallback;
if (buildParameters.retryOnFallback && fallback && !alreadyOnFallback) {
orchestrator_logger_1.default.log(`Primary provider '${buildParameters.providerStrategy}' failed: ${primaryError.message}`);
orchestrator_logger_1.default.log(`Retrying build on fallback provider '${fallback}'...`);
buildParameters.providerStrategy = fallback;
core.setOutput('providerFallbackUsed', 'true');
core.setOutput('providerFallbackReason', `Primary provider failed: ${primaryError.message}`);
return await Orchestrator.runWithProvider(buildParameters, baseImage);
}
throw primaryError;
}
}
static async runWithProvider(buildParameters, baseImage) {
await Orchestrator.setup(buildParameters);
// When aws-local mode is enabled, validate AWS CloudFormation templates
// This ensures AWS templates are correct even when executing via local-docker
if (Orchestrator.validateAwsTemplates) {
await Orchestrator.validateAwsCloudFormationTemplates();
}
// Setup workflow with optional init timeout
await Orchestrator.setupWorkflowWithTimeout();
await Orchestrator.Provider.setupWorkflow(Orchestrator.buildParameters.buildGuid, Orchestrator.buildParameters, Orchestrator.buildParameters.branch, Orchestrator.defaultSecrets);
try {
if (buildParameters.maxRetainedWorkspaces > 0) {
Orchestrator.lockedWorkspace = shared_workspace_locking_1.default.NewWorkspaceName();
@@ -2764,24 +2698,6 @@ class Orchestrator {
throw error;
}
}
/**
* Runs setupWorkflow with an optional timeout. If providerInitTimeout is set and the
* provider takes longer than that to initialize, throws an error that triggers
* retry-on-fallback (if enabled).
*/
static async setupWorkflowWithTimeout() {
const timeoutSeconds = Orchestrator.buildParameters.providerInitTimeout;
const setupPromise = Orchestrator.Provider.setupWorkflow(Orchestrator.buildParameters.buildGuid, Orchestrator.buildParameters, Orchestrator.buildParameters.branch, Orchestrator.defaultSecrets);
if (timeoutSeconds <= 0) {
await setupPromise;
return;
}
orchestrator_logger_1.default.log(`Provider init timeout: ${timeoutSeconds}s`);
const timeoutPromise = new Promise((_, reject) => {
setTimeout(() => reject(new Error(`Provider initialization timed out after ${timeoutSeconds}s`)), timeoutSeconds * 1000);
});
await Promise.race([setupPromise, timeoutPromise]);
}
static async updateStatusWithBuildParameters() {
const content = { ...Orchestrator.buildParameters };
content.gitPrivateToken = ``;
@@ -8723,176 +8639,6 @@ class ResourceTracking {
exports["default"] = ResourceTracking;
/***/ }),
/***/ 18876:
/***/ (function(__unused_webpack_module, exports, __nccwpck_require__) {
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", ({ value: true }));
exports.RunnerAvailabilityService = void 0;
const core_1 = __nccwpck_require__(76762);
const orchestrator_logger_1 = __importDefault(__nccwpck_require__(32549));
/**
* Maximum number of pages to fetch when paginating through GitHub API results.
* 100 pages * 100 per page = 10,000 runners maximum.
*/
const MAX_PAGINATION_PAGES = 100;
/**
* Total timeout in milliseconds for the pagination loop.
* Prevents indefinite API calls if GitHub is slow or pagination is unexpectedly deep.
*/
const PAGINATION_TIMEOUT_MS = 30000;
/**
* Checks GitHub Actions runner availability to support automatic provider fallback.
*
* When a user configures `runnerCheckEnabled: true` with a `fallbackProviderStrategy`,
* this service queries the GitHub API for runner status before the build starts.
* If insufficient runners are available, the orchestrator routes to the fallback provider.
*/
class RunnerAvailabilityService {
    /**
     * Check if enough runners are available to handle the build.
     *
     * The check is fail-open: when no token is supplied, or when the API call
     * throws, the result has `shouldFallback: false` so the build is not blocked.
     *
     * @param owner - GitHub repository owner
     * @param repo - GitHub repository name
     * @param token - GitHub token used to authenticate the runners request
     * @param requiredLabels - Labels runners must have (empty = any runner)
     * @param minAvailable - Minimum idle runners required
     * @returns RunnerCheckResult with decision and diagnostics
     */
    static async checkAvailability(owner, repo, token, requiredLabels, minAvailable) {
        if (!token) {
            return {
                shouldFallback: false,
                reason: 'No GitHub token available — skipping runner check',
                totalRunners: 0,
                matchingRunners: 0,
                idleRunners: 0,
            };
        }
        try {
            const octokit = new core_1.Octokit({ auth: token });
            // Fetch all runners for the repository
            const runners = await RunnerAvailabilityService.fetchRunners(octokit, owner, repo);
            if (runners.length === 0) {
                return {
                    shouldFallback: true,
                    reason: 'No runners registered for this repository',
                    totalRunners: 0,
                    matchingRunners: 0,
                    idleRunners: 0,
                };
            }
            // Filter by required labels
            const matching = RunnerAvailabilityService.filterByLabels(runners, requiredLabels);
            // Count idle (online + not busy)
            const idle = matching.filter((r) => r.status === 'online' && !r.busy);
            return {
                shouldFallback: idle.length < minAvailable,
                reason: idle.length >= minAvailable
                    ? `${idle.length} idle runner(s) available (need ${minAvailable})`
                    : `Only ${idle.length} idle runner(s) available, need ${minAvailable}`,
                totalRunners: runners.length,
                matchingRunners: matching.length,
                idleRunners: idle.length,
            };
        }
        catch (error) {
            // If the API call fails (permissions, rate limit, etc.), don't block the build.
            // A thrown value is not guaranteed to be an Error, so read the message defensively.
            const message = error?.message ?? String(error);
            orchestrator_logger_1.default.log(`Runner availability check failed: ${message}`);
            return {
                shouldFallback: false,
                reason: `Runner check failed (${message}) — proceeding with primary provider`,
                totalRunners: 0,
                matchingRunners: 0,
                idleRunners: 0,
            };
        }
    }
    /**
     * Fetch all runners for a repository, handling pagination.
     *
     * Includes defensive limits:
     * - Maximum page count (MAX_PAGINATION_PAGES) to prevent infinite loops
     * - Total timeout (PAGINATION_TIMEOUT_MS), checked before each page request
     * - HTTP 403/429 responses are treated as rate limiting: pagination stops and
     *   the runners collected so far are returned. The `x-ratelimit-reset`
     *   header, when present, is only used to enrich the warning message.
     *
     * Any other request error is re-thrown to the caller.
     */
    static async fetchRunners(octokit, owner, repo) {
        const allRunners = [];
        let page = 1;
        const perPage = 100;
        const startTime = Date.now();
        while (page <= MAX_PAGINATION_PAGES) {
            // Check total timeout
            if (Date.now() - startTime > PAGINATION_TIMEOUT_MS) {
                orchestrator_logger_1.default.logWarning(`[RunnerAvailability] Pagination timeout reached after ${page - 1} pages and ${Date.now() - startTime}ms. ` +
                    `Using ${allRunners.length} runners found so far.`);
                break;
            }
            let response;
            try {
                response = await octokit.request('GET /repos/{owner}/{repo}/actions/runners', {
                    owner,
                    repo,
                    per_page: perPage,
                    page,
                });
            }
            catch (requestError) {
                // Octokit throws for non-2xx responses. Check if this is a rate limit error.
                const status = requestError.status ?? requestError.response?.status;
                if (status === 403 || status === 429) {
                    const resetTime = requestError.response?.headers?.['x-ratelimit-reset'] ?? requestError.headers?.['x-ratelimit-reset'];
                    const resetMessage = resetTime
                        ? ` Resets at ${new Date(Number.parseInt(String(resetTime), 10) * 1000).toISOString()}`
                        : '';
                    orchestrator_logger_1.default.logWarning(`[RunnerAvailability] GitHub API rate limit reached (HTTP ${status}).${resetMessage} ` +
                        `Using ${allRunners.length} runners found so far.`);
                    break;
                }
                // Re-throw non-rate-limit errors to be handled by the outer catch
                throw requestError;
            }
            const runners = (response.data.runners || []);
            allRunners.push(...runners);
            // A short page means this was the last one.
            if (runners.length < perPage)
                break;
            page++;
        }
        if (page > MAX_PAGINATION_PAGES) {
            orchestrator_logger_1.default.logWarning(`[RunnerAvailability] Maximum pagination limit reached (${MAX_PAGINATION_PAGES} pages). ` +
                `Using ${allRunners.length} runners found so far.`);
        }
        if (allRunners.length === 0) {
            orchestrator_logger_1.default.log('[RunnerAvailability] No runners found. Possible causes: ' +
                'wrong token permissions (needs repo or actions scope), ' +
                'no self-hosted runners registered, ' +
                'or runners are registered at the organization level instead of the repository.');
        }
        return allRunners;
    }
    /**
     * Filter runners by required labels. A runner matches if it has ALL required labels
     * (compared case-insensitively).
     *
     * Required labels are trimmed and blank entries are ignored, so a stray comma in
     * the configuration cannot make every runner fail to match. If no non-blank
     * required label remains, all runners match. A runner without a `labels` array
     * is treated as having no labels.
     */
    static filterByLabels(runners, requiredLabels) {
        // Normalise once instead of once per runner.
        const wanted = requiredLabels
            .map((required) => required.trim().toLowerCase())
            .filter((required) => required.length > 0);
        if (wanted.length === 0)
            return runners;
        return runners.filter((runner) => {
            const runnerLabelNames = new Set((runner.labels ?? []).map((l) => l.name.toLowerCase()));
            return wanted.every((required) => runnerLabelNames.has(required));
        });
    }
}
exports.RunnerAvailabilityService = RunnerAvailabilityService;
/***/ }),
/***/ 54222:

2
dist/index.js.map generated vendored

File diff suppressed because one or more lines are too long

View File

@@ -54,12 +54,6 @@ class BuildParameters {
public sshAgent!: string;
public sshPublicKeysDirectoryPath!: string;
public providerStrategy!: string;
public fallbackProviderStrategy!: string;
public runnerCheckEnabled!: boolean;
public runnerCheckLabels!: string[];
public runnerCheckMinAvailable!: number;
public retryOnFallback!: boolean;
public providerInitTimeout!: number;
public gitPrivateToken!: string;
public awsStackName!: string;
public awsEndpoint?: string;
@@ -200,12 +194,6 @@ class BuildParameters {
containerRegistryRepository: Input.containerRegistryRepository,
containerRegistryImageVersion: Input.containerRegistryImageVersion,
providerStrategy: OrchestratorOptions.providerStrategy,
fallbackProviderStrategy: OrchestratorOptions.fallbackProviderStrategy,
runnerCheckEnabled: OrchestratorOptions.runnerCheckEnabled,
runnerCheckLabels: OrchestratorOptions.runnerCheckLabels,
runnerCheckMinAvailable: OrchestratorOptions.runnerCheckMinAvailable,
retryOnFallback: OrchestratorOptions.retryOnFallback,
providerInitTimeout: OrchestratorOptions.providerInitTimeout,
buildPlatform: OrchestratorOptions.buildPlatform,
kubeConfig: OrchestratorOptions.kubeConfig,
containerMemory: OrchestratorOptions.containerMemory,

View File

@@ -138,32 +138,6 @@ class OrchestratorOptions {
return provider || 'local';
}
/**
 * Value of the `fallbackProviderStrategy` input as returned by
 * OrchestratorOptions.getInput, or an empty string when that value is falsy.
 */
static get fallbackProviderStrategy(): string {
  const configuredFallback = OrchestratorOptions.getInput('fallbackProviderStrategy');

  return configuredFallback ? configuredFallback : '';
}
/**
 * True only when the `runnerCheckEnabled` input is exactly the string 'true'.
 */
static get runnerCheckEnabled(): boolean {
  const rawFlag = OrchestratorOptions.getInput('runnerCheckEnabled');

  return rawFlag === 'true';
}
/**
 * Runner labels parsed from the comma-separated `runnerCheckLabels` input.
 *
 * Each entry is trimmed and blank entries are dropped, so inputs such as
 * "self-hosted," or "a,,b" do not produce an empty label (an empty label can
 * never match a runner and would force a fallback on every build).
 *
 * @returns the list of non-empty labels; an empty array when the input is unset.
 */
static get runnerCheckLabels(): string[] {
  const rawLabels = OrchestratorOptions.getInput('runnerCheckLabels');
  if (!rawLabels) {
    return [];
  }

  return rawLabels
    .split(',')
    .map((label) => label.trim())
    .filter((label) => label.length > 0);
}
/**
 * Numeric value of the `runnerCheckMinAvailable` input.
 * Falls back to 1 when the input converts to 0 or NaN.
 */
static get runnerCheckMinAvailable(): number {
  const parsedMinimum = Number(OrchestratorOptions.getInput('runnerCheckMinAvailable'));

  return parsedMinimum ? parsedMinimum : 1;
}
/**
 * True only when the `retryOnFallback` input is exactly the string 'true'.
 */
static get retryOnFallback(): boolean {
  const rawFlag = OrchestratorOptions.getInput('retryOnFallback');

  return rawFlag === 'true';
}
/**
 * Numeric value of the `providerInitTimeout` input.
 * Falls back to 0 when the input converts to NaN.
 */
static get providerInitTimeout(): number {
  const parsedSeconds = Number(OrchestratorOptions.getInput('providerInitTimeout'));

  return parsedSeconds ? parsedSeconds : 0;
}
/**
 * Value of the `containerCpu` input, or the string `1024` when that value is falsy.
 */
static get containerCpu(): string {
  const configuredCpu = OrchestratorOptions.getInput('containerCpu');

  return configuredCpu ? configuredCpu : `1024`;
}

View File

@@ -20,7 +20,6 @@ import { FollowLogStreamService } from './services/core/follow-log-stream-servic
import OrchestratorResult from './services/core/orchestrator-result';
import OrchestratorOptions from './options/orchestrator-options';
import ResourceTracking from './services/core/resource-tracking';
import { RunnerAvailabilityService } from './services/core/runner-availability-service';
class Orchestrator {
public static Provider: ProviderInterface;
@@ -77,42 +76,6 @@ class Orchestrator {
private static async setupSelectedBuildPlatform() {
OrchestratorLogger.log(`Orchestrator platform selected ${Orchestrator.buildParameters.providerStrategy}`);
// Check runner availability and apply fallback if needed
if (Orchestrator.buildParameters.runnerCheckEnabled && Orchestrator.buildParameters.fallbackProviderStrategy) {
const owner = OrchestratorOptions.githubOwner;
const repo = OrchestratorOptions.githubRepoName;
const token = Orchestrator.buildParameters.gitPrivateToken || process.env.GITHUB_TOKEN || '';
OrchestratorLogger.log(
`Checking runner availability (labels: [${Orchestrator.buildParameters.runnerCheckLabels.join(', ')}], min: ${
Orchestrator.buildParameters.runnerCheckMinAvailable
})`,
);
const result = await RunnerAvailabilityService.checkAvailability(
owner,
repo,
token,
Orchestrator.buildParameters.runnerCheckLabels,
Orchestrator.buildParameters.runnerCheckMinAvailable,
);
OrchestratorLogger.log(
`Runner check: ${result.totalRunners} total, ${result.matchingRunners} matching, ${result.idleRunners} idle — ${result.reason}`,
);
if (result.shouldFallback) {
const original = Orchestrator.buildParameters.providerStrategy;
const fallback = Orchestrator.buildParameters.fallbackProviderStrategy;
OrchestratorLogger.log(`Falling back from '${original}' to '${fallback}' — ${result.reason}`);
Orchestrator.buildParameters.providerStrategy = fallback;
core.setOutput('providerFallbackUsed', 'true');
core.setOutput('providerFallbackReason', result.reason);
} else {
core.setOutput('providerFallbackUsed', 'false');
}
}
// Detect LocalStack endpoints and handle AWS provider appropriately
// AWS_FORCE_PROVIDER options:
// - 'aws': Force AWS provider (requires LocalStack Pro with ECS support)
@@ -219,30 +182,6 @@ class Orchestrator {
if (baseImage.includes(`undefined`)) {
throw new Error(`baseImage is undefined`);
}
try {
return await Orchestrator.runWithProvider(buildParameters, baseImage);
} catch (primaryError: any) {
// Retry on fallback provider if enabled and a fallback is configured
const fallback = buildParameters.fallbackProviderStrategy;
const alreadyOnFallback = buildParameters.providerStrategy === fallback;
if (buildParameters.retryOnFallback && fallback && !alreadyOnFallback) {
OrchestratorLogger.log(
`Primary provider '${buildParameters.providerStrategy}' failed: ${primaryError.message}`,
);
OrchestratorLogger.log(`Retrying build on fallback provider '${fallback}'...`);
buildParameters.providerStrategy = fallback;
core.setOutput('providerFallbackUsed', 'true');
core.setOutput('providerFallbackReason', `Primary provider failed: ${primaryError.message}`);
return await Orchestrator.runWithProvider(buildParameters, baseImage);
}
throw primaryError;
}
}
private static async runWithProvider(buildParameters: BuildParameters, baseImage: string) {
await Orchestrator.setup(buildParameters);
// When aws-local mode is enabled, validate AWS CloudFormation templates
@@ -250,10 +189,12 @@ class Orchestrator {
if (Orchestrator.validateAwsTemplates) {
await Orchestrator.validateAwsCloudFormationTemplates();
}
// Setup workflow with optional init timeout
await Orchestrator.setupWorkflowWithTimeout();
await Orchestrator.Provider.setupWorkflow(
Orchestrator.buildParameters.buildGuid,
Orchestrator.buildParameters,
Orchestrator.buildParameters.branch,
Orchestrator.defaultSecrets,
);
try {
if (buildParameters.maxRetainedWorkspaces > 0) {
Orchestrator.lockedWorkspace = SharedWorkspaceLocking.NewWorkspaceName();
@@ -334,39 +275,6 @@ class Orchestrator {
}
}
/**
 * Runs the provider's setupWorkflow with an optional timeout.
 *
 * When `providerInitTimeout` is not a positive number, setupWorkflow is awaited
 * with no time limit. Otherwise it is raced against a timer; if the timer fires
 * first, this method rejects with a "timed out" error so the caller can react
 * (for example by retrying on a fallback provider). The pending setupWorkflow
 * call itself is not cancelled when the timeout wins.
 */
private static async setupWorkflowWithTimeout() {
  const timeoutSeconds = Orchestrator.buildParameters.providerInitTimeout;
  const setupPromise = Orchestrator.Provider.setupWorkflow(
    Orchestrator.buildParameters.buildGuid,
    Orchestrator.buildParameters,
    Orchestrator.buildParameters.branch,
    Orchestrator.defaultSecrets,
  );

  // `!(x > 0)` also treats undefined/NaN as "disabled"; with `x <= 0` those
  // values would arm a timer with a NaN delay that fires almost immediately.
  if (!(timeoutSeconds > 0)) {
    await setupPromise;

    return;
  }

  OrchestratorLogger.log(`Provider init timeout: ${timeoutSeconds}s`);

  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeoutPromise = new Promise<never>((_, reject) => {
    timer = setTimeout(
      () => reject(new Error(`Provider initialization timed out after ${timeoutSeconds}s`)),
      timeoutSeconds * 1000,
    );
  });

  try {
    await Promise.race([setupPromise, timeoutPromise]);
  } finally {
    // Always disarm the timer so it cannot keep the event loop alive
    // after setup has already finished (or failed).
    clearTimeout(timer);
  }
}
private static async updateStatusWithBuildParameters() {
const content = { ...Orchestrator.buildParameters };
content.gitPrivateToken = ``;

View File

@@ -1,318 +0,0 @@
import { RunnerAvailabilityService } from './runner-availability-service';
// Mock @octokit/core
jest.mock('@octokit/core', () => ({
Octokit: jest.fn().mockImplementation(() => ({
request: jest.fn(),
})),
}));
jest.mock('./orchestrator-logger', () => ({
__esModule: true,
default: {
log: jest.fn(),
logWarning: jest.fn(),
error: jest.fn(),
},
}));
import { Octokit } from '@octokit/core';
const MockedOctokit = Octokit as jest.MockedClass<typeof Octokit>;
/**
 * Test helper: converts compact runner descriptions into runner objects for
 * use as mocked API response data.
 *
 * Each output runner gets a 1-based `id` derived from its position, and every
 * label string is wrapped as `{ name }`.
 */
function createMockRunners(runners: Array<{ name: string; status: string; busy: boolean; labels: string[] }>) {
  return Array.from(runners, ({ name, status, busy, labels }, index) => {
    const labelObjects = labels.map((labelName) => ({ name: labelName }));

    return {
      id: index + 1,
      name,
      status,
      busy,
      labels: labelObjects,
    };
  });
}
describe('RunnerAvailabilityService', () => {
beforeEach(() => {
jest.clearAllMocks();
});
describe('checkAvailability', () => {
it('should skip check and not fallback when no token is provided', async () => {
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', '', [], 1);
expect(result.shouldFallback).toBe(false);
expect(result.reason).toContain('No GitHub token');
});
it('should fallback when no runners are registered', async () => {
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners: [] } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
expect(result.shouldFallback).toBe(true);
expect(result.reason).toContain('No runners registered');
expect(result.totalRunners).toBe(0);
});
it('should not fallback when enough idle runners are available', async () => {
const runners = createMockRunners([
{ name: 'runner-1', status: 'online', busy: false, labels: ['self-hosted', 'linux'] },
{ name: 'runner-2', status: 'online', busy: false, labels: ['self-hosted', 'linux'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
expect(result.shouldFallback).toBe(false);
expect(result.idleRunners).toBe(2);
expect(result.totalRunners).toBe(2);
});
it('should fallback when all runners are busy', async () => {
const runners = createMockRunners([
{ name: 'runner-1', status: 'online', busy: true, labels: ['self-hosted'] },
{ name: 'runner-2', status: 'online', busy: true, labels: ['self-hosted'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
expect(result.shouldFallback).toBe(true);
expect(result.idleRunners).toBe(0);
expect(result.matchingRunners).toBe(2);
});
it('should fallback when all runners are offline', async () => {
const runners = createMockRunners([
{ name: 'runner-1', status: 'offline', busy: false, labels: ['self-hosted'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
expect(result.shouldFallback).toBe(true);
expect(result.idleRunners).toBe(0);
});
it('should filter runners by required labels', async () => {
const runners = createMockRunners([
{ name: 'linux-runner', status: 'online', busy: false, labels: ['self-hosted', 'linux'] },
{ name: 'windows-runner', status: 'online', busy: false, labels: ['self-hosted', 'windows'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability(
'owner',
'repo',
'token',
['self-hosted', 'linux'],
1,
);
expect(result.shouldFallback).toBe(false);
expect(result.matchingRunners).toBe(1);
expect(result.idleRunners).toBe(1);
expect(result.totalRunners).toBe(2);
});
it('should fallback when no runners match required labels', async () => {
const runners = createMockRunners([
{ name: 'windows-runner', status: 'online', busy: false, labels: ['self-hosted', 'windows'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability(
'owner',
'repo',
'token',
['self-hosted', 'linux'],
1,
);
expect(result.shouldFallback).toBe(true);
expect(result.matchingRunners).toBe(0);
expect(result.idleRunners).toBe(0);
});
it('should respect minAvailable threshold', async () => {
const runners = createMockRunners([{ name: 'runner-1', status: 'online', busy: false, labels: ['self-hosted'] }]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
// Need 2, have 1 — should fallback
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 2);
expect(result.shouldFallback).toBe(true);
expect(result.idleRunners).toBe(1);
});
it('should be case-insensitive for label matching', async () => {
const runners = createMockRunners([
{ name: 'runner-1', status: 'online', busy: false, labels: ['Self-Hosted', 'Linux'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability(
'owner',
'repo',
'token',
['self-hosted', 'linux'],
1,
);
expect(result.shouldFallback).toBe(false);
expect(result.matchingRunners).toBe(1);
});
it('should not fallback on API error (fail-open)', async () => {
const mockRequest = jest.fn().mockRejectedValue(new Error('403 Forbidden'));
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
expect(result.shouldFallback).toBe(false);
expect(result.reason).toContain('Runner check failed');
});
it('should count only online+idle runners', async () => {
const runners = createMockRunners([
{ name: 'idle', status: 'online', busy: false, labels: ['self-hosted'] },
{ name: 'busy', status: 'online', busy: true, labels: ['self-hosted'] },
{ name: 'offline', status: 'offline', busy: false, labels: ['self-hosted'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
expect(result.shouldFallback).toBe(false);
expect(result.totalRunners).toBe(3);
expect(result.matchingRunners).toBe(3);
expect(result.idleRunners).toBe(1);
});
});
describe('pagination limits', () => {
  /**
   * Builds one full page (100 online, idle, self-hosted runners).
   * A full page is what makes the service request the next page.
   */
  const fullPage = (namePrefix: string) =>
    createMockRunners(
      Array.from({ length: 100 }, (_, i) => ({
        name: `${namePrefix}-${i}`,
        status: 'online' as const,
        busy: false,
        labels: ['self-hosted'],
      })),
    );

  it('should stop paginating after reaching the page limit', async () => {
    // Always return full pages to force continued pagination
    let callCount = 0;
    const mockRequest = jest.fn().mockImplementation(() => {
      callCount++;

      return Promise.resolve({ status: 200, data: { runners: fullPage(`runner-${callCount}`) } });
    });
    MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));

    const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);

    // Should have requested exactly 100 pages (the MAX_PAGINATION_PAGES limit)
    expect(mockRequest).toHaveBeenCalledTimes(100);
    // Should still have runners from the pages it did fetch
    expect(result.totalRunners).toBe(10000);
    expect(result.shouldFallback).toBe(false);
  });

  it('should stop paginating on rate limit (HTTP 403)', async () => {
    let callCount = 0;
    const mockRequest = jest.fn().mockImplementation(() => {
      callCount++;
      if (callCount === 2) {
        // Octokit throws for non-2xx responses
        const error: any = new Error('API rate limit exceeded');
        error.status = 403;
        error.response = {
          status: 403,
          headers: { 'x-ratelimit-reset': String(Math.floor(Date.now() / 1000) + 3600) },
        };

        return Promise.reject(error);
      }

      return Promise.resolve({ status: 200, data: { runners: fullPage('runner') } });
    });
    MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));

    const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);

    // Should have stopped at page 2 (rate limited)
    expect(mockRequest).toHaveBeenCalledTimes(2);
    // Should use the 100 runners from the first page
    expect(result.totalRunners).toBe(100);
    expect(result.shouldFallback).toBe(false);
  });

  it('should stop paginating on rate limit (HTTP 429)', async () => {
    let callCount = 0;
    const mockRequest = jest.fn().mockImplementation(() => {
      callCount++;
      if (callCount === 1) {
        // Octokit throws for non-2xx responses
        const error: any = new Error('Too Many Requests');
        error.status = 429;
        error.response = { status: 429, headers: {} };

        return Promise.reject(error);
      }

      return Promise.resolve({ status: 200, data: { runners: [] } });
    });
    MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));

    const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);

    // Should have stopped at first page (rate limited immediately)
    expect(mockRequest).toHaveBeenCalledTimes(1);
    // No runners found — should fallback
    expect(result.totalRunners).toBe(0);
    expect(result.shouldFallback).toBe(true);
  });

  it('should handle pagination timeout gracefully', async () => {
    // Patch Date.now to simulate the timeout
    const originalDateNow = Date.now;
    let callCount = 0;
    const mockRequest = jest.fn().mockImplementation(() => {
      callCount++;
      // From the second request on, report a clock that is past the timeout
      if (callCount >= 2) {
        Date.now = jest.fn(() => originalDateNow() + 31_000);
      }

      return Promise.resolve({ status: 200, data: { runners: fullPage(`runner-${callCount}`) } });
    });
    MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));

    try {
      const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);

      // Should have stopped after timeout was detected (2 pages: first succeeds, second triggers timeout check)
      expect(mockRequest.mock.calls.length).toBeLessThanOrEqual(3);
      // Should have runners from pages fetched before timeout
      expect(result.totalRunners).toBeGreaterThan(0);
    } finally {
      // Restore even when an assertion above fails, so the patched clock
      // cannot leak into tests that run afterwards.
      Date.now = originalDateNow;
    }
  });
});
});

View File

@@ -1,205 +0,0 @@
import { Octokit } from '@octokit/core';
import OrchestratorLogger from './orchestrator-logger';
/** The runner fields this module reads from a repository runners listing. */
interface GitHubRunner {
  /** Numeric runner identifier. */
  id: number;
  /** Runner name. */
  name: string;
  /** Connection state; only `online` runners can be counted as idle. */
  status: 'online' | 'offline';
  /** True while the runner is executing a job. */
  busy: boolean;
  /** Labels attached to the runner; names are compared case-insensitively. */
  labels: { name: string }[];
}
/** Outcome of a runner availability check: the routing decision plus the counts behind it. */
interface RunnerCheckResult {
// True when no runners were found or fewer idle runners than required were found.
// False when the check was skipped (no token) or failed.
shouldFallback: boolean;
// Human-readable explanation of the decision.
reason: string;
// Number of runners fetched, before label filtering (0 when the check was skipped or failed).
totalRunners: number;
// Number of fetched runners that carry every required label.
matchingRunners: number;
// Number of matching runners that are online and not busy.
idleRunners: number;
}
/**
 * Upper bound on how many result pages one availability check may request.
 * With 100 runners per page, a single check sees at most 10,000 runners.
 */
const MAX_PAGINATION_PAGES = 100;

/**
 * Overall time budget, in milliseconds, for walking the result pages.
 * Stops the loop when GitHub is slow or the listing is unexpectedly deep.
 */
const PAGINATION_TIMEOUT_MS = 30 * 1000;
/**
 * Checks GitHub Actions runner availability to support automatic provider fallback.
 *
 * When a user configures `runnerCheckEnabled: true` with a `fallbackProviderStrategy`,
 * this service queries the GitHub API for runner status before the build starts.
 * If insufficient runners are available, the orchestrator routes to the fallback provider.
 */
export class RunnerAvailabilityService {
  /**
   * Check if enough runners are available to handle the build.
   *
   * API failures (permissions, network, unexpected responses) are caught and reported through
   * `reason` with `shouldFallback: false`, so a broken check does not reroute the build.
   * A rate limit hit while paginating is not a failure: the decision is made from the runners
   * fetched up to that point.
   *
   * @param owner - GitHub repository owner
   * @param repo - GitHub repository name
   * @param token - GitHub token with repo/actions scope; when empty the check is skipped
   * @param requiredLabels - Labels runners must have (empty = any runner)
   * @param minAvailable - Minimum idle runners required
   * @returns RunnerCheckResult with decision and diagnostics
   */
  static async checkAvailability(
    owner: string,
    repo: string,
    token: string,
    requiredLabels: string[],
    minAvailable: number,
  ): Promise<RunnerCheckResult> {
    if (!token) {
      return RunnerAvailabilityService.resultWithoutRunners(
        false,
        'No GitHub token available — skipping runner check',
      );
    }

    try {
      const octokit = new Octokit({ auth: token });

      // Fetch all runners for the repository
      const runners = await RunnerAvailabilityService.fetchRunners(octokit, owner, repo);
      if (runners.length === 0) {
        return RunnerAvailabilityService.resultWithoutRunners(true, 'No runners registered for this repository');
      }

      // Filter by required labels
      const matching = RunnerAvailabilityService.filterByLabels(runners, requiredLabels);

      // Count idle (online + not busy)
      const idle = matching.filter((runner) => runner.status === 'online' && !runner.busy);

      return {
        shouldFallback: idle.length < minAvailable,
        reason:
          idle.length >= minAvailable
            ? `${idle.length} idle runner(s) available (need ${minAvailable})`
            : `Only ${idle.length} idle runner(s) available, need ${minAvailable}`,
        totalRunners: runners.length,
        matchingRunners: matching.length,
        idleRunners: idle.length,
      };
    } catch (error: unknown) {
      // If the API call fails (permissions, network, etc.), don't block the build
      const message = RunnerAvailabilityService.errorMessage(error);
      OrchestratorLogger.log(`Runner availability check failed: ${message}`);

      return RunnerAvailabilityService.resultWithoutRunners(
        false,
        `Runner check failed (${message}) — proceeding with primary provider`,
      );
    }
  }

  /**
   * Fetch all runners for a repository, handling pagination.
   *
   * Includes defensive limits:
   * - Maximum page count (MAX_PAGINATION_PAGES) to prevent infinite loops
   * - Total timeout (PAGINATION_TIMEOUT_MS) to prevent indefinite API calls
   * - Rate-limit detection (HTTP 429, or HTTP 403 that carries a rate-limit signal)
   *
   * When a limit is hit, the runners collected so far are returned.
   *
   * @throws Re-throws any request error that is not a rate limit (for example a 403 caused by
   *   missing token permissions) so the caller can report the check as failed.
   */
  private static async fetchRunners(octokit: Octokit, owner: string, repo: string): Promise<GitHubRunner[]> {
    const allRunners: GitHubRunner[] = [];
    const perPage = 100;
    const startTime = Date.now();
    let page = 1;

    while (page <= MAX_PAGINATION_PAGES) {
      // Check total timeout
      const elapsed = Date.now() - startTime;
      if (elapsed > PAGINATION_TIMEOUT_MS) {
        OrchestratorLogger.logWarning(
          `[RunnerAvailability] Pagination timeout reached after ${page - 1} pages and ${elapsed}ms. ` +
            `Using ${allRunners.length} runners found so far.`,
        );
        break;
      }

      // Kept loosely typed, as before: the payload is narrowed to GitHubRunner[] below.
      let response: any;
      try {
        response = await octokit.request('GET /repos/{owner}/{repo}/actions/runners', {
          owner,
          repo,
          per_page: perPage,
          page,
        });
      } catch (requestError: unknown) {
        // Octokit throws for non-2xx responses. Only a genuine rate limit ends pagination quietly.
        const rateLimitNotice = RunnerAvailabilityService.rateLimitNotice(requestError);
        if (rateLimitNotice === undefined) {
          // Re-throw non-rate-limit errors to be handled by the caller
          throw requestError;
        }
        OrchestratorLogger.logWarning(
          `[RunnerAvailability] ${rateLimitNotice} Using ${allRunners.length} runners found so far.`,
        );
        break;
      }

      const runners = (response.data.runners ?? []) as GitHubRunner[];
      allRunners.push(...runners);

      // A short page is the last page.
      if (runners.length < perPage) break;
      page++;
    }

    if (page > MAX_PAGINATION_PAGES) {
      OrchestratorLogger.logWarning(
        `[RunnerAvailability] Maximum pagination limit reached (${MAX_PAGINATION_PAGES} pages). ` +
          `Using ${allRunners.length} runners found so far.`,
      );
    }

    if (allRunners.length === 0) {
      OrchestratorLogger.log(
        '[RunnerAvailability] No runners found. Possible causes: ' +
          'wrong token permissions (needs repo or actions scope), ' +
          'no self-hosted runners registered, ' +
          'or runners are registered at the organization level instead of the repository.',
      );
    }

    return allRunners;
  }

  /**
   * Filter runners by required labels. A runner matches if it has ALL required labels.
   * Label names are compared case-insensitively.
   * If requiredLabels is empty, all runners match.
   */
  private static filterByLabels(runners: GitHubRunner[], requiredLabels: string[]): GitHubRunner[] {
    if (requiredLabels.length === 0) return runners;

    // Lower-case the requirements once instead of once per runner.
    const wanted = requiredLabels.map((label) => label.toLowerCase());

    return runners.filter((runner) => {
      const present = new Set(runner.labels.map((label) => label.name.toLowerCase()));

      return wanted.every((label) => present.has(label));
    });
  }

  /** Build a result for the cases in which no runner counts are available. */
  private static resultWithoutRunners(shouldFallback: boolean, reason: string): RunnerCheckResult {
    return { shouldFallback, reason, totalRunners: 0, matchingRunners: 0, idleRunners: 0 };
  }

  /** Extract a printable message from whatever value was thrown. */
  private static errorMessage(error: unknown): string {
    if (error instanceof Error) return error.message;
    if (typeof error === 'object' && error !== null && 'message' in error) {
      return String((error as { message: unknown }).message);
    }

    return String(error);
  }

  /**
   * Classify a failed request.
   *
   * HTTP 429 always counts as a rate limit. HTTP 403 counts only when it carries a rate-limit
   * signal: `x-ratelimit-remaining` of 0, a `retry-after` header, or a message mentioning a
   * rate limit. A plain 403 (for example insufficient token permissions) is not a rate limit.
   *
   * @returns The warning text for a rate limit, or undefined for any other error.
   */
  private static rateLimitNotice(error: unknown): string | undefined {
    const failure = (error ?? {}) as {
      status?: number;
      message?: unknown;
      headers?: Record<string, unknown>;
      response?: { status?: number; headers?: Record<string, unknown> };
    };
    const status = failure.status ?? failure.response?.status;
    if (status !== 403 && status !== 429) return undefined;

    const header = (name: string): unknown => failure.response?.headers?.[name] ?? failure.headers?.[name];

    if (status === 403) {
      const quotaExhausted = String(header('x-ratelimit-remaining')) === '0';
      const hasRetryAfter = header('retry-after') != null;
      const mentionsRateLimit = /rate limit/i.test(String(failure.message ?? ''));
      if (!quotaExhausted && !hasRetryAfter && !mentionsRateLimit) return undefined;
    }

    // Validate the reset header before formatting: toISOString() throws on an invalid date.
    const resetHeader = header('x-ratelimit-reset');
    const resetAt = resetHeader ? new Date(Number.parseInt(String(resetHeader), 10) * 1000) : undefined;
    const resetMessage =
      resetAt !== undefined && !Number.isNaN(resetAt.getTime()) ? ` Resets at ${resetAt.toISOString()}` : '';

    return `GitHub API rate limit reached (HTTP ${status}).${resetMessage}`;
  }
}