Compare commits

..

7 Commits

Author SHA1 Message Date
frostebite
dcb0894d0e fix: replace orchestrator-develop branch references with main
The orchestrator-develop branch no longer exists. Update all fallback
clone commands and test fixtures to use main instead.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-09 20:08:44 +00:00
frostebite
90b9b0c7b0 ci: set macOS builds to continue-on-error
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 23:33:48 +00:00
frostebite
e9c247f04f style: fix prettier formatting
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 14:10:24 +00:00
frostebite
cff759721a fix(load-balancing): add pagination limits and rate-limit detection
Cap pagination at 100 pages (10,000 runners max), detect GitHub API
rate limiting (403/429) with reset time reporting, add 30-second total
timeout for pagination loop. Log clear diagnostic when no runners found
suggesting possible causes (token permissions, runner registration).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 13:00:17 +00:00
frostebite
7e9d0bf53e test(orchestrator): add runner availability service tests
Covers: no token skip, no runners fallback, busy/offline runners,
label filtering (case-insensitive), minAvailable threshold,
fail-open on API error, mixed runner states.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 07:58:58 +00:00
frostebite
8194790728 feat(orchestrator): add retry-on-fallback and provider init timeout
Adds retryOnFallback (retry failed builds on alternate provider) and
providerInitTimeout (swap provider if init takes too long). Refactors
run() into run()/runWithProvider() to support retry loop.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 07:45:54 +00:00
frostebite
786ee3799c feat(orchestrator): automatic provider fallback with runner availability check
Adds built-in load balancing: check GitHub runner availability before
builds start, auto-route to a fallback provider when runners are busy
or offline. Eliminates the need for a separate check-runner job.

New inputs: fallbackProviderStrategy, runnerCheckEnabled,
runnerCheckLabels, runnerCheckMinAvailable.

Outputs providerFallbackUsed and providerFallbackReason for workflow
visibility.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-05 07:39:23 +00:00
18 changed files with 966 additions and 150 deletions

View File

@@ -12,6 +12,7 @@ jobs:
buildForAllPlatformsMacOS:
name: ${{ matrix.targetPlatform }} on ${{ matrix.unityVersion }}
runs-on: macos-latest
continue-on-error: true
strategy:
fail-fast: false
matrix:

View File

@@ -39,7 +39,7 @@ jobs:
- unityVersion: 6000.0.36f1
targetPlatform: StandaloneWindows64
buildProfile: 'Assets/Settings/Build Profiles/Sample Windows Build Profile.asset'
steps:
###########################
# Checkout #
@@ -66,34 +66,6 @@ jobs:
run: |
Move-Item -Path "./test-project/ProjectSettings/ProjectSettingsIl2cpp.asset" -Destination "./test-project/ProjectSettings/ProjectSettings.asset" -Force
###########################
# Docker Readiness #
###########################
- name: Ensure Docker daemon is ready
timeout-minutes: 2
shell: powershell
run: |
$maxRetries = 10
$retryDelay = 6
for ($i = 0; $i -lt $maxRetries; $i++) {
$svc = Get-Service docker -ErrorAction SilentlyContinue
if ($svc -and $svc.Status -eq 'Running') {
docker version 2>$null
if ($LASTEXITCODE -eq 0) {
Write-Host "Docker is ready."
exit 0
}
}
if ($svc -and $svc.Status -eq 'Stopped') {
Write-Host "Docker service stopped, attempting to start..."
Start-Service docker -ErrorAction SilentlyContinue
}
Write-Host "Waiting for Docker daemon (attempt $($i+1)/$maxRetries)..."
Start-Sleep -Seconds $retryDelay
}
Write-Error "Docker daemon did not start within $($maxRetries * $retryDelay) seconds"
exit 1
###########################
# Build #
###########################
@@ -174,8 +146,6 @@ jobs:
###########################
- uses: actions/upload-artifact@v4
with:
name:
Build ${{ matrix.targetPlatform }} on Windows (${{ matrix.unityVersion }})${{ matrix.enableGpu && ' With
GPU' || '' }}${{ matrix.buildProfile && ' With Build Profile' || '' }}
name: Build ${{ matrix.targetPlatform }} on Windows (${{ matrix.unityVersion }})${{ matrix.enableGpu && ' With GPU' || '' }}${{ matrix.buildProfile && ' With Build Profile' || '' }}
path: build
retention-days: 14

View File

@@ -54,7 +54,7 @@ jobs:
# AWS_STACK_NAME: game-ci-github-pipelines
CHECKS_UPDATE: ${{ github.event.inputs.checksObject }}
run: |
git clone -b orchestrator-develop https://github.com/game-ci/unity-builder
git clone -b main https://github.com/game-ci/unity-builder
cd unity-builder
yarn
ls

View File

@@ -91,7 +91,7 @@ jobs:
-e SERVICES=s3,cloudformation,ecs,kinesis,cloudwatch,logs,efs,ec2,iam,elasticfilesystem,secretsmanager,lambda,events,sts \
-e DEBUG=0 \
-e HOSTNAME_EXTERNAL=localstack-main \
localstack/localstack:4.4.0 || true
localstack/localstack:latest || true
# Wait for LocalStack to be ready - check both health endpoint and S3 service
echo "Waiting for LocalStack to be ready..."
MAX_ATTEMPTS=60

View File

@@ -1,81 +0,0 @@
name: Sync Secrets to Repositories
on:
workflow_dispatch:
inputs:
target_repo:
description: 'Target repository (org/repo format)'
required: true
default: 'game-ci/orchestrator'
type: choice
options:
- game-ci/orchestrator
- game-ci/cli
dry_run:
description: 'Dry run (list secrets to sync without writing)'
required: false
default: false
type: boolean
jobs:
sync-secrets:
name: Sync secrets to ${{ inputs.target_repo }}
runs-on: ubuntu-latest
steps:
- name: Sync secrets
env:
GH_TOKEN: ${{ secrets.GIT_PRIVATE_TOKEN }}
TARGET_REPO: ${{ inputs.target_repo }}
DRY_RUN: ${{ inputs.dry_run }}
# Secrets to sync — values come from repo + org secrets available here
SECRET_UNITY_EMAIL: ${{ secrets.UNITY_EMAIL }}
SECRET_UNITY_PASSWORD: ${{ secrets.UNITY_PASSWORD }}
SECRET_UNITY_SERIAL: ${{ secrets.UNITY_SERIAL }}
SECRET_GIT_PRIVATE_TOKEN: ${{ secrets.GIT_PRIVATE_TOKEN }}
SECRET_LOCALSTACK_AUTH_TOKEN: ${{ secrets.LOCALSTACK_AUTH_TOKEN }}
SECRET_GOOGLE_SERVICE_ACCOUNT_EMAIL: ${{ secrets.GOOGLE_SERVICE_ACCOUNT_EMAIL }}
SECRET_GOOGLE_SERVICE_ACCOUNT_KEY: ${{ secrets.GOOGLE_SERVICE_ACCOUNT_KEY }}
SECRET_CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
run: |
SECRETS=(
"UNITY_EMAIL:SECRET_UNITY_EMAIL"
"UNITY_PASSWORD:SECRET_UNITY_PASSWORD"
"UNITY_SERIAL:SECRET_UNITY_SERIAL"
"GIT_PRIVATE_TOKEN:SECRET_GIT_PRIVATE_TOKEN"
"LOCALSTACK_AUTH_TOKEN:SECRET_LOCALSTACK_AUTH_TOKEN"
"GOOGLE_SERVICE_ACCOUNT_EMAIL:SECRET_GOOGLE_SERVICE_ACCOUNT_EMAIL"
"GOOGLE_SERVICE_ACCOUNT_KEY:SECRET_GOOGLE_SERVICE_ACCOUNT_KEY"
"CODECOV_TOKEN:SECRET_CODECOV_TOKEN"
)
synced=0
skipped=0
for entry in "${SECRETS[@]}"; do
name="${entry%%:*}"
env_var="${entry##*:}"
value="${!env_var}"
if [ -z "$value" ]; then
echo "⏭ SKIP: $name (not available in this repo's context)"
skipped=$((skipped + 1))
continue
fi
if [ "$DRY_RUN" = "true" ]; then
echo "🔍 DRY RUN: would sync $name → $TARGET_REPO"
else
echo "$value" | gh secret set "$name" -R "$TARGET_REPO" --body -
echo "✅ SYNCED: $name → $TARGET_REPO"
fi
synced=$((synced + 1))
done
echo ""
echo "=== Summary ==="
echo "Synced: $synced"
echo "Skipped (not available): $skipped"
echo "Target: $TARGET_REPO"
if [ "$DRY_RUN" = "true" ]; then
echo "Mode: DRY RUN (no secrets were written)"
fi

View File

@@ -194,6 +194,42 @@ inputs:
description:
'[Orchestrator] Either local, k8s or aws can be used to run builds on a remote cluster. Additional parameters must
be configured.'
fallbackProviderStrategy:
default: ''
required: false
description:
'[Orchestrator] Fallback provider when the primary is unavailable. Used with runnerCheckEnabled for automatic
failover, or as a catch-all if the primary provider fails to initialize.'
runnerCheckEnabled:
default: 'false'
required: false
description:
'[Orchestrator] Check GitHub Actions runner availability before starting a build. When no suitable runners are
available and fallbackProviderStrategy is set, automatically routes to the fallback provider.'
runnerCheckLabels:
default: ''
required: false
description:
'[Orchestrator] Comma-separated runner labels to filter when checking availability (e.g. self-hosted,linux).
When empty, checks all runners in the repository.'
runnerCheckMinAvailable:
default: '1'
required: false
description:
'[Orchestrator] Minimum number of idle runners required for the primary provider. If fewer are available,
routes to fallbackProviderStrategy.'
retryOnFallback:
default: 'false'
required: false
description:
'[Orchestrator] When true and fallbackProviderStrategy is set, automatically retry the build on the fallback
provider if the primary provider fails. Useful for long builds where transient cloud failures are common.'
providerInitTimeout:
default: '0'
required: false
description:
'[Orchestrator] Maximum seconds to wait for the primary provider to initialize (setupWorkflow). If exceeded
and fallbackProviderStrategy is set, switches to the fallback. Set to 0 to disable (default).'
resourceTracking:
default: 'false'
required: false

273
dist/index.js generated vendored
View File

@@ -327,6 +327,12 @@ class BuildParameters {
containerRegistryRepository: input_1.default.containerRegistryRepository,
containerRegistryImageVersion: input_1.default.containerRegistryImageVersion,
providerStrategy: orchestrator_options_1.default.providerStrategy,
fallbackProviderStrategy: orchestrator_options_1.default.fallbackProviderStrategy,
runnerCheckEnabled: orchestrator_options_1.default.runnerCheckEnabled,
runnerCheckLabels: orchestrator_options_1.default.runnerCheckLabels,
runnerCheckMinAvailable: orchestrator_options_1.default.runnerCheckMinAvailable,
retryOnFallback: orchestrator_options_1.default.retryOnFallback,
providerInitTimeout: orchestrator_options_1.default.providerInitTimeout,
buildPlatform: orchestrator_options_1.default.buildPlatform,
kubeConfig: orchestrator_options_1.default.kubeConfig,
containerMemory: orchestrator_options_1.default.containerMemory,
@@ -2204,6 +2210,25 @@ class OrchestratorOptions {
}
return provider || 'local';
}
static get fallbackProviderStrategy() {
return OrchestratorOptions.getInput('fallbackProviderStrategy') || '';
}
static get runnerCheckEnabled() {
return OrchestratorOptions.getInput('runnerCheckEnabled') === 'true';
}
static get runnerCheckLabels() {
const labels = OrchestratorOptions.getInput('runnerCheckLabels');
return labels ? labels.split(',').map((l) => l.trim()) : [];
}
static get runnerCheckMinAvailable() {
return Number(OrchestratorOptions.getInput('runnerCheckMinAvailable')) || 1;
}
static get retryOnFallback() {
return OrchestratorOptions.getInput('retryOnFallback') === 'true';
}
static get providerInitTimeout() {
return Number(OrchestratorOptions.getInput('providerInitTimeout')) || 0;
}
static get containerCpu() {
return OrchestratorOptions.getInput('containerCpu') || `1024`;
}
@@ -2508,6 +2533,7 @@ const follow_log_stream_service_1 = __nccwpck_require__(36149);
const orchestrator_result_1 = __importDefault(__nccwpck_require__(86819));
const orchestrator_options_1 = __importDefault(__nccwpck_require__(82473));
const resource_tracking_1 = __importDefault(__nccwpck_require__(42604));
const runner_availability_service_1 = __nccwpck_require__(18876);
class Orchestrator {
static get isOrchestratorEnvironment() {
return process.env[`GITHUB_ACTIONS`] !== `true`;
@@ -2544,6 +2570,26 @@ class Orchestrator {
}
static async setupSelectedBuildPlatform() {
orchestrator_logger_1.default.log(`Orchestrator platform selected ${Orchestrator.buildParameters.providerStrategy}`);
// Check runner availability and apply fallback if needed
if (Orchestrator.buildParameters.runnerCheckEnabled && Orchestrator.buildParameters.fallbackProviderStrategy) {
const owner = orchestrator_options_1.default.githubOwner;
const repo = orchestrator_options_1.default.githubRepoName;
const token = Orchestrator.buildParameters.gitPrivateToken || process.env.GITHUB_TOKEN || '';
orchestrator_logger_1.default.log(`Checking runner availability (labels: [${Orchestrator.buildParameters.runnerCheckLabels.join(', ')}], min: ${Orchestrator.buildParameters.runnerCheckMinAvailable})`);
const result = await runner_availability_service_1.RunnerAvailabilityService.checkAvailability(owner, repo, token, Orchestrator.buildParameters.runnerCheckLabels, Orchestrator.buildParameters.runnerCheckMinAvailable);
orchestrator_logger_1.default.log(`Runner check: ${result.totalRunners} total, ${result.matchingRunners} matching, ${result.idleRunners} idle — ${result.reason}`);
if (result.shouldFallback) {
const original = Orchestrator.buildParameters.providerStrategy;
const fallback = Orchestrator.buildParameters.fallbackProviderStrategy;
orchestrator_logger_1.default.log(`Falling back from '${original}' to '${fallback}' — ${result.reason}`);
Orchestrator.buildParameters.providerStrategy = fallback;
core.setOutput('providerFallbackUsed', 'true');
core.setOutput('providerFallbackReason', result.reason);
}
else {
core.setOutput('providerFallbackUsed', 'false');
}
}
// Detect LocalStack endpoints and handle AWS provider appropriately
// AWS_FORCE_PROVIDER options:
// - 'aws': Force AWS provider (requires LocalStack Pro with ECS support)
@@ -2642,13 +2688,33 @@ class Orchestrator {
if (baseImage.includes(`undefined`)) {
throw new Error(`baseImage is undefined`);
}
try {
return await Orchestrator.runWithProvider(buildParameters, baseImage);
}
catch (primaryError) {
// Retry on fallback provider if enabled and a fallback is configured
const fallback = buildParameters.fallbackProviderStrategy;
const alreadyOnFallback = buildParameters.providerStrategy === fallback;
if (buildParameters.retryOnFallback && fallback && !alreadyOnFallback) {
orchestrator_logger_1.default.log(`Primary provider '${buildParameters.providerStrategy}' failed: ${primaryError.message}`);
orchestrator_logger_1.default.log(`Retrying build on fallback provider '${fallback}'...`);
buildParameters.providerStrategy = fallback;
core.setOutput('providerFallbackUsed', 'true');
core.setOutput('providerFallbackReason', `Primary provider failed: ${primaryError.message}`);
return await Orchestrator.runWithProvider(buildParameters, baseImage);
}
throw primaryError;
}
}
static async runWithProvider(buildParameters, baseImage) {
await Orchestrator.setup(buildParameters);
// When aws-local mode is enabled, validate AWS CloudFormation templates
// This ensures AWS templates are correct even when executing via local-docker
if (Orchestrator.validateAwsTemplates) {
await Orchestrator.validateAwsCloudFormationTemplates();
}
await Orchestrator.Provider.setupWorkflow(Orchestrator.buildParameters.buildGuid, Orchestrator.buildParameters, Orchestrator.buildParameters.branch, Orchestrator.defaultSecrets);
// Setup workflow with optional init timeout
await Orchestrator.setupWorkflowWithTimeout();
try {
if (buildParameters.maxRetainedWorkspaces > 0) {
Orchestrator.lockedWorkspace = shared_workspace_locking_1.default.NewWorkspaceName();
@@ -2698,6 +2764,24 @@ class Orchestrator {
throw error;
}
}
/**
* Runs setupWorkflow with an optional timeout. If providerInitTimeout is set and the
* provider takes longer than that to initialize, throws an error that triggers
* retry-on-fallback (if enabled).
*/
static async setupWorkflowWithTimeout() {
const timeoutSeconds = Orchestrator.buildParameters.providerInitTimeout;
const setupPromise = Orchestrator.Provider.setupWorkflow(Orchestrator.buildParameters.buildGuid, Orchestrator.buildParameters, Orchestrator.buildParameters.branch, Orchestrator.defaultSecrets);
if (timeoutSeconds <= 0) {
await setupPromise;
return;
}
orchestrator_logger_1.default.log(`Provider init timeout: ${timeoutSeconds}s`);
const timeoutPromise = new Promise((_, reject) => {
setTimeout(() => reject(new Error(`Provider initialization timed out after ${timeoutSeconds}s`)), timeoutSeconds * 1000);
});
await Promise.race([setupPromise, timeoutPromise]);
}
static async updateStatusWithBuildParameters() {
const content = { ...Orchestrator.buildParameters };
content.gitPrivateToken = ``;
@@ -3398,7 +3482,7 @@ class AWSTaskRunner {
return { name: x.name, value };
});
}
static async runTask(taskDef, environment, secrets, commands) {
static async runTask(taskDef, environment, commands) {
const cluster = taskDef.baseResources?.find((x) => x.LogicalResourceId === 'ECSCluster')?.PhysicalResourceId || '';
const taskDefinition = taskDef.taskDefResources?.find((x) => x.LogicalResourceId === 'TaskDefinition')?.PhysicalResourceId || '';
const SubnetOne = taskDef.baseResources?.find((x) => x.LogicalResourceId === 'PublicSubnetOne')?.PhysicalResourceId || '';
@@ -3407,11 +3491,6 @@ class AWSTaskRunner {
const streamName = taskDef.taskDefResources?.find((x) => x.LogicalResourceId === 'KinesisStream')?.PhysicalResourceId || '';
// Transform localhost endpoints for container environment
const transformedEnvironment = AWSTaskRunner.transformEndpointsForContainer(environment);
// Merge secrets into environment as plain env vars, matching docker and k8s provider behavior.
// This ensures UNITY_EMAIL, UNITY_PASSWORD, UNITY_SERIAL reach the container reliably
// without depending on CloudFormation Secrets Manager resolution.
const secretsAsEnvironment = secrets.map((s) => ({ name: s.EnvironmentVariable, value: s.ParameterValue }));
const mergedEnvironment = [...transformedEnvironment, ...secretsAsEnvironment];
const runParameters = {
cluster,
taskDefinition,
@@ -3420,7 +3499,7 @@ class AWSTaskRunner {
containerOverrides: [
{
name: taskDef.taskDefStackName,
environment: mergedEnvironment,
environment: transformedEnvironment,
command: ['-c', command_hook_service_1.CommandHookService.ApplyHooksToCommands(commands, orchestrator_1.default.buildParameters)],
},
],
@@ -4454,7 +4533,7 @@ class AWSBuildEnvironment {
try {
const postSetupStacksTimeMs = Date.now();
orchestrator_logger_1.default.log(`Setup job time: ${Math.floor((postSetupStacksTimeMs - startTimeMs) / 1000)}s`);
const { output, shouldCleanup } = await aws_task_runner_1.default.runTask(taskDef, environment, secrets, commands);
const { output, shouldCleanup } = await aws_task_runner_1.default.runTask(taskDef, environment, commands);
postRunTaskTimeMs = Date.now();
orchestrator_logger_1.default.log(`Run job time: ${Math.floor((postRunTaskTimeMs - postSetupStacksTimeMs) / 1000)}s`);
if (shouldCleanup) {
@@ -8644,6 +8723,176 @@ class ResourceTracking {
exports["default"] = ResourceTracking;
/***/ }),
/***/ 18876:
/***/ (function(__unused_webpack_module, exports, __nccwpck_require__) {
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", ({ value: true }));
exports.RunnerAvailabilityService = void 0;
const core_1 = __nccwpck_require__(76762);
const orchestrator_logger_1 = __importDefault(__nccwpck_require__(32549));
/**
* Maximum number of pages to fetch when paginating through GitHub API results.
* 100 pages * 100 per page = 10,000 runners maximum.
*/
const MAX_PAGINATION_PAGES = 100;
/**
* Total timeout in milliseconds for the pagination loop.
* Prevents indefinite API calls if GitHub is slow or pagination is unexpectedly deep.
*/
const PAGINATION_TIMEOUT_MS = 30000;
/**
* Checks GitHub Actions runner availability to support automatic provider fallback.
*
* When a user configures `runnerCheckEnabled: true` with a `fallbackProviderStrategy`,
* this service queries the GitHub API for runner status before the build starts.
* If insufficient runners are available, the orchestrator routes to the fallback provider.
*/
class RunnerAvailabilityService {
/**
* Check if enough runners are available to handle the build.
*
* @param owner - GitHub repository owner
* @param repo - GitHub repository name
* @param token - GitHub token with repo/actions scope
* @param requiredLabels - Labels runners must have (empty = any runner)
* @param minAvailable - Minimum idle runners required
* @returns RunnerCheckResult with decision and diagnostics
*/
static async checkAvailability(owner, repo, token, requiredLabels, minAvailable) {
if (!token) {
return {
shouldFallback: false,
reason: 'No GitHub token available — skipping runner check',
totalRunners: 0,
matchingRunners: 0,
idleRunners: 0,
};
}
try {
const octokit = new core_1.Octokit({ auth: token });
// Fetch all runners for the repository
const runners = await RunnerAvailabilityService.fetchRunners(octokit, owner, repo);
if (runners.length === 0) {
return {
shouldFallback: true,
reason: 'No runners registered for this repository',
totalRunners: 0,
matchingRunners: 0,
idleRunners: 0,
};
}
// Filter by required labels
const matching = RunnerAvailabilityService.filterByLabels(runners, requiredLabels);
// Count idle (online + not busy)
const idle = matching.filter((r) => r.status === 'online' && !r.busy);
const result = {
shouldFallback: idle.length < minAvailable,
reason: idle.length >= minAvailable
? `${idle.length} idle runner(s) available (need ${minAvailable})`
: `Only ${idle.length} idle runner(s) available, need ${minAvailable}`,
totalRunners: runners.length,
matchingRunners: matching.length,
idleRunners: idle.length,
};
return result;
}
catch (error) {
// If the API call fails (permissions, rate limit, etc.), don't block the build
orchestrator_logger_1.default.log(`Runner availability check failed: ${error.message}`);
return {
shouldFallback: false,
reason: `Runner check failed (${error.message}) — proceeding with primary provider`,
totalRunners: 0,
matchingRunners: 0,
idleRunners: 0,
};
}
}
/**
* Fetch all runners for a repository, handling pagination.
*
* Includes defensive limits:
* - Maximum page count (MAX_PAGINATION_PAGES) to prevent infinite loops
* - Total timeout (PAGINATION_TIMEOUT_MS) to prevent indefinite API calls
* - Rate-limit detection (HTTP 403/429 with X-RateLimit-Remaining header)
*/
static async fetchRunners(octokit, owner, repo) {
const allRunners = [];
let page = 1;
const perPage = 100;
const startTime = Date.now();
while (page <= MAX_PAGINATION_PAGES) {
// Check total timeout
if (Date.now() - startTime > PAGINATION_TIMEOUT_MS) {
orchestrator_logger_1.default.logWarning(`[RunnerAvailability] Pagination timeout reached after ${page - 1} pages and ${Date.now() - startTime}ms. ` +
`Using ${allRunners.length} runners found so far.`);
break;
}
let response;
try {
response = await octokit.request('GET /repos/{owner}/{repo}/actions/runners', {
owner,
repo,
per_page: perPage,
page,
});
}
catch (requestError) {
// Octokit throws for non-2xx responses. Check if this is a rate limit error.
const status = requestError.status ?? requestError.response?.status;
if (status === 403 || status === 429) {
const resetTime = requestError.response?.headers?.['x-ratelimit-reset'] ?? requestError.headers?.['x-ratelimit-reset'];
const resetMessage = resetTime
? ` Resets at ${new Date(Number.parseInt(String(resetTime), 10) * 1000).toISOString()}`
: '';
orchestrator_logger_1.default.logWarning(`[RunnerAvailability] GitHub API rate limit reached (HTTP ${status}).${resetMessage} ` +
`Using ${allRunners.length} runners found so far.`);
break;
}
// Re-throw non-rate-limit errors to be handled by the outer catch
throw requestError;
}
const runners = (response.data.runners || []);
allRunners.push(...runners);
if (runners.length < perPage)
break;
page++;
}
if (page > MAX_PAGINATION_PAGES) {
orchestrator_logger_1.default.logWarning(`[RunnerAvailability] Maximum pagination limit reached (${MAX_PAGINATION_PAGES} pages). ` +
`Using ${allRunners.length} runners found so far.`);
}
if (allRunners.length === 0) {
orchestrator_logger_1.default.log('[RunnerAvailability] No runners found. Possible causes: ' +
'wrong token permissions (needs repo or actions scope), ' +
'no self-hosted runners registered, ' +
'or runners are registered at the organization level instead of the repository.');
}
return allRunners;
}
/**
* Filter runners by required labels. A runner matches if it has ALL required labels.
* If requiredLabels is empty, all runners match.
*/
static filterByLabels(runners, requiredLabels) {
if (requiredLabels.length === 0)
return runners;
return runners.filter((runner) => {
const runnerLabelNames = runner.labels.map((l) => l.name.toLowerCase());
return requiredLabels.every((required) => runnerLabelNames.includes(required.toLowerCase()));
});
}
}
exports.RunnerAvailabilityService = RunnerAvailabilityService;
/***/ }),
/***/ 54222:
@@ -9736,8 +9985,7 @@ if [ -n "$(git ls-remote --heads "$REPO" "$BRANCH" 2>/dev/null)" ]; then
git clone -q -b "$BRANCH" "$REPO" /builder
else
echo "Remote branch $BRANCH not found in $REPO; falling back to a known branch"
git clone -q -b orchestrator-develop "$REPO" /builder \
|| git clone -q -b main "$REPO" /builder \
git clone -q -b main "$REPO" /builder \
|| git clone -q "$REPO" /builder
fi
git clone -q -b ${orchestrator_1.default.buildParameters.branch} ${orchestrator_folders_1.OrchestratorFolders.targetBuildRepoUrl} /repo
@@ -9854,8 +10102,7 @@ if [ -n "$(git ls-remote --heads "$REPO" "$BRANCH" 2>/dev/null)" ]; then
git clone -q -b "$BRANCH" "$REPO" "$DEST"
else
echo "Remote branch $BRANCH not found in $REPO; falling back to a known branch"
git clone -q -b orchestrator-develop "$REPO" "$DEST" \
|| git clone -q -b main "$REPO" "$DEST" \
git clone -q -b main "$REPO" "$DEST" \
|| git clone -q "$REPO" "$DEST"
fi
chmod +x ${builderPath}`;

2
dist/index.js.map generated vendored

File diff suppressed because one or more lines are too long

View File

@@ -54,6 +54,12 @@ class BuildParameters {
public sshAgent!: string;
public sshPublicKeysDirectoryPath!: string;
public providerStrategy!: string;
public fallbackProviderStrategy!: string;
public runnerCheckEnabled!: boolean;
public runnerCheckLabels!: string[];
public runnerCheckMinAvailable!: number;
public retryOnFallback!: boolean;
public providerInitTimeout!: number;
public gitPrivateToken!: string;
public awsStackName!: string;
public awsEndpoint?: string;
@@ -194,6 +200,12 @@ class BuildParameters {
containerRegistryRepository: Input.containerRegistryRepository,
containerRegistryImageVersion: Input.containerRegistryImageVersion,
providerStrategy: OrchestratorOptions.providerStrategy,
fallbackProviderStrategy: OrchestratorOptions.fallbackProviderStrategy,
runnerCheckEnabled: OrchestratorOptions.runnerCheckEnabled,
runnerCheckLabels: OrchestratorOptions.runnerCheckLabels,
runnerCheckMinAvailable: OrchestratorOptions.runnerCheckMinAvailable,
retryOnFallback: OrchestratorOptions.retryOnFallback,
providerInitTimeout: OrchestratorOptions.providerInitTimeout,
buildPlatform: OrchestratorOptions.buildPlatform,
kubeConfig: OrchestratorOptions.kubeConfig,
containerMemory: OrchestratorOptions.containerMemory,

View File

@@ -138,6 +138,32 @@ class OrchestratorOptions {
return provider || 'local';
}
static get fallbackProviderStrategy(): string {
return OrchestratorOptions.getInput('fallbackProviderStrategy') || '';
}
static get runnerCheckEnabled(): boolean {
return OrchestratorOptions.getInput('runnerCheckEnabled') === 'true';
}
static get runnerCheckLabels(): string[] {
const labels = OrchestratorOptions.getInput('runnerCheckLabels');
return labels ? labels.split(',').map((l) => l.trim()) : [];
}
static get runnerCheckMinAvailable(): number {
return Number(OrchestratorOptions.getInput('runnerCheckMinAvailable')) || 1;
}
static get retryOnFallback(): boolean {
return OrchestratorOptions.getInput('retryOnFallback') === 'true';
}
static get providerInitTimeout(): number {
return Number(OrchestratorOptions.getInput('providerInitTimeout')) || 0;
}
static get containerCpu(): string {
return OrchestratorOptions.getInput('containerCpu') || `1024`;
}

View File

@@ -20,6 +20,7 @@ import { FollowLogStreamService } from './services/core/follow-log-stream-servic
import OrchestratorResult from './services/core/orchestrator-result';
import OrchestratorOptions from './options/orchestrator-options';
import ResourceTracking from './services/core/resource-tracking';
import { RunnerAvailabilityService } from './services/core/runner-availability-service';
class Orchestrator {
public static Provider: ProviderInterface;
@@ -76,6 +77,42 @@ class Orchestrator {
private static async setupSelectedBuildPlatform() {
OrchestratorLogger.log(`Orchestrator platform selected ${Orchestrator.buildParameters.providerStrategy}`);
// Check runner availability and apply fallback if needed
if (Orchestrator.buildParameters.runnerCheckEnabled && Orchestrator.buildParameters.fallbackProviderStrategy) {
const owner = OrchestratorOptions.githubOwner;
const repo = OrchestratorOptions.githubRepoName;
const token = Orchestrator.buildParameters.gitPrivateToken || process.env.GITHUB_TOKEN || '';
OrchestratorLogger.log(
`Checking runner availability (labels: [${Orchestrator.buildParameters.runnerCheckLabels.join(', ')}], min: ${
Orchestrator.buildParameters.runnerCheckMinAvailable
})`,
);
const result = await RunnerAvailabilityService.checkAvailability(
owner,
repo,
token,
Orchestrator.buildParameters.runnerCheckLabels,
Orchestrator.buildParameters.runnerCheckMinAvailable,
);
OrchestratorLogger.log(
`Runner check: ${result.totalRunners} total, ${result.matchingRunners} matching, ${result.idleRunners} idle — ${result.reason}`,
);
if (result.shouldFallback) {
const original = Orchestrator.buildParameters.providerStrategy;
const fallback = Orchestrator.buildParameters.fallbackProviderStrategy;
OrchestratorLogger.log(`Falling back from '${original}' to '${fallback}' — ${result.reason}`);
Orchestrator.buildParameters.providerStrategy = fallback;
core.setOutput('providerFallbackUsed', 'true');
core.setOutput('providerFallbackReason', result.reason);
} else {
core.setOutput('providerFallbackUsed', 'false');
}
}
// Detect LocalStack endpoints and handle AWS provider appropriately
// AWS_FORCE_PROVIDER options:
// - 'aws': Force AWS provider (requires LocalStack Pro with ECS support)
@@ -182,6 +219,30 @@ class Orchestrator {
if (baseImage.includes(`undefined`)) {
throw new Error(`baseImage is undefined`);
}
try {
return await Orchestrator.runWithProvider(buildParameters, baseImage);
} catch (primaryError: any) {
// Retry on fallback provider if enabled and a fallback is configured
const fallback = buildParameters.fallbackProviderStrategy;
const alreadyOnFallback = buildParameters.providerStrategy === fallback;
if (buildParameters.retryOnFallback && fallback && !alreadyOnFallback) {
OrchestratorLogger.log(
`Primary provider '${buildParameters.providerStrategy}' failed: ${primaryError.message}`,
);
OrchestratorLogger.log(`Retrying build on fallback provider '${fallback}'...`);
buildParameters.providerStrategy = fallback;
core.setOutput('providerFallbackUsed', 'true');
core.setOutput('providerFallbackReason', `Primary provider failed: ${primaryError.message}`);
return await Orchestrator.runWithProvider(buildParameters, baseImage);
}
throw primaryError;
}
}
private static async runWithProvider(buildParameters: BuildParameters, baseImage: string) {
await Orchestrator.setup(buildParameters);
// When aws-local mode is enabled, validate AWS CloudFormation templates
@@ -189,12 +250,10 @@ class Orchestrator {
if (Orchestrator.validateAwsTemplates) {
await Orchestrator.validateAwsCloudFormationTemplates();
}
await Orchestrator.Provider.setupWorkflow(
Orchestrator.buildParameters.buildGuid,
Orchestrator.buildParameters,
Orchestrator.buildParameters.branch,
Orchestrator.defaultSecrets,
);
// Setup workflow with optional init timeout
await Orchestrator.setupWorkflowWithTimeout();
try {
if (buildParameters.maxRetainedWorkspaces > 0) {
Orchestrator.lockedWorkspace = SharedWorkspaceLocking.NewWorkspaceName();
@@ -275,6 +334,39 @@ class Orchestrator {
}
}
/**
* Runs setupWorkflow with an optional timeout. If providerInitTimeout is set and the
* provider takes longer than that to initialize, throws an error that triggers
* retry-on-fallback (if enabled).
*/
private static async setupWorkflowWithTimeout() {
const timeoutSeconds = Orchestrator.buildParameters.providerInitTimeout;
const setupPromise = Orchestrator.Provider.setupWorkflow(
Orchestrator.buildParameters.buildGuid,
Orchestrator.buildParameters,
Orchestrator.buildParameters.branch,
Orchestrator.defaultSecrets,
);
if (timeoutSeconds <= 0) {
await setupPromise;
return;
}
OrchestratorLogger.log(`Provider init timeout: ${timeoutSeconds}s`);
const timeoutPromise = new Promise<never>((_, reject) => {
setTimeout(
() => reject(new Error(`Provider initialization timed out after ${timeoutSeconds}s`)),
timeoutSeconds * 1000,
);
});
await Promise.race([setupPromise, timeoutPromise]);
}
private static async updateStatusWithBuildParameters() {
const content = { ...Orchestrator.buildParameters };
content.gitPrivateToken = ``;

View File

@@ -1,7 +1,6 @@
import { DescribeTasksCommand, RunTaskCommand, waitUntilTasksRunning } from '@aws-sdk/client-ecs';
import { DescribeStreamCommand, GetRecordsCommand, GetShardIteratorCommand } from '@aws-sdk/client-kinesis';
import OrchestratorEnvironmentVariable from '../../options/orchestrator-environment-variable';
import OrchestratorSecret from '../../options/orchestrator-secret';
import * as core from '@actions/core';
import OrchestratorAWSTaskDef from './orchestrator-aws-task-def';
import * as zlib from 'node:zlib';
@@ -57,7 +56,6 @@ class AWSTaskRunner {
static async runTask(
taskDef: OrchestratorAWSTaskDef,
environment: OrchestratorEnvironmentVariable[],
secrets: OrchestratorSecret[],
commands: string,
): Promise<{ output: string; shouldCleanup: boolean }> {
const cluster = taskDef.baseResources?.find((x) => x.LogicalResourceId === 'ECSCluster')?.PhysicalResourceId || '';
@@ -75,12 +73,6 @@ class AWSTaskRunner {
// Transform localhost endpoints for container environment
const transformedEnvironment = AWSTaskRunner.transformEndpointsForContainer(environment);
// Merge secrets into environment as plain env vars, matching docker and k8s provider behavior.
// This ensures UNITY_EMAIL, UNITY_PASSWORD, UNITY_SERIAL reach the container reliably
// without depending on CloudFormation Secrets Manager resolution.
const secretsAsEnvironment = secrets.map((s) => ({ name: s.EnvironmentVariable, value: s.ParameterValue }));
const mergedEnvironment = [...transformedEnvironment, ...secretsAsEnvironment];
const runParameters = {
cluster,
taskDefinition,
@@ -89,7 +81,7 @@ class AWSTaskRunner {
containerOverrides: [
{
name: taskDef.taskDefStackName,
environment: mergedEnvironment,
environment: transformedEnvironment,
command: ['-c', CommandHookService.ApplyHooksToCommands(commands, Orchestrator.buildParameters)],
},
],

View File

@@ -125,7 +125,7 @@ class AWSBuildEnvironment implements ProviderInterface {
try {
const postSetupStacksTimeMs = Date.now();
OrchestratorLogger.log(`Setup job time: ${Math.floor((postSetupStacksTimeMs - startTimeMs) / 1000)}s`);
const { output, shouldCleanup } = await AwsTaskRunner.runTask(taskDef, environment, secrets, commands);
const { output, shouldCleanup } = await AwsTaskRunner.runTask(taskDef, environment, commands);
postRunTaskTimeMs = Date.now();
OrchestratorLogger.log(`Run job time: ${Math.floor((postRunTaskTimeMs - postSetupStacksTimeMs) / 1000)}s`);
if (shouldCleanup) {

View File

@@ -0,0 +1,318 @@
import { RunnerAvailabilityService } from './runner-availability-service';
// Mock @octokit/core
jest.mock('@octokit/core', () => ({
Octokit: jest.fn().mockImplementation(() => ({
request: jest.fn(),
})),
}));
jest.mock('./orchestrator-logger', () => ({
__esModule: true,
default: {
log: jest.fn(),
logWarning: jest.fn(),
error: jest.fn(),
},
}));
import { Octokit } from '@octokit/core';
const MockedOctokit = Octokit as jest.MockedClass<typeof Octokit>;
function createMockRunners(runners: Array<{ name: string; status: string; busy: boolean; labels: string[] }>) {
return runners.map((r, i) => ({
id: i + 1,
name: r.name,
status: r.status,
busy: r.busy,
labels: r.labels.map((l) => ({ name: l })),
}));
}
describe('RunnerAvailabilityService', () => {
beforeEach(() => {
jest.clearAllMocks();
});
describe('checkAvailability', () => {
it('should skip check and not fallback when no token is provided', async () => {
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', '', [], 1);
expect(result.shouldFallback).toBe(false);
expect(result.reason).toContain('No GitHub token');
});
it('should fallback when no runners are registered', async () => {
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners: [] } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
expect(result.shouldFallback).toBe(true);
expect(result.reason).toContain('No runners registered');
expect(result.totalRunners).toBe(0);
});
it('should not fallback when enough idle runners are available', async () => {
const runners = createMockRunners([
{ name: 'runner-1', status: 'online', busy: false, labels: ['self-hosted', 'linux'] },
{ name: 'runner-2', status: 'online', busy: false, labels: ['self-hosted', 'linux'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
expect(result.shouldFallback).toBe(false);
expect(result.idleRunners).toBe(2);
expect(result.totalRunners).toBe(2);
});
it('should fallback when all runners are busy', async () => {
const runners = createMockRunners([
{ name: 'runner-1', status: 'online', busy: true, labels: ['self-hosted'] },
{ name: 'runner-2', status: 'online', busy: true, labels: ['self-hosted'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
expect(result.shouldFallback).toBe(true);
expect(result.idleRunners).toBe(0);
expect(result.matchingRunners).toBe(2);
});
it('should fallback when all runners are offline', async () => {
const runners = createMockRunners([
{ name: 'runner-1', status: 'offline', busy: false, labels: ['self-hosted'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
expect(result.shouldFallback).toBe(true);
expect(result.idleRunners).toBe(0);
});
it('should filter runners by required labels', async () => {
const runners = createMockRunners([
{ name: 'linux-runner', status: 'online', busy: false, labels: ['self-hosted', 'linux'] },
{ name: 'windows-runner', status: 'online', busy: false, labels: ['self-hosted', 'windows'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability(
'owner',
'repo',
'token',
['self-hosted', 'linux'],
1,
);
expect(result.shouldFallback).toBe(false);
expect(result.matchingRunners).toBe(1);
expect(result.idleRunners).toBe(1);
expect(result.totalRunners).toBe(2);
});
it('should fallback when no runners match required labels', async () => {
const runners = createMockRunners([
{ name: 'windows-runner', status: 'online', busy: false, labels: ['self-hosted', 'windows'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability(
'owner',
'repo',
'token',
['self-hosted', 'linux'],
1,
);
expect(result.shouldFallback).toBe(true);
expect(result.matchingRunners).toBe(0);
expect(result.idleRunners).toBe(0);
});
it('should respect minAvailable threshold', async () => {
const runners = createMockRunners([{ name: 'runner-1', status: 'online', busy: false, labels: ['self-hosted'] }]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
// Need 2, have 1 — should fallback
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 2);
expect(result.shouldFallback).toBe(true);
expect(result.idleRunners).toBe(1);
});
it('should be case-insensitive for label matching', async () => {
const runners = createMockRunners([
{ name: 'runner-1', status: 'online', busy: false, labels: ['Self-Hosted', 'Linux'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability(
'owner',
'repo',
'token',
['self-hosted', 'linux'],
1,
);
expect(result.shouldFallback).toBe(false);
expect(result.matchingRunners).toBe(1);
});
it('should not fallback on API error (fail-open)', async () => {
const mockRequest = jest.fn().mockRejectedValue(new Error('403 Forbidden'));
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
expect(result.shouldFallback).toBe(false);
expect(result.reason).toContain('Runner check failed');
});
it('should count only online+idle runners', async () => {
const runners = createMockRunners([
{ name: 'idle', status: 'online', busy: false, labels: ['self-hosted'] },
{ name: 'busy', status: 'online', busy: true, labels: ['self-hosted'] },
{ name: 'offline', status: 'offline', busy: false, labels: ['self-hosted'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
expect(result.shouldFallback).toBe(false);
expect(result.totalRunners).toBe(3);
expect(result.matchingRunners).toBe(3);
expect(result.idleRunners).toBe(1);
});
});
describe('pagination limits', () => {
it('should stop paginating after reaching the page limit', async () => {
// Return full pages (100 runners each) to force continued pagination
let callCount = 0;
const mockRequest = jest.fn().mockImplementation(() => {
callCount++;
const runners = createMockRunners(
Array.from({ length: 100 }, (_, i) => ({
name: `runner-${callCount}-${i}`,
status: 'online' as const,
busy: false,
labels: ['self-hosted'],
})),
);
return Promise.resolve({ status: 200, data: { runners } });
});
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
// Should have called at most 100 pages (the MAX_PAGINATION_PAGES limit)
expect(mockRequest).toHaveBeenCalledTimes(100);
// Should still have runners from the pages it did fetch
expect(result.totalRunners).toBe(10000);
expect(result.shouldFallback).toBe(false);
});
it('should stop paginating on rate limit (HTTP 403)', async () => {
let callCount = 0;
const mockRequest = jest.fn().mockImplementation(() => {
callCount++;
if (callCount === 2) {
// Octokit throws for non-2xx responses
const error: any = new Error('API rate limit exceeded');
error.status = 403;
error.response = {
status: 403,
headers: { 'x-ratelimit-reset': String(Math.floor(Date.now() / 1000) + 3600) },
};
return Promise.reject(error);
}
const runners = createMockRunners(
Array.from({ length: 100 }, (_, i) => ({
name: `runner-${i}`,
status: 'online' as const,
busy: false,
labels: ['self-hosted'],
})),
);
return Promise.resolve({ status: 200, data: { runners } });
});
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
// Should have stopped at page 2 (rate limited)
expect(mockRequest).toHaveBeenCalledTimes(2);
// Should use the 100 runners from the first page
expect(result.totalRunners).toBe(100);
expect(result.shouldFallback).toBe(false);
});
it('should stop paginating on rate limit (HTTP 429)', async () => {
let callCount = 0;
const mockRequest = jest.fn().mockImplementation(() => {
callCount++;
if (callCount === 1) {
// Octokit throws for non-2xx responses
const error: any = new Error('Too Many Requests');
error.status = 429;
error.response = { status: 429, headers: {} };
return Promise.reject(error);
}
return Promise.resolve({ status: 200, data: { runners: [] } });
});
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
// Should have stopped at first page (rate limited immediately)
expect(mockRequest).toHaveBeenCalledTimes(1);
// No runners found — should fallback
expect(result.totalRunners).toBe(0);
expect(result.shouldFallback).toBe(true);
});
it('should handle pagination timeout gracefully', async () => {
// Mock Date.now to simulate timeout
const originalDateNow = Date.now;
let callCount = 0;
const mockRequest = jest.fn().mockImplementation(() => {
callCount++;
// After first call, advance time past the timeout
if (callCount >= 2) {
Date.now = jest.fn(() => originalDateNow() + 31_000);
}
const runners = createMockRunners(
Array.from({ length: 100 }, (_, i) => ({
name: `runner-${callCount}-${i}`,
status: 'online' as const,
busy: false,
labels: ['self-hosted'],
})),
);
return Promise.resolve({ status: 200, data: { runners } });
});
MockedOctokit.mockImplementation(() => ({ request: mockRequest } as any));
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
// Should have stopped after timeout was detected (2 pages: first succeeds, second triggers timeout check)
expect(mockRequest.mock.calls.length).toBeLessThanOrEqual(3);
// Should have runners from pages fetched before timeout
expect(result.totalRunners).toBeGreaterThan(0);
// Restore
Date.now = originalDateNow;
});
});
});

View File

@@ -0,0 +1,205 @@
import { Octokit } from '@octokit/core';
import OrchestratorLogger from './orchestrator-logger';
interface GitHubRunner {
id: number;
name: string;
status: 'online' | 'offline';
busy: boolean;
labels: Array<{ name: string }>;
}
interface RunnerCheckResult {
shouldFallback: boolean;
reason: string;
totalRunners: number;
matchingRunners: number;
idleRunners: number;
}
/**
* Maximum number of pages to fetch when paginating through GitHub API results.
* 100 pages * 100 per page = 10,000 runners maximum.
*/
const MAX_PAGINATION_PAGES = 100;
/**
* Total timeout in milliseconds for the pagination loop.
* Prevents indefinite API calls if GitHub is slow or pagination is unexpectedly deep.
*/
const PAGINATION_TIMEOUT_MS = 30_000;
/**
* Checks GitHub Actions runner availability to support automatic provider fallback.
*
* When a user configures `runnerCheckEnabled: true` with a `fallbackProviderStrategy`,
* this service queries the GitHub API for runner status before the build starts.
* If insufficient runners are available, the orchestrator routes to the fallback provider.
*/
export class RunnerAvailabilityService {
/**
* Check if enough runners are available to handle the build.
*
* @param owner - GitHub repository owner
* @param repo - GitHub repository name
* @param token - GitHub token with repo/actions scope
* @param requiredLabels - Labels runners must have (empty = any runner)
* @param minAvailable - Minimum idle runners required
* @returns RunnerCheckResult with decision and diagnostics
*/
static async checkAvailability(
owner: string,
repo: string,
token: string,
requiredLabels: string[],
minAvailable: number,
): Promise<RunnerCheckResult> {
if (!token) {
return {
shouldFallback: false,
reason: 'No GitHub token available — skipping runner check',
totalRunners: 0,
matchingRunners: 0,
idleRunners: 0,
};
}
try {
const octokit = new Octokit({ auth: token });
// Fetch all runners for the repository
const runners = await RunnerAvailabilityService.fetchRunners(octokit, owner, repo);
if (runners.length === 0) {
return {
shouldFallback: true,
reason: 'No runners registered for this repository',
totalRunners: 0,
matchingRunners: 0,
idleRunners: 0,
};
}
// Filter by required labels
const matching = RunnerAvailabilityService.filterByLabels(runners, requiredLabels);
// Count idle (online + not busy)
const idle = matching.filter((r) => r.status === 'online' && !r.busy);
const result: RunnerCheckResult = {
shouldFallback: idle.length < minAvailable,
reason:
idle.length >= minAvailable
? `${idle.length} idle runner(s) available (need ${minAvailable})`
: `Only ${idle.length} idle runner(s) available, need ${minAvailable}`,
totalRunners: runners.length,
matchingRunners: matching.length,
idleRunners: idle.length,
};
return result;
} catch (error: any) {
// If the API call fails (permissions, rate limit, etc.), don't block the build
OrchestratorLogger.log(`Runner availability check failed: ${error.message}`);
return {
shouldFallback: false,
reason: `Runner check failed (${error.message}) — proceeding with primary provider`,
totalRunners: 0,
matchingRunners: 0,
idleRunners: 0,
};
}
}
/**
* Fetch all runners for a repository, handling pagination.
*
* Includes defensive limits:
* - Maximum page count (MAX_PAGINATION_PAGES) to prevent infinite loops
* - Total timeout (PAGINATION_TIMEOUT_MS) to prevent indefinite API calls
* - Rate-limit detection (HTTP 403/429 with X-RateLimit-Remaining header)
*/
private static async fetchRunners(octokit: Octokit, owner: string, repo: string): Promise<GitHubRunner[]> {
const allRunners: GitHubRunner[] = [];
let page = 1;
const perPage = 100;
const startTime = Date.now();
while (page <= MAX_PAGINATION_PAGES) {
// Check total timeout
if (Date.now() - startTime > PAGINATION_TIMEOUT_MS) {
OrchestratorLogger.logWarning(
`[RunnerAvailability] Pagination timeout reached after ${page - 1} pages and ${Date.now() - startTime}ms. ` +
`Using ${allRunners.length} runners found so far.`,
);
break;
}
let response: any;
try {
response = await octokit.request('GET /repos/{owner}/{repo}/actions/runners', {
owner,
repo,
per_page: perPage,
page,
});
} catch (requestError: any) {
// Octokit throws for non-2xx responses. Check if this is a rate limit error.
const status = requestError.status ?? requestError.response?.status;
if (status === 403 || status === 429) {
const resetTime =
requestError.response?.headers?.['x-ratelimit-reset'] ?? requestError.headers?.['x-ratelimit-reset'];
const resetMessage = resetTime
? ` Resets at ${new Date(Number.parseInt(String(resetTime), 10) * 1000).toISOString()}`
: '';
OrchestratorLogger.logWarning(
`[RunnerAvailability] GitHub API rate limit reached (HTTP ${status}).${resetMessage} ` +
`Using ${allRunners.length} runners found so far.`,
);
break;
}
// Re-throw non-rate-limit errors to be handled by the outer catch
throw requestError;
}
const runners = (response.data.runners || []) as GitHubRunner[];
allRunners.push(...runners);
if (runners.length < perPage) break;
page++;
}
if (page > MAX_PAGINATION_PAGES) {
OrchestratorLogger.logWarning(
`[RunnerAvailability] Maximum pagination limit reached (${MAX_PAGINATION_PAGES} pages). ` +
`Using ${allRunners.length} runners found so far.`,
);
}
if (allRunners.length === 0) {
OrchestratorLogger.log(
'[RunnerAvailability] No runners found. Possible causes: ' +
'wrong token permissions (needs repo or actions scope), ' +
'no self-hosted runners registered, ' +
'or runners are registered at the organization level instead of the repository.',
);
}
return allRunners;
}
/**
* Filter runners by required labels. A runner matches if it has ALL required labels.
* If requiredLabels is empty, all runners match.
*/
private static filterByLabels(runners: GitHubRunner[], requiredLabels: string[]): GitHubRunner[] {
if (requiredLabels.length === 0) return runners;
return runners.filter((runner) => {
const runnerLabelNames = runner.labels.map((l) => l.name.toLowerCase());
return requiredLabels.every((required) => runnerLabelNames.includes(required.toLowerCase()));
});
}
}

View File

@@ -30,7 +30,7 @@ describe('Orchestrator Caching', () => {
targetPlatform: 'StandaloneLinux64',
cacheKey: `test-case-${uuidv4()}`,
containerHookFiles: `debug-cache`,
orchestratorBranch: `orchestrator-develop`,
orchestratorBranch: `main`,
orchestratorDebug: true,
};

View File

@@ -33,8 +33,7 @@ if [ -n "$(git ls-remote --heads "$REPO" "$BRANCH" 2>/dev/null)" ]; then
git clone -q -b "$BRANCH" "$REPO" /builder
else
echo "Remote branch $BRANCH not found in $REPO; falling back to a known branch"
git clone -q -b orchestrator-develop "$REPO" /builder \
|| git clone -q -b main "$REPO" /builder \
git clone -q -b main "$REPO" /builder \
|| git clone -q "$REPO" /builder
fi
git clone -q -b ${Orchestrator.buildParameters.branch} ${OrchestratorFolders.targetBuildRepoUrl} /repo

View File

@@ -99,8 +99,7 @@ if [ -n "$(git ls-remote --heads "$REPO" "$BRANCH" 2>/dev/null)" ]; then
git clone -q -b "$BRANCH" "$REPO" "$DEST"
else
echo "Remote branch $BRANCH not found in $REPO; falling back to a known branch"
git clone -q -b orchestrator-develop "$REPO" "$DEST" \
|| git clone -q -b main "$REPO" "$DEST" \
git clone -q -b main "$REPO" "$DEST" \
|| git clone -q "$REPO" "$DEST"
fi
chmod +x ${builderPath}`;