feat(orchestrator): automatic provider fallback with runner availability check

Adds built-in load balancing: check GitHub runner availability before builds start, auto-route to a fallback provider when runners are busy or offline. Eliminates the need for a separate check-runner job. New inputs: fallbackProviderStrategy, runnerCheckEnabled, runnerCheckLabels, runnerCheckMinAvailable. Outputs providerFallbackUsed and providerFallbackReason for workflow visibility. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-07-31 02:53:57 -07:00 · 2026-03-05 07:39:23 +00:00
parent 9d475434d3
commit 786ee3799c
7 changed files with 386 additions and 1 deletions
@@ -194,6 +194,30 @@ inputs:
    description:
      '[Orchestrator] Either local, k8s or aws can be used to run builds on a remote cluster. Additional parameters must
      be configured.'
+  fallbackProviderStrategy:
+    default: ''
+    required: false
+    description:
+      '[Orchestrator] Provider to route the build to when the runner availability check finds too few idle runners.
+      Only takes effect when runnerCheckEnabled is true; it is not consulted if the primary provider fails to start.'
+  runnerCheckEnabled:
+    default: 'false'
+    required: false
+    description:
+      '[Orchestrator] Check GitHub Actions runner availability before starting a build. When no suitable runners are
+      available and fallbackProviderStrategy is set, automatically routes to the fallback provider.'
+  runnerCheckLabels:
+    default: ''
+    required: false
+    description:
+      '[Orchestrator] Comma-separated runner labels to filter when checking availability (e.g. self-hosted,linux).
+      When empty, checks all runners in the repository.'
+  runnerCheckMinAvailable:
+    default: '1'
+    required: false
+    description:
+      '[Orchestrator] Minimum number of idle runners required for the primary provider. If fewer are available,
+      routes to fallbackProviderStrategy.'
  resourceTracking:
    default: 'false'
    required: false
@@ -327,6 +327,10 @@ class BuildParameters {
            containerRegistryRepository: input_1.default.containerRegistryRepository,
            containerRegistryImageVersion: input_1.default.containerRegistryImageVersion,
            providerStrategy: orchestrator_options_1.default.providerStrategy,
+            fallbackProviderStrategy: orchestrator_options_1.default.fallbackProviderStrategy,
+            runnerCheckEnabled: orchestrator_options_1.default.runnerCheckEnabled,
+            runnerCheckLabels: orchestrator_options_1.default.runnerCheckLabels,
+            runnerCheckMinAvailable: orchestrator_options_1.default.runnerCheckMinAvailable,
            buildPlatform: orchestrator_options_1.default.buildPlatform,
            kubeConfig: orchestrator_options_1.default.kubeConfig,
            containerMemory: orchestrator_options_1.default.containerMemory,
@@ -2204,6 +2208,19 @@ class OrchestratorOptions {
        }
        return provider || 'local';
    }
+    static get fallbackProviderStrategy() {
+        return OrchestratorOptions.getInput('fallbackProviderStrategy') || '';
+    }
+    static get runnerCheckEnabled() {
+        return OrchestratorOptions.getInput('runnerCheckEnabled') === 'true';
+    }
+    static get runnerCheckLabels() {
+        const labels = OrchestratorOptions.getInput('runnerCheckLabels');
+        return labels ? labels.split(',').map((l) => l.trim()) : [];
+    }
+    static get runnerCheckMinAvailable() {
+        return Number(OrchestratorOptions.getInput('runnerCheckMinAvailable')) || 1;
+    }
    static get containerCpu() {
        return OrchestratorOptions.getInput('containerCpu') || `1024`;
    }
@@ -2508,6 +2525,7 @@ const follow_log_stream_service_1 = __nccwpck_require__(36149);
 const orchestrator_result_1 = __importDefault(__nccwpck_require__(86819));
 const orchestrator_options_1 = __importDefault(__nccwpck_require__(82473));
 const resource_tracking_1 = __importDefault(__nccwpck_require__(42604));
+const runner_availability_service_1 = __nccwpck_require__(18876);
 class Orchestrator {
    static get isOrchestratorEnvironment() {
        return process.env[`GITHUB_ACTIONS`] !== `true`;
@@ -2544,6 +2562,26 @@ class Orchestrator {
    }
    static async setupSelectedBuildPlatform() {
        orchestrator_logger_1.default.log(`Orchestrator platform selected ${Orchestrator.buildParameters.providerStrategy}`);
+        // Check runner availability and apply fallback if needed
+        if (Orchestrator.buildParameters.runnerCheckEnabled && Orchestrator.buildParameters.fallbackProviderStrategy) {
+            const owner = orchestrator_options_1.default.githubOwner;
+            const repo = orchestrator_options_1.default.githubRepoName;
+            const token = Orchestrator.buildParameters.gitPrivateToken || process.env.GITHUB_TOKEN || '';
+            orchestrator_logger_1.default.log(`Checking runner availability (labels: [${Orchestrator.buildParameters.runnerCheckLabels.join(', ')}], min: ${Orchestrator.buildParameters.runnerCheckMinAvailable})`);
+            const result = await runner_availability_service_1.RunnerAvailabilityService.checkAvailability(owner, repo, token, Orchestrator.buildParameters.runnerCheckLabels, Orchestrator.buildParameters.runnerCheckMinAvailable);
+            orchestrator_logger_1.default.log(`Runner check: ${result.totalRunners} total, ${result.matchingRunners} matching, ${result.idleRunners} idle — ${result.reason}`);
+            if (result.shouldFallback) {
+                const original = Orchestrator.buildParameters.providerStrategy;
+                const fallback = Orchestrator.buildParameters.fallbackProviderStrategy;
+                orchestrator_logger_1.default.log(`Falling back from '${original}' to '${fallback}' — ${result.reason}`);
+                Orchestrator.buildParameters.providerStrategy = fallback;
+                core.setOutput('providerFallbackUsed', 'true');
+                core.setOutput('providerFallbackReason', result.reason);
+            }
+            else {
+                core.setOutput('providerFallbackUsed', 'false');
+            }
+        }
        // Detect LocalStack endpoints and handle AWS provider appropriately
        // AWS_FORCE_PROVIDER options:
        //   - 'aws': Force AWS provider (requires LocalStack Pro with ECS support)
@@ -8639,6 +8677,126 @@ class ResourceTracking {
 exports["default"] = ResourceTracking;


+/***/ }),
+
+/***/ 18876:
+/***/ (function(__unused_webpack_module, exports, __nccwpck_require__) {
+
+"use strict";
+
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", ({ value: true }));
+exports.RunnerAvailabilityService = void 0;
+const core_1 = __nccwpck_require__(76762);
+const orchestrator_logger_1 = __importDefault(__nccwpck_require__(32549));
+/**
+ * Checks GitHub Actions runner availability to support automatic provider fallback.
+ *
+ * When a user configures `runnerCheckEnabled: true` with a `fallbackProviderStrategy`,
+ * this service queries the GitHub API for runner status before the build starts.
+ * If insufficient runners are available, the orchestrator routes to the fallback provider.
+ */
+class RunnerAvailabilityService {
+    /**
+     * Check if enough runners are available to handle the build.
+     *
+     * @param owner - GitHub repository owner
+     * @param repo - GitHub repository name
+     * @param token - GitHub token with repo/actions scope
+     * @param requiredLabels - Labels runners must have (empty = any runner)
+     * @param minAvailable - Minimum idle runners required
+     * @returns RunnerCheckResult with decision and diagnostics
+     */
+    static async checkAvailability(owner, repo, token, requiredLabels, minAvailable) {
+        if (!token) {
+            return {
+                shouldFallback: false,
+                reason: 'No GitHub token available — skipping runner check',
+                totalRunners: 0,
+                matchingRunners: 0,
+                idleRunners: 0,
+            };
+        }
+        try {
+            const octokit = new core_1.Octokit({ auth: token });
+            // Fetch all runners for the repository
+            const runners = await RunnerAvailabilityService.fetchRunners(octokit, owner, repo);
+            if (runners.length === 0) {
+                return {
+                    shouldFallback: true,
+                    reason: 'No runners registered for this repository',
+                    totalRunners: 0,
+                    matchingRunners: 0,
+                    idleRunners: 0,
+                };
+            }
+            // Filter by required labels
+            const matching = RunnerAvailabilityService.filterByLabels(runners, requiredLabels);
+            // Count idle (online + not busy)
+            const idle = matching.filter((r) => r.status === 'online' && !r.busy);
+            const result = {
+                shouldFallback: idle.length < minAvailable,
+                reason: idle.length >= minAvailable
+                    ? `${idle.length} idle runner(s) available (need ${minAvailable})`
+                    : `Only ${idle.length} idle runner(s) available, need ${minAvailable}`,
+                totalRunners: runners.length,
+                matchingRunners: matching.length,
+                idleRunners: idle.length,
+            };
+            return result;
+        }
+        catch (error) {
+            // If the API call fails (permissions, rate limit, etc.), don't block the build
+            orchestrator_logger_1.default.log(`Runner availability check failed: ${error.message}`);
+            return {
+                shouldFallback: false,
+                reason: `Runner check failed (${error.message}) — proceeding with primary provider`,
+                totalRunners: 0,
+                matchingRunners: 0,
+                idleRunners: 0,
+            };
+        }
+    }
+    /**
+     * Fetch all runners for a repository, handling pagination.
+     */
+    static async fetchRunners(octokit, owner, repo) {
+        const allRunners = [];
+        let page = 1;
+        const perPage = 100;
+        while (true) {
+            const response = await octokit.request('GET /repos/{owner}/{repo}/actions/runners', {
+                owner,
+                repo,
+                per_page: perPage,
+                page,
+            });
+            const runners = (response.data.runners || []);
+            allRunners.push(...runners);
+            if (runners.length < perPage)
+                break;
+            page++;
+        }
+        return allRunners;
+    }
+    /**
+     * Filter runners by required labels. A runner matches if it has ALL required labels.
+     * If requiredLabels is empty, all runners match.
+     */
+    static filterByLabels(runners, requiredLabels) {
+        if (requiredLabels.length === 0)
+            return runners;
+        return runners.filter((runner) => {
+            const runnerLabelNames = runner.labels.map((l) => l.name.toLowerCase());
+            return requiredLabels.every((required) => runnerLabelNames.includes(required.toLowerCase()));
+        });
+    }
+}
+exports.RunnerAvailabilityService = RunnerAvailabilityService;
+
+
 /***/ }),

 /***/ 54222:
@@ -54,6 +54,10 @@ class BuildParameters {
  public sshAgent!: string;
  public sshPublicKeysDirectoryPath!: string;
  public providerStrategy!: string;
+  public fallbackProviderStrategy!: string;
+  public runnerCheckEnabled!: boolean;
+  public runnerCheckLabels!: string[];
+  public runnerCheckMinAvailable!: number;
  public gitPrivateToken!: string;
  public awsStackName!: string;
  public awsEndpoint?: string;
@@ -194,6 +198,10 @@ class BuildParameters {
      containerRegistryRepository: Input.containerRegistryRepository,
      containerRegistryImageVersion: Input.containerRegistryImageVersion,
      providerStrategy: OrchestratorOptions.providerStrategy,
+      fallbackProviderStrategy: OrchestratorOptions.fallbackProviderStrategy,
+      runnerCheckEnabled: OrchestratorOptions.runnerCheckEnabled,
+      runnerCheckLabels: OrchestratorOptions.runnerCheckLabels,
+      runnerCheckMinAvailable: OrchestratorOptions.runnerCheckMinAvailable,
      buildPlatform: OrchestratorOptions.buildPlatform,
      kubeConfig: OrchestratorOptions.kubeConfig,
      containerMemory: OrchestratorOptions.containerMemory,
@@ -138,6 +138,24 @@ class OrchestratorOptions {
    return provider || 'local';
  }

+  /**
+   * Provider the orchestrator switches to when the runner availability check asks for a fallback.
+   * Yields an empty string whenever the input resolves to a falsy value.
+   */
+  static get fallbackProviderStrategy(): string {
+    const configured = OrchestratorOptions.getInput('fallbackProviderStrategy');
+
+    return configured ? configured : '';
+  }
+
+  /**
+   * Whether the runner availability check is switched on.
+   * Only the exact string 'true' enables it; any other value (or no value) disables it.
+   */
+  static get runnerCheckEnabled(): boolean {
+    const flag = OrchestratorOptions.getInput('runnerCheckEnabled');
+
+    return flag === 'true';
+  }
+
+  /**
+   * Runner labels parsed from the comma-separated `runnerCheckLabels` input.
+   *
+   * Each entry is trimmed and blank entries are dropped, so a trailing or doubled comma
+   * (e.g. "self-hosted,") does not produce an empty label that no runner could ever match.
+   * Returns an empty array when the input resolves to a falsy value.
+   */
+  static get runnerCheckLabels(): string[] {
+    const raw = OrchestratorOptions.getInput('runnerCheckLabels');
+    if (!raw) {
+      return [];
+    }
+
+    return raw
+      .split(',')
+      .map((label) => label.trim())
+      .filter((label) => label.length > 0);
+  }
+
+  /**
+   * Minimum number of idle runners required, parsed from the `runnerCheckMinAvailable` input.
+   *
+   * Anything that is not a finite number of at least 1 (missing, non-numeric, zero, negative,
+   * infinite) resolves to the default of 1. Fractional values are rounded up, which yields the
+   * same decision as comparing an integer runner count against the fraction.
+   */
+  static get runnerCheckMinAvailable(): number {
+    const parsed = Number(OrchestratorOptions.getInput('runnerCheckMinAvailable'));
+    if (!Number.isFinite(parsed) || parsed < 1) {
+      return 1;
+    }
+
+    return Math.ceil(parsed);
+  }
+
  static get containerCpu(): string {
    return OrchestratorOptions.getInput('containerCpu') || `1024`;
  }
@@ -20,6 +20,7 @@ import { FollowLogStreamService } from './services/core/follow-log-stream-servic
 import OrchestratorResult from './services/core/orchestrator-result';
 import OrchestratorOptions from './options/orchestrator-options';
 import ResourceTracking from './services/core/resource-tracking';
+import { RunnerAvailabilityService } from './services/core/runner-availability-service';

 class Orchestrator {
  public static Provider: ProviderInterface;
@@ -76,6 +77,40 @@ class Orchestrator {
  private static async setupSelectedBuildPlatform() {
    OrchestratorLogger.log(`Orchestrator platform selected ${Orchestrator.buildParameters.providerStrategy}`);

+    // Check runner availability and apply fallback if needed
+    if (Orchestrator.buildParameters.runnerCheckEnabled && Orchestrator.buildParameters.fallbackProviderStrategy) {
+      const owner = OrchestratorOptions.githubOwner;
+      const repo = OrchestratorOptions.githubRepoName;
+      const token = Orchestrator.buildParameters.gitPrivateToken || process.env.GITHUB_TOKEN || '';
+
+      OrchestratorLogger.log(
+        `Checking runner availability (labels: [${Orchestrator.buildParameters.runnerCheckLabels.join(', ')}], min: ${Orchestrator.buildParameters.runnerCheckMinAvailable})`,
+      );
+
+      const result = await RunnerAvailabilityService.checkAvailability(
+        owner,
+        repo,
+        token,
+        Orchestrator.buildParameters.runnerCheckLabels,
+        Orchestrator.buildParameters.runnerCheckMinAvailable,
+      );
+
+      OrchestratorLogger.log(
+        `Runner check: ${result.totalRunners} total, ${result.matchingRunners} matching, ${result.idleRunners} idle — ${result.reason}`,
+      );
+
+      if (result.shouldFallback) {
+        const original = Orchestrator.buildParameters.providerStrategy;
+        const fallback = Orchestrator.buildParameters.fallbackProviderStrategy;
+        OrchestratorLogger.log(`Falling back from '${original}' to '${fallback}' — ${result.reason}`);
+        Orchestrator.buildParameters.providerStrategy = fallback;
+        core.setOutput('providerFallbackUsed', 'true');
+        core.setOutput('providerFallbackReason', result.reason);
+      } else {
+        core.setOutput('providerFallbackUsed', 'false');
+      }
+    }
+
    // Detect LocalStack endpoints and handle AWS provider appropriately
    // AWS_FORCE_PROVIDER options:
    //   - 'aws': Force AWS provider (requires LocalStack Pro with ECS support)
@@ -0,0 +1,142 @@
+import { Octokit } from '@octokit/core';
+import OrchestratorLogger from './orchestrator-logger';
+
+/**
+ * Shape this service assumes for each entry of `response.data.runners` returned by the
+ * repository runners endpoint (the response is cast to this type, not validated).
+ */
+interface GitHubRunner {
+  id: number;
+  name: string;
+  // A runner is counted as idle only when status is 'online' and busy is false.
+  status: 'online' | 'offline';
+  busy: boolean;
+  // Label names are compared case-insensitively against the required labels.
+  labels: Array<{ name: string }>;
+}
+
+/**
+ * Outcome of a runner availability check: the routing decision plus diagnostic counts.
+ */
+interface RunnerCheckResult {
+  // True when the caller should switch to the fallback provider.
+  shouldFallback: boolean;
+  // Human-readable explanation of the decision (also used for logs and the fallback reason output).
+  reason: string;
+  // Number of runners returned for the repository (0 when the check was skipped or failed).
+  totalRunners: number;
+  // Number of runners carrying all required labels.
+  matchingRunners: number;
+  // Number of matching runners that are online and not busy.
+  idleRunners: number;
+}
+
+/**
+ * Checks GitHub Actions runner availability to support automatic provider fallback.
+ *
+ * When a user configures `runnerCheckEnabled: true` with a `fallbackProviderStrategy`,
+ * this service queries the GitHub API for runner status before the build starts.
+ * If insufficient runners are available, the orchestrator routes to the fallback provider.
+ */
+export class RunnerAvailabilityService {
+  /**
+   * Decide whether the build should be routed to the fallback provider.
+   *
+   * Lists the runners returned by the repository runners endpoint, keeps those carrying every
+   * required label, and counts the ones that are online and not flagged busy. This method does
+   * not throw: a missing token or any error raised during the check yields
+   * `shouldFallback: false`, so the build proceeds on the primary provider. A repository for
+   * which the endpoint returns no runners at all always yields `shouldFallback: true`.
+   *
+   * NOTE(review): GitHub's REST documentation states that listing repository runners requires
+   * admin access to the repository, so a default workflow token may be rejected — confirm.
+   *
+   * @param owner - GitHub repository owner
+   * @param repo - GitHub repository name
+   * @param token - GitHub token used to authenticate the request; an empty token skips the check
+   * @param requiredLabels - Labels runners must have (case-insensitive; blank entries ignored; empty = any runner)
+   * @param minAvailable - Minimum idle runners required
+   * @returns RunnerCheckResult with decision and diagnostics
+   */
+  static async checkAvailability(
+    owner: string,
+    repo: string,
+    token: string,
+    requiredLabels: string[],
+    minAvailable: number,
+  ): Promise<RunnerCheckResult> {
+    if (!token) {
+      return RunnerAvailabilityService.resultWithoutCounts(
+        false,
+        'No GitHub token available — skipping runner check',
+      );
+    }
+
+    try {
+      const octokit = new Octokit({ auth: token });
+
+      // Fetch all runners for the repository
+      const runners = await RunnerAvailabilityService.fetchRunners(octokit, owner, repo);
+
+      if (runners.length === 0) {
+        return RunnerAvailabilityService.resultWithoutCounts(true, 'No runners registered for this repository');
+      }
+
+      // Filter by required labels
+      const matching = RunnerAvailabilityService.filterByLabels(runners, requiredLabels);
+
+      // Count idle (online + not busy)
+      const idle = matching.filter((runner) => runner.status === 'online' && !runner.busy);
+
+      // Derive the reason from the decision itself so the two can never disagree.
+      const shouldFallback = idle.length < minAvailable;
+
+      return {
+        shouldFallback,
+        reason: shouldFallback
+          ? `Only ${idle.length} idle runner(s) available, need ${minAvailable}`
+          : `${idle.length} idle runner(s) available (need ${minAvailable})`,
+        totalRunners: runners.length,
+        matchingRunners: matching.length,
+        idleRunners: idle.length,
+      };
+    } catch (error: unknown) {
+      // If the API call fails (permissions, rate limit, etc.), don't block the build.
+      // Thrown values are not guaranteed to be Error instances, so narrow before reading the message.
+      const message = error instanceof Error ? error.message : String(error);
+      OrchestratorLogger.log(`Runner availability check failed: ${message}`);
+
+      return RunnerAvailabilityService.resultWithoutCounts(
+        false,
+        `Runner check failed (${message}) — proceeding with primary provider`,
+      );
+    }
+  }
+
+  /**
+   * Build a result for the paths that report no runner counts (all counts are zero).
+   */
+  private static resultWithoutCounts(shouldFallback: boolean, reason: string): RunnerCheckResult {
+    return {
+      shouldFallback,
+      reason,
+      totalRunners: 0,
+      matchingRunners: 0,
+      idleRunners: 0,
+    };
+  }
+
+  /**
+   * Fetch all runners for a repository, handling pagination.
+   *
+   * Requests pages of 100 entries until a page comes back with fewer than 100 runners.
+   */
+  private static async fetchRunners(octokit: Octokit, owner: string, repo: string): Promise<GitHubRunner[]> {
+    const allRunners: GitHubRunner[] = [];
+    let page = 1;
+    const perPage = 100;
+
+    while (true) {
+      const response = await octokit.request('GET /repos/{owner}/{repo}/actions/runners', {
+        owner,
+        repo,
+        per_page: perPage,
+        page,
+      });
+
+      const runners = (response.data.runners || []) as GitHubRunner[];
+      allRunners.push(...runners);
+
+      // A short (or empty) page means there is nothing further to fetch.
+      if (runners.length < perPage) break;
+      page++;
+    }
+
+    return allRunners;
+  }
+
+  /**
+   * Filter runners by required labels. A runner matches if it has ALL required labels
+   * (compared case-insensitively). Blank required labels are ignored; if nothing remains,
+   * all runners match.
+   */
+  private static filterByLabels(runners: GitHubRunner[], requiredLabels: string[]): GitHubRunner[] {
+    // Normalize once up front instead of once per runner; a blank label can never match.
+    const wanted = requiredLabels
+      .map((label) => label.trim().toLowerCase())
+      .filter((label) => label.length > 0);
+
+    if (wanted.length === 0) return runners;
+
+    return runners.filter((runner) => {
+      // Tolerate a runner entry without a labels array rather than throwing.
+      const present = (runner.labels || []).map((label) => label.name.toLowerCase());
+
+      return wanted.every((label) => present.includes(label));
+    });
+  }
+}