Merge remote-tracking branch 'origin/feature/provider-load-balancing' into release/lts-infrastructure

# Conflicts: # dist/index.js.map # src/model/build-parameters.ts # src/model/orchestrator/options/orchestrator-options.ts
2026-08-03 04:23:56 -07:00 · 2026-03-05 23:30:45 +00:00
parent 3e1547170b e9c247f04f
commit 7307bea200
8 changed files with 949 additions and 8 deletions
@@ -357,6 +357,12 @@ class BuildParameters {
            containerRegistryRepository: input_1.default.containerRegistryRepository,
            containerRegistryImageVersion: input_1.default.containerRegistryImageVersion,
            providerStrategy: orchestrator_options_1.default.providerStrategy,
+            fallbackProviderStrategy: orchestrator_options_1.default.fallbackProviderStrategy,
+            runnerCheckEnabled: orchestrator_options_1.default.runnerCheckEnabled,
+            runnerCheckLabels: orchestrator_options_1.default.runnerCheckLabels,
+            runnerCheckMinAvailable: orchestrator_options_1.default.runnerCheckMinAvailable,
+            retryOnFallback: orchestrator_options_1.default.retryOnFallback,
+            providerInitTimeout: orchestrator_options_1.default.providerInitTimeout,
            buildPlatform: orchestrator_options_1.default.buildPlatform,
            kubeConfig: orchestrator_options_1.default.kubeConfig,
            containerMemory: orchestrator_options_1.default.containerMemory,
@@ -2262,6 +2268,25 @@ class OrchestratorOptions {
        }
        return provider || 'local';
    }
+    static get fallbackProviderStrategy() {
+        return OrchestratorOptions.getInput('fallbackProviderStrategy') || '';
+    }
+    static get runnerCheckEnabled() {
+        return OrchestratorOptions.getInput('runnerCheckEnabled') === 'true';
+    }
+    static get runnerCheckLabels() {
+        const labels = OrchestratorOptions.getInput('runnerCheckLabels');
+        return labels ? labels.split(',').map((l) => l.trim()) : [];
+    }
+    static get runnerCheckMinAvailable() {
+        return Number(OrchestratorOptions.getInput('runnerCheckMinAvailable')) || 1;
+    }
+    static get retryOnFallback() {
+        return OrchestratorOptions.getInput('retryOnFallback') === 'true';
+    }
+    static get providerInitTimeout() {
+        return Number(OrchestratorOptions.getInput('providerInitTimeout')) || 0;
+    }
    static get containerCpu() {
        return OrchestratorOptions.getInput('containerCpu') || `1024`;
    }
@@ -2566,6 +2591,7 @@ const follow_log_stream_service_1 = __nccwpck_require__(36149);
 const orchestrator_result_1 = __importDefault(__nccwpck_require__(86819));
 const orchestrator_options_1 = __importDefault(__nccwpck_require__(82473));
 const resource_tracking_1 = __importDefault(__nccwpck_require__(42604));
+const runner_availability_service_1 = __nccwpck_require__(18876);
 class Orchestrator {
    static get isOrchestratorEnvironment() {
        return process.env[`GITHUB_ACTIONS`] !== `true`;
@@ -2602,6 +2628,26 @@ class Orchestrator {
    }
    static async setupSelectedBuildPlatform() {
        orchestrator_logger_1.default.log(`Orchestrator platform selected ${Orchestrator.buildParameters.providerStrategy}`);
+        // Check runner availability and apply fallback if needed
+        if (Orchestrator.buildParameters.runnerCheckEnabled && Orchestrator.buildParameters.fallbackProviderStrategy) {
+            const owner = orchestrator_options_1.default.githubOwner;
+            const repo = orchestrator_options_1.default.githubRepoName;
+            const token = Orchestrator.buildParameters.gitPrivateToken || process.env.GITHUB_TOKEN || '';
+            orchestrator_logger_1.default.log(`Checking runner availability (labels: [${Orchestrator.buildParameters.runnerCheckLabels.join(', ')}], min: ${Orchestrator.buildParameters.runnerCheckMinAvailable})`);
+            const result = await runner_availability_service_1.RunnerAvailabilityService.checkAvailability(owner, repo, token, Orchestrator.buildParameters.runnerCheckLabels, Orchestrator.buildParameters.runnerCheckMinAvailable);
+            orchestrator_logger_1.default.log(`Runner check: ${result.totalRunners} total, ${result.matchingRunners} matching, ${result.idleRunners} idle — ${result.reason}`);
+            if (result.shouldFallback) {
+                const original = Orchestrator.buildParameters.providerStrategy;
+                const fallback = Orchestrator.buildParameters.fallbackProviderStrategy;
+                orchestrator_logger_1.default.log(`Falling back from '${original}' to '${fallback}' — ${result.reason}`);
+                Orchestrator.buildParameters.providerStrategy = fallback;
+                core.setOutput('providerFallbackUsed', 'true');
+                core.setOutput('providerFallbackReason', result.reason);
+            }
+            else {
+                core.setOutput('providerFallbackUsed', 'false');
+            }
+        }
        // Detect LocalStack endpoints and handle AWS provider appropriately
        // AWS_FORCE_PROVIDER options:
        //   - 'aws': Force AWS provider (requires LocalStack Pro with ECS support)
@@ -2700,13 +2746,33 @@ class Orchestrator {
        if (baseImage.includes(`undefined`)) {
            throw new Error(`baseImage is undefined`);
        }
+        try {
+            return await Orchestrator.runWithProvider(buildParameters, baseImage);
+        }
+        catch (primaryError) {
+            // Retry on fallback provider if enabled and a fallback is configured
+            const fallback = buildParameters.fallbackProviderStrategy;
+            const alreadyOnFallback = buildParameters.providerStrategy === fallback;
+            if (buildParameters.retryOnFallback && fallback && !alreadyOnFallback) {
+                orchestrator_logger_1.default.log(`Primary provider '${buildParameters.providerStrategy}' failed: ${primaryError.message}`);
+                orchestrator_logger_1.default.log(`Retrying build on fallback provider '${fallback}'...`);
+                buildParameters.providerStrategy = fallback;
+                core.setOutput('providerFallbackUsed', 'true');
+                core.setOutput('providerFallbackReason', `Primary provider failed: ${primaryError.message}`);
+                return await Orchestrator.runWithProvider(buildParameters, baseImage);
+            }
+            throw primaryError;
+        }
+    }
+    static async runWithProvider(buildParameters, baseImage) {
        await Orchestrator.setup(buildParameters);
        // When aws-local mode is enabled, validate AWS CloudFormation templates
        // This ensures AWS templates are correct even when executing via local-docker
        if (Orchestrator.validateAwsTemplates) {
            await Orchestrator.validateAwsCloudFormationTemplates();
        }
-        await Orchestrator.Provider.setupWorkflow(Orchestrator.buildParameters.buildGuid, Orchestrator.buildParameters, Orchestrator.buildParameters.branch, Orchestrator.defaultSecrets);
+        // Setup workflow with optional init timeout
+        await Orchestrator.setupWorkflowWithTimeout();
        try {
            if (buildParameters.maxRetainedWorkspaces > 0) {
                Orchestrator.lockedWorkspace = shared_workspace_locking_1.default.NewWorkspaceName();
@@ -2756,6 +2822,24 @@ class Orchestrator {
            throw error;
        }
    }
+    /**
+     * Runs setupWorkflow with an optional timeout. If providerInitTimeout is set and the
+     * provider takes longer than that to initialize, throws an error that triggers
+     * retry-on-fallback (if enabled).
+     */
+    static async setupWorkflowWithTimeout() {
+        const timeoutSeconds = Orchestrator.buildParameters.providerInitTimeout;
+        const setupPromise = Orchestrator.Provider.setupWorkflow(Orchestrator.buildParameters.buildGuid, Orchestrator.buildParameters, Orchestrator.buildParameters.branch, Orchestrator.defaultSecrets);
+        if (timeoutSeconds <= 0) {
+            await setupPromise;
+            return;
+        }
+        orchestrator_logger_1.default.log(`Provider init timeout: ${timeoutSeconds}s`);
+        const timeoutPromise = new Promise((_, reject) => {
+            setTimeout(() => reject(new Error(`Provider initialization timed out after ${timeoutSeconds}s`)), timeoutSeconds * 1000);
+        });
+        await Promise.race([setupPromise, timeoutPromise]);
+    }
    static async updateStatusWithBuildParameters() {
        const content = { ...Orchestrator.buildParameters };
        content.gitPrivateToken = ``;
@@ -8697,6 +8781,176 @@ class ResourceTracking {
 exports["default"] = ResourceTracking;


+/***/ }),
+
+/***/ 18876:
+/***/ (function(__unused_webpack_module, exports, __nccwpck_require__) {
+
+"use strict";
+
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", ({ value: true }));
+exports.RunnerAvailabilityService = void 0;
+const core_1 = __nccwpck_require__(76762);
+const orchestrator_logger_1 = __importDefault(__nccwpck_require__(32549));
+/**
+ * Maximum number of pages to fetch when paginating through GitHub API results.
+ * 100 pages * 100 per page = 10,000 runners maximum.
+ */
+const MAX_PAGINATION_PAGES = 100;
+/**
+ * Total timeout in milliseconds for the pagination loop.
+ * Prevents indefinite API calls if GitHub is slow or pagination is unexpectedly deep.
+ */
+const PAGINATION_TIMEOUT_MS = 30000;
+/**
+ * Checks GitHub Actions runner availability to support automatic provider fallback.
+ *
+ * When a user configures `runnerCheckEnabled: true` with a `fallbackProviderStrategy`,
+ * this service queries the GitHub API for runner status before the build starts.
+ * If insufficient runners are available, the orchestrator routes to the fallback provider.
+ */
+class RunnerAvailabilityService {
+    /**
+     * Check if enough runners are available to handle the build.
+     *
+     * @param owner - GitHub repository owner
+     * @param repo - GitHub repository name
+     * @param token - GitHub token with repo/actions scope
+     * @param requiredLabels - Labels runners must have (empty = any runner)
+     * @param minAvailable - Minimum idle runners required
+     * @returns RunnerCheckResult with decision and diagnostics
+     */
+    static async checkAvailability(owner, repo, token, requiredLabels, minAvailable) {
+        if (!token) {
+            return {
+                shouldFallback: false,
+                reason: 'No GitHub token available — skipping runner check',
+                totalRunners: 0,
+                matchingRunners: 0,
+                idleRunners: 0,
+            };
+        }
+        try {
+            const octokit = new core_1.Octokit({ auth: token });
+            // Fetch all runners for the repository
+            const runners = await RunnerAvailabilityService.fetchRunners(octokit, owner, repo);
+            if (runners.length === 0) {
+                return {
+                    shouldFallback: true,
+                    reason: 'No runners registered for this repository',
+                    totalRunners: 0,
+                    matchingRunners: 0,
+                    idleRunners: 0,
+                };
+            }
+            // Filter by required labels
+            const matching = RunnerAvailabilityService.filterByLabels(runners, requiredLabels);
+            // Count idle (online + not busy)
+            const idle = matching.filter((r) => r.status === 'online' && !r.busy);
+            const result = {
+                shouldFallback: idle.length < minAvailable,
+                reason: idle.length >= minAvailable
+                    ? `${idle.length} idle runner(s) available (need ${minAvailable})`
+                    : `Only ${idle.length} idle runner(s) available, need ${minAvailable}`,
+                totalRunners: runners.length,
+                matchingRunners: matching.length,
+                idleRunners: idle.length,
+            };
+            return result;
+        }
+        catch (error) {
+            // If the API call fails (permissions, rate limit, etc.), don't block the build
+            orchestrator_logger_1.default.log(`Runner availability check failed: ${error.message}`);
+            return {
+                shouldFallback: false,
+                reason: `Runner check failed (${error.message}) — proceeding with primary provider`,
+                totalRunners: 0,
+                matchingRunners: 0,
+                idleRunners: 0,
+            };
+        }
+    }
+    /**
+     * Fetch all runners for a repository, handling pagination.
+     *
+     * Includes defensive limits:
+     * - Maximum page count (MAX_PAGINATION_PAGES) to prevent infinite loops
+     * - Total timeout (PAGINATION_TIMEOUT_MS) to prevent indefinite API calls
+     * - Rate-limit detection (HTTP 403/429 with X-RateLimit-Remaining header)
+     */
+    static async fetchRunners(octokit, owner, repo) {
+        const allRunners = [];
+        let page = 1;
+        const perPage = 100;
+        const startTime = Date.now();
+        while (page <= MAX_PAGINATION_PAGES) {
+            // Check total timeout
+            if (Date.now() - startTime > PAGINATION_TIMEOUT_MS) {
+                orchestrator_logger_1.default.logWarning(`[RunnerAvailability] Pagination timeout reached after ${page - 1} pages and ${Date.now() - startTime}ms. ` +
+                    `Using ${allRunners.length} runners found so far.`);
+                break;
+            }
+            let response;
+            try {
+                response = await octokit.request('GET /repos/{owner}/{repo}/actions/runners', {
+                    owner,
+                    repo,
+                    per_page: perPage,
+                    page,
+                });
+            }
+            catch (requestError) {
+                // Octokit throws for non-2xx responses. Check if this is a rate limit error.
+                const status = requestError.status ?? requestError.response?.status;
+                if (status === 403 || status === 429) {
+                    const resetTime = requestError.response?.headers?.['x-ratelimit-reset'] ?? requestError.headers?.['x-ratelimit-reset'];
+                    const resetMessage = resetTime
+                        ? ` Resets at ${new Date(Number.parseInt(String(resetTime), 10) * 1000).toISOString()}`
+                        : '';
+                    orchestrator_logger_1.default.logWarning(`[RunnerAvailability] GitHub API rate limit reached (HTTP ${status}).${resetMessage} ` +
+                        `Using ${allRunners.length} runners found so far.`);
+                    break;
+                }
+                // Re-throw non-rate-limit errors to be handled by the outer catch
+                throw requestError;
+            }
+            const runners = (response.data.runners || []);
+            allRunners.push(...runners);
+            if (runners.length < perPage)
+                break;
+            page++;
+        }
+        if (page > MAX_PAGINATION_PAGES) {
+            orchestrator_logger_1.default.logWarning(`[RunnerAvailability] Maximum pagination limit reached (${MAX_PAGINATION_PAGES} pages). ` +
+                `Using ${allRunners.length} runners found so far.`);
+        }
+        if (allRunners.length === 0) {
+            orchestrator_logger_1.default.log('[RunnerAvailability] No runners found. Possible causes: ' +
+                'wrong token permissions (needs repo or actions scope), ' +
+                'no self-hosted runners registered, ' +
+                'or runners are registered at the organization level instead of the repository.');
+        }
+        return allRunners;
+    }
+    /**
+     * Filter runners by required labels. A runner matches if it has ALL required labels.
+     * If requiredLabels is empty, all runners match.
+     */
+    static filterByLabels(runners, requiredLabels) {
+        if (requiredLabels.length === 0)
+            return runners;
+        return runners.filter((runner) => {
+            const runnerLabelNames = runner.labels.map((l) => l.name.toLowerCase());
+            return requiredLabels.every((required) => runnerLabelNames.includes(required.toLowerCase()));
+        });
+    }
+}
+exports.RunnerAvailabilityService = RunnerAvailabilityService;
+
+
 /***/ }),

 /***/ 54222: