From cff759721ae6cedc341bba8db1f094aafcb7e905 Mon Sep 17 00:00:00 2001 From: frostebite Date: Thu, 5 Mar 2026 13:00:17 +0000 Subject: [PATCH] fix(load-balancing): add pagination limits and rate-limit detection Cap pagination at 100 pages (10,000 runners max), detect GitHub API rate limiting (403/429) with reset time reporting, add 30-second total timeout for pagination loop. Log clear diagnostic when no runners found suggesting possible causes (token permissions, runner registration). Co-Authored-By: Claude Opus 4.6 --- .../core/runner-availability-service.test.ts | 145 ++++++++++++++++-- .../core/runner-availability-service.ts | 78 +++++++++- 2 files changed, 207 insertions(+), 16 deletions(-) diff --git a/src/model/orchestrator/services/core/runner-availability-service.test.ts b/src/model/orchestrator/services/core/runner-availability-service.test.ts index 84248ee8..797ace1c 100644 --- a/src/model/orchestrator/services/core/runner-availability-service.test.ts +++ b/src/model/orchestrator/services/core/runner-availability-service.test.ts @@ -43,7 +43,7 @@ describe('RunnerAvailabilityService', () => { }); it('should fallback when no runners are registered', async () => { - const mockRequest = jest.fn().mockResolvedValue({ data: { runners: [] } }); + const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners: [] } }); MockedOctokit.mockImplementation(() => ({ request: mockRequest }) as any); const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1); @@ -57,7 +57,7 @@ describe('RunnerAvailabilityService', () => { { name: 'runner-1', status: 'online', busy: false, labels: ['self-hosted', 'linux'] }, { name: 'runner-2', status: 'online', busy: false, labels: ['self-hosted', 'linux'] }, ]); - const mockRequest = jest.fn().mockResolvedValue({ data: { runners } }); + const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } }); MockedOctokit.mockImplementation(() => ({ request: mockRequest }) as any); const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1); @@ -71,7 +71,7 @@ describe('RunnerAvailabilityService', () => { { name: 'runner-1', status: 'online', busy: true, labels: ['self-hosted'] }, { name: 'runner-2', status: 'online', busy: true, labels: ['self-hosted'] }, ]); - const mockRequest = jest.fn().mockResolvedValue({ data: { runners } }); + const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } }); MockedOctokit.mockImplementation(() => ({ request: mockRequest }) as any); const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1); @@ -84,7 +84,7 @@ describe('RunnerAvailabilityService', () => { const runners = createMockRunners([ { name: 'runner-1', status: 'offline', busy: false, labels: ['self-hosted'] }, ]); - const mockRequest = jest.fn().mockResolvedValue({ data: { runners } }); + const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } }); MockedOctokit.mockImplementation(() => ({ request: mockRequest }) as any); const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1); @@ -97,7 +97,7 @@ describe('RunnerAvailabilityService', () => { { name: 'linux-runner', status: 'online', busy: false, labels: ['self-hosted', 'linux'] }, { name: 'windows-runner', status: 'online', busy: false, labels: ['self-hosted', 'windows'] }, ]); - const mockRequest = jest.fn().mockResolvedValue({ data: { runners } }); + const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } }); MockedOctokit.mockImplementation(() => ({ request: mockRequest }) as any); const result = await RunnerAvailabilityService.checkAvailability( @@ -118,7 +118,7 @@ describe('RunnerAvailabilityService', () => { const runners = createMockRunners([ { name: 'windows-runner', status: 'online', busy: false, labels: ['self-hosted', 'windows'] }, ]); - const mockRequest = jest.fn().mockResolvedValue({ data: { runners } }); + const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } }); MockedOctokit.mockImplementation(() => ({ request: mockRequest }) as any); const result = await RunnerAvailabilityService.checkAvailability( @@ -138,7 +138,7 @@ describe('RunnerAvailabilityService', () => { const runners = createMockRunners([ { name: 'runner-1', status: 'online', busy: false, labels: ['self-hosted'] }, ]); - const mockRequest = jest.fn().mockResolvedValue({ data: { runners } }); + const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } }); MockedOctokit.mockImplementation(() => ({ request: mockRequest }) as any); // Need 2, have 1 — should fallback @@ -151,7 +151,7 @@ describe('RunnerAvailabilityService', () => { const runners = createMockRunners([ { name: 'runner-1', status: 'online', busy: false, labels: ['Self-Hosted', 'Linux'] }, ]); - const mockRequest = jest.fn().mockResolvedValue({ data: { runners } }); + const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } }); MockedOctokit.mockImplementation(() => ({ request: mockRequest }) as any); const result = await RunnerAvailabilityService.checkAvailability( @@ -180,7 +180,7 @@ describe('RunnerAvailabilityService', () => { { name: 'busy', status: 'online', busy: true, labels: ['self-hosted'] }, { name: 'offline', status: 'offline', busy: false, labels: ['self-hosted'] }, ]); - const mockRequest = jest.fn().mockResolvedValue({ data: { runners } }); + const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } }); MockedOctokit.mockImplementation(() => ({ request: mockRequest }) as any); const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1); @@ -190,4 +190,131 @@ describe('RunnerAvailabilityService', () => { expect(result.idleRunners).toBe(1); }); }); + + describe('pagination limits', () => { + it('should stop paginating after reaching the page limit', async () => { + // Return full pages (100 runners each) to force continued pagination + let callCount = 0; + const mockRequest = jest.fn().mockImplementation(() => { + callCount++; + const runners = createMockRunners( + Array.from({ length: 100 }, (_, i) => ({ + name: `runner-${callCount}-${i}`, + status: 'online' as const, + busy: false, + labels: ['self-hosted'], + })), + ); + + return Promise.resolve({ status: 200, data: { runners } }); + }); + MockedOctokit.mockImplementation(() => ({ request: mockRequest }) as any); + + const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1); + + // Should have called at most 100 pages (the MAX_PAGINATION_PAGES limit) + expect(mockRequest).toHaveBeenCalledTimes(100); + // Should still have runners from the pages it did fetch + expect(result.totalRunners).toBe(10000); + expect(result.shouldFallback).toBe(false); + }); + + it('should stop paginating on rate limit (HTTP 403)', async () => { + let callCount = 0; + const mockRequest = jest.fn().mockImplementation(() => { + callCount++; + if (callCount === 2) { + // Octokit throws for non-2xx responses + const error: any = new Error('API rate limit exceeded'); + error.status = 403; + error.response = { + status: 403, + headers: { 'x-ratelimit-reset': String(Math.floor(Date.now() / 1000) + 3600) }, + }; + + return Promise.reject(error); + } + const runners = createMockRunners( + Array.from({ length: 100 }, (_, i) => ({ + name: `runner-${i}`, + status: 'online' as const, + busy: false, + labels: ['self-hosted'], + })), + ); + + return Promise.resolve({ status: 200, data: { runners } }); + }); + MockedOctokit.mockImplementation(() => ({ request: mockRequest }) as any); + + const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1); + + // Should have stopped at page 2 (rate limited) + expect(mockRequest).toHaveBeenCalledTimes(2); + // Should use the 100 runners from the first page + expect(result.totalRunners).toBe(100); + expect(result.shouldFallback).toBe(false); + }); + + it('should stop paginating on rate limit (HTTP 429)', async () => { + let callCount = 0; + const mockRequest = jest.fn().mockImplementation(() => { + callCount++; + if (callCount === 1) { + // Octokit throws for non-2xx responses + const error: any = new Error('Too Many Requests'); + error.status = 429; + error.response = { status: 429, headers: {} }; + + return Promise.reject(error); + } + + return Promise.resolve({ status: 200, data: { runners: [] } }); + }); + MockedOctokit.mockImplementation(() => ({ request: mockRequest }) as any); + + const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1); + + // Should have stopped at first page (rate limited immediately) + expect(mockRequest).toHaveBeenCalledTimes(1); + // No runners found — should fallback + expect(result.totalRunners).toBe(0); + expect(result.shouldFallback).toBe(true); + }); + + it('should handle pagination timeout gracefully', async () => { + // Mock Date.now to simulate timeout + const originalDateNow = Date.now; + let callCount = 0; + + const mockRequest = jest.fn().mockImplementation(() => { + callCount++; + // After first call, advance time past the timeout + if (callCount >= 2) { + Date.now = jest.fn(() => originalDateNow() + 31_000); + } + const runners = createMockRunners( + Array.from({ length: 100 }, (_, i) => ({ + name: `runner-${callCount}-${i}`, + status: 'online' as const, + busy: false, + labels: ['self-hosted'], + })), + ); + + return Promise.resolve({ status: 200, data: { runners } }); + }); + MockedOctokit.mockImplementation(() => ({ request: mockRequest }) as any); + + const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1); + + // Should have stopped after timeout was detected (2 pages: first succeeds, second triggers timeout check) + expect(mockRequest.mock.calls.length).toBeLessThanOrEqual(3); + // Should have runners from pages fetched before timeout + expect(result.totalRunners).toBeGreaterThan(0); + + // Restore + Date.now = originalDateNow; + }); + }); }); diff --git a/src/model/orchestrator/services/core/runner-availability-service.ts b/src/model/orchestrator/services/core/runner-availability-service.ts index 26562c40..0b0962fa 100644 --- a/src/model/orchestrator/services/core/runner-availability-service.ts +++ b/src/model/orchestrator/services/core/runner-availability-service.ts @@ -17,6 +17,18 @@ interface RunnerCheckResult { idleRunners: number; } +/** + * Maximum number of pages to fetch when paginating through GitHub API results. + * 100 pages * 100 per page = 10,000 runners maximum. + */ +const MAX_PAGINATION_PAGES = 100; + +/** + * Total timeout in milliseconds for the pagination loop. + * Prevents indefinite API calls if GitHub is slow or pagination is unexpectedly deep. + */ +const PAGINATION_TIMEOUT_MS = 30_000; + /** * Checks GitHub Actions runner availability to support automatic provider fallback. * @@ -102,19 +114,55 @@ export class RunnerAvailabilityService { /** * Fetch all runners for a repository, handling pagination. + * + * Includes defensive limits: + * - Maximum page count (MAX_PAGINATION_PAGES) to prevent infinite loops + * - Total timeout (PAGINATION_TIMEOUT_MS) to prevent indefinite API calls + * - Rate-limit detection (HTTP 403/429 with X-RateLimit-Remaining header) */ private static async fetchRunners(octokit: Octokit, owner: string, repo: string): Promise { const allRunners: GitHubRunner[] = []; let page = 1; const perPage = 100; + const startTime = Date.now(); - while (true) { - const response = await octokit.request('GET /repos/{owner}/{repo}/actions/runners', { - owner, - repo, - per_page: perPage, - page, - }); + while (page <= MAX_PAGINATION_PAGES) { + // Check total timeout + if (Date.now() - startTime > PAGINATION_TIMEOUT_MS) { + OrchestratorLogger.logWarning( + `[RunnerAvailability] Pagination timeout reached after ${page - 1} pages and ${Date.now() - startTime}ms. ` + + `Using ${allRunners.length} runners found so far.`, + ); + break; + } + + let response: any; + try { + response = await octokit.request('GET /repos/{owner}/{repo}/actions/runners', { + owner, + repo, + per_page: perPage, + page, + }); + } catch (requestError: any) { + // Octokit throws for non-2xx responses. Check if this is a rate limit error. + const status = requestError.status ?? requestError.response?.status; + if (status === 403 || status === 429) { + const resetTime = + requestError.response?.headers?.['x-ratelimit-reset'] ?? + requestError.headers?.['x-ratelimit-reset']; + const resetMessage = resetTime + ? ` Resets at ${new Date(Number.parseInt(String(resetTime), 10) * 1000).toISOString()}` + : ''; + OrchestratorLogger.logWarning( + `[RunnerAvailability] GitHub API rate limit reached (HTTP ${status}).${resetMessage} ` + + `Using ${allRunners.length} runners found so far.`, + ); + break; + } + // Re-throw non-rate-limit errors to be handled by the outer catch + throw requestError; + } const runners = (response.data.runners || []) as GitHubRunner[]; allRunners.push(...runners); @@ -123,6 +171,22 @@ export class RunnerAvailabilityService { page++; } + if (page > MAX_PAGINATION_PAGES) { + OrchestratorLogger.logWarning( + `[RunnerAvailability] Maximum pagination limit reached (${MAX_PAGINATION_PAGES} pages). ` + + `Using ${allRunners.length} runners found so far.`, + ); + } + + if (allRunners.length === 0) { + OrchestratorLogger.log( + '[RunnerAvailability] No runners found. Possible causes: ' + + 'wrong token permissions (needs repo or actions scope), ' + + 'no self-hosted runners registered, ' + + 'or runners are registered at the organization level instead of the repository.', + ); + } + return allRunners; }