fix(load-balancing): add pagination limits and rate-limit detection

Cap pagination at 100 pages (10,000 runners max), detect GitHub API
rate limiting (403/429) with reset time reporting, add 30-second total
timeout for pagination loop. Log clear diagnostic when no runners found
suggesting possible causes (token permissions, runner registration).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
frostebite
2026-03-05 13:00:17 +00:00
parent 7e9d0bf53e
commit cff759721a
2 changed files with 207 additions and 16 deletions

View File

@@ -43,7 +43,7 @@ describe('RunnerAvailabilityService', () => {
});
it('should fallback when no runners are registered', async () => {
const mockRequest = jest.fn().mockResolvedValue({ data: { runners: [] } });
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners: [] } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest }) as any);
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
@@ -57,7 +57,7 @@ describe('RunnerAvailabilityService', () => {
{ name: 'runner-1', status: 'online', busy: false, labels: ['self-hosted', 'linux'] },
{ name: 'runner-2', status: 'online', busy: false, labels: ['self-hosted', 'linux'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ data: { runners } });
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest }) as any);
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
@@ -71,7 +71,7 @@ describe('RunnerAvailabilityService', () => {
{ name: 'runner-1', status: 'online', busy: true, labels: ['self-hosted'] },
{ name: 'runner-2', status: 'online', busy: true, labels: ['self-hosted'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ data: { runners } });
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest }) as any);
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
@@ -84,7 +84,7 @@ describe('RunnerAvailabilityService', () => {
const runners = createMockRunners([
{ name: 'runner-1', status: 'offline', busy: false, labels: ['self-hosted'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ data: { runners } });
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest }) as any);
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
@@ -97,7 +97,7 @@ describe('RunnerAvailabilityService', () => {
{ name: 'linux-runner', status: 'online', busy: false, labels: ['self-hosted', 'linux'] },
{ name: 'windows-runner', status: 'online', busy: false, labels: ['self-hosted', 'windows'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ data: { runners } });
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest }) as any);
const result = await RunnerAvailabilityService.checkAvailability(
@@ -118,7 +118,7 @@ describe('RunnerAvailabilityService', () => {
const runners = createMockRunners([
{ name: 'windows-runner', status: 'online', busy: false, labels: ['self-hosted', 'windows'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ data: { runners } });
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest }) as any);
const result = await RunnerAvailabilityService.checkAvailability(
@@ -138,7 +138,7 @@ describe('RunnerAvailabilityService', () => {
const runners = createMockRunners([
{ name: 'runner-1', status: 'online', busy: false, labels: ['self-hosted'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ data: { runners } });
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest }) as any);
// Need 2, have 1 — should fallback
@@ -151,7 +151,7 @@ describe('RunnerAvailabilityService', () => {
const runners = createMockRunners([
{ name: 'runner-1', status: 'online', busy: false, labels: ['Self-Hosted', 'Linux'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ data: { runners } });
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest }) as any);
const result = await RunnerAvailabilityService.checkAvailability(
@@ -180,7 +180,7 @@ describe('RunnerAvailabilityService', () => {
{ name: 'busy', status: 'online', busy: true, labels: ['self-hosted'] },
{ name: 'offline', status: 'offline', busy: false, labels: ['self-hosted'] },
]);
const mockRequest = jest.fn().mockResolvedValue({ data: { runners } });
const mockRequest = jest.fn().mockResolvedValue({ status: 200, data: { runners } });
MockedOctokit.mockImplementation(() => ({ request: mockRequest }) as any);
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
@@ -190,4 +190,131 @@ describe('RunnerAvailabilityService', () => {
expect(result.idleRunners).toBe(1);
});
});
describe('pagination limits', () => {
it('should stop paginating after reaching the page limit', async () => {
// Return full pages (100 runners each) to force continued pagination
let callCount = 0;
const mockRequest = jest.fn().mockImplementation(() => {
callCount++;
const runners = createMockRunners(
Array.from({ length: 100 }, (_, i) => ({
name: `runner-${callCount}-${i}`,
status: 'online' as const,
busy: false,
labels: ['self-hosted'],
})),
);
return Promise.resolve({ status: 200, data: { runners } });
});
MockedOctokit.mockImplementation(() => ({ request: mockRequest }) as any);
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
// Should have called at most 100 pages (the MAX_PAGINATION_PAGES limit)
expect(mockRequest).toHaveBeenCalledTimes(100);
// Should still have runners from the pages it did fetch
expect(result.totalRunners).toBe(10000);
expect(result.shouldFallback).toBe(false);
});
it('should stop paginating on rate limit (HTTP 403)', async () => {
let callCount = 0;
const mockRequest = jest.fn().mockImplementation(() => {
callCount++;
if (callCount === 2) {
// Octokit throws for non-2xx responses
const error: any = new Error('API rate limit exceeded');
error.status = 403;
error.response = {
status: 403,
headers: { 'x-ratelimit-reset': String(Math.floor(Date.now() / 1000) + 3600) },
};
return Promise.reject(error);
}
const runners = createMockRunners(
Array.from({ length: 100 }, (_, i) => ({
name: `runner-${i}`,
status: 'online' as const,
busy: false,
labels: ['self-hosted'],
})),
);
return Promise.resolve({ status: 200, data: { runners } });
});
MockedOctokit.mockImplementation(() => ({ request: mockRequest }) as any);
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
// Should have stopped at page 2 (rate limited)
expect(mockRequest).toHaveBeenCalledTimes(2);
// Should use the 100 runners from the first page
expect(result.totalRunners).toBe(100);
expect(result.shouldFallback).toBe(false);
});
it('should stop paginating on rate limit (HTTP 429)', async () => {
let callCount = 0;
const mockRequest = jest.fn().mockImplementation(() => {
callCount++;
if (callCount === 1) {
// Octokit throws for non-2xx responses
const error: any = new Error('Too Many Requests');
error.status = 429;
error.response = { status: 429, headers: {} };
return Promise.reject(error);
}
return Promise.resolve({ status: 200, data: { runners: [] } });
});
MockedOctokit.mockImplementation(() => ({ request: mockRequest }) as any);
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
// Should have stopped at first page (rate limited immediately)
expect(mockRequest).toHaveBeenCalledTimes(1);
// No runners found — should fallback
expect(result.totalRunners).toBe(0);
expect(result.shouldFallback).toBe(true);
});
it('should handle pagination timeout gracefully', async () => {
// Mock Date.now to simulate timeout
const originalDateNow = Date.now;
let callCount = 0;
const mockRequest = jest.fn().mockImplementation(() => {
callCount++;
// After first call, advance time past the timeout
if (callCount >= 2) {
Date.now = jest.fn(() => originalDateNow() + 31_000);
}
const runners = createMockRunners(
Array.from({ length: 100 }, (_, i) => ({
name: `runner-${callCount}-${i}`,
status: 'online' as const,
busy: false,
labels: ['self-hosted'],
})),
);
return Promise.resolve({ status: 200, data: { runners } });
});
MockedOctokit.mockImplementation(() => ({ request: mockRequest }) as any);
const result = await RunnerAvailabilityService.checkAvailability('owner', 'repo', 'token', [], 1);
// Should have stopped after timeout was detected (2 pages: first succeeds, second triggers timeout check)
expect(mockRequest.mock.calls.length).toBeLessThanOrEqual(3);
// Should have runners from pages fetched before timeout
expect(result.totalRunners).toBeGreaterThan(0);
// Restore
Date.now = originalDateNow;
});
});
});

View File

@@ -17,6 +17,18 @@ interface RunnerCheckResult {
idleRunners: number;
}
/**
* Maximum number of pages to fetch when paginating through GitHub API results.
* 100 pages * 100 per page = 10,000 runners maximum.
*/
const MAX_PAGINATION_PAGES = 100;
/**
* Total timeout in milliseconds for the pagination loop.
* Prevents indefinite API calls if GitHub is slow or pagination is unexpectedly deep.
*/
const PAGINATION_TIMEOUT_MS = 30_000;
/**
* Checks GitHub Actions runner availability to support automatic provider fallback.
*
@@ -102,19 +114,55 @@ export class RunnerAvailabilityService {
/**
* Fetch all runners for a repository, handling pagination.
*
* Includes defensive limits:
* - Maximum page count (MAX_PAGINATION_PAGES) to prevent infinite loops
* - Total timeout (PAGINATION_TIMEOUT_MS) to prevent indefinite API calls
* - Rate-limit detection (HTTP 403/429 with X-RateLimit-Remaining header)
*/
private static async fetchRunners(octokit: Octokit, owner: string, repo: string): Promise<GitHubRunner[]> {
const allRunners: GitHubRunner[] = [];
let page = 1;
const perPage = 100;
const startTime = Date.now();
while (true) {
const response = await octokit.request('GET /repos/{owner}/{repo}/actions/runners', {
owner,
repo,
per_page: perPage,
page,
});
while (page <= MAX_PAGINATION_PAGES) {
// Check total timeout
if (Date.now() - startTime > PAGINATION_TIMEOUT_MS) {
OrchestratorLogger.logWarning(
`[RunnerAvailability] Pagination timeout reached after ${page - 1} pages and ${Date.now() - startTime}ms. ` +
`Using ${allRunners.length} runners found so far.`,
);
break;
}
let response: any;
try {
response = await octokit.request('GET /repos/{owner}/{repo}/actions/runners', {
owner,
repo,
per_page: perPage,
page,
});
} catch (requestError: any) {
// Octokit throws for non-2xx responses. Check if this is a rate limit error.
const status = requestError.status ?? requestError.response?.status;
if (status === 403 || status === 429) {
const resetTime =
requestError.response?.headers?.['x-ratelimit-reset'] ??
requestError.headers?.['x-ratelimit-reset'];
const resetMessage = resetTime
? ` Resets at ${new Date(Number.parseInt(String(resetTime), 10) * 1000).toISOString()}`
: '';
OrchestratorLogger.logWarning(
`[RunnerAvailability] GitHub API rate limit reached (HTTP ${status}).${resetMessage} ` +
`Using ${allRunners.length} runners found so far.`,
);
break;
}
// Re-throw non-rate-limit errors to be handled by the outer catch
throw requestError;
}
const runners = (response.data.runners || []) as GitHubRunner[];
allRunners.push(...runners);
@@ -123,6 +171,22 @@ export class RunnerAvailabilityService {
page++;
}
if (page > MAX_PAGINATION_PAGES) {
OrchestratorLogger.logWarning(
`[RunnerAvailability] Maximum pagination limit reached (${MAX_PAGINATION_PAGES} pages). ` +
`Using ${allRunners.length} runners found so far.`,
);
}
if (allRunners.length === 0) {
OrchestratorLogger.log(
'[RunnerAvailability] No runners found. Possible causes: ' +
'wrong token permissions (needs repo or actions scope), ' +
'no self-hosted runners registered, ' +
'or runners are registered at the organization level instead of the repository.',
);
}
return allRunners;
}