mirror of
https://github.com/game-ci/unity-builder.git
synced 2026-06-16 13:06:53 -07:00
feat(orchestrator): add retry-on-fallback and provider init timeout
Adds retryOnFallback (retry failed builds on alternate provider) and providerInitTimeout (swap provider if init takes too long). Refactors run() into run()/runWithProvider() to support retry loop. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
+12
@@ -218,6 +218,18 @@ inputs:
|
|||||||
description:
|
description:
|
||||||
'[Orchestrator] Minimum number of idle runners required for the primary provider. If fewer are available,
|
'[Orchestrator] Minimum number of idle runners required for the primary provider. If fewer are available,
|
||||||
routes to fallbackProviderStrategy.'
|
routes to fallbackProviderStrategy.'
|
||||||
|
retryOnFallback:
|
||||||
|
default: 'false'
|
||||||
|
required: false
|
||||||
|
description:
|
||||||
|
'[Orchestrator] When true and fallbackProviderStrategy is set, automatically retry the build on the fallback
|
||||||
|
provider if the primary provider fails. Useful for long builds where transient cloud failures are common.'
|
||||||
|
providerInitTimeout:
|
||||||
|
default: '0'
|
||||||
|
required: false
|
||||||
|
description:
|
||||||
|
'[Orchestrator] Maximum seconds to wait for the primary provider to initialize (setupWorkflow). If exceeded
|
||||||
|
and fallbackProviderStrategy is set, switches to the fallback. Set to 0 to disable (default).'
|
||||||
resourceTracking:
|
resourceTracking:
|
||||||
default: 'false'
|
default: 'false'
|
||||||
required: false
|
required: false
|
||||||
|
|||||||
+47
-1
@@ -331,6 +331,8 @@ class BuildParameters {
|
|||||||
runnerCheckEnabled: orchestrator_options_1.default.runnerCheckEnabled,
|
runnerCheckEnabled: orchestrator_options_1.default.runnerCheckEnabled,
|
||||||
runnerCheckLabels: orchestrator_options_1.default.runnerCheckLabels,
|
runnerCheckLabels: orchestrator_options_1.default.runnerCheckLabels,
|
||||||
runnerCheckMinAvailable: orchestrator_options_1.default.runnerCheckMinAvailable,
|
runnerCheckMinAvailable: orchestrator_options_1.default.runnerCheckMinAvailable,
|
||||||
|
retryOnFallback: orchestrator_options_1.default.retryOnFallback,
|
||||||
|
providerInitTimeout: orchestrator_options_1.default.providerInitTimeout,
|
||||||
buildPlatform: orchestrator_options_1.default.buildPlatform,
|
buildPlatform: orchestrator_options_1.default.buildPlatform,
|
||||||
kubeConfig: orchestrator_options_1.default.kubeConfig,
|
kubeConfig: orchestrator_options_1.default.kubeConfig,
|
||||||
containerMemory: orchestrator_options_1.default.containerMemory,
|
containerMemory: orchestrator_options_1.default.containerMemory,
|
||||||
@@ -2221,6 +2223,12 @@ class OrchestratorOptions {
|
|||||||
static get runnerCheckMinAvailable() {
|
static get runnerCheckMinAvailable() {
|
||||||
return Number(OrchestratorOptions.getInput('runnerCheckMinAvailable')) || 1;
|
return Number(OrchestratorOptions.getInput('runnerCheckMinAvailable')) || 1;
|
||||||
}
|
}
|
||||||
|
static get retryOnFallback() {
|
||||||
|
return OrchestratorOptions.getInput('retryOnFallback') === 'true';
|
||||||
|
}
|
||||||
|
static get providerInitTimeout() {
|
||||||
|
return Number(OrchestratorOptions.getInput('providerInitTimeout')) || 0;
|
||||||
|
}
|
||||||
static get containerCpu() {
|
static get containerCpu() {
|
||||||
return OrchestratorOptions.getInput('containerCpu') || `1024`;
|
return OrchestratorOptions.getInput('containerCpu') || `1024`;
|
||||||
}
|
}
|
||||||
@@ -2680,13 +2688,33 @@ class Orchestrator {
|
|||||||
if (baseImage.includes(`undefined`)) {
|
if (baseImage.includes(`undefined`)) {
|
||||||
throw new Error(`baseImage is undefined`);
|
throw new Error(`baseImage is undefined`);
|
||||||
}
|
}
|
||||||
|
try {
|
||||||
|
return await Orchestrator.runWithProvider(buildParameters, baseImage);
|
||||||
|
}
|
||||||
|
catch (primaryError) {
|
||||||
|
// Retry on fallback provider if enabled and a fallback is configured
|
||||||
|
const fallback = buildParameters.fallbackProviderStrategy;
|
||||||
|
const alreadyOnFallback = buildParameters.providerStrategy === fallback;
|
||||||
|
if (buildParameters.retryOnFallback && fallback && !alreadyOnFallback) {
|
||||||
|
orchestrator_logger_1.default.log(`Primary provider '${buildParameters.providerStrategy}' failed: ${primaryError.message}`);
|
||||||
|
orchestrator_logger_1.default.log(`Retrying build on fallback provider '${fallback}'...`);
|
||||||
|
buildParameters.providerStrategy = fallback;
|
||||||
|
core.setOutput('providerFallbackUsed', 'true');
|
||||||
|
core.setOutput('providerFallbackReason', `Primary provider failed: ${primaryError.message}`);
|
||||||
|
return await Orchestrator.runWithProvider(buildParameters, baseImage);
|
||||||
|
}
|
||||||
|
throw primaryError;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
static async runWithProvider(buildParameters, baseImage) {
|
||||||
await Orchestrator.setup(buildParameters);
|
await Orchestrator.setup(buildParameters);
|
||||||
// When aws-local mode is enabled, validate AWS CloudFormation templates
|
// When aws-local mode is enabled, validate AWS CloudFormation templates
|
||||||
// This ensures AWS templates are correct even when executing via local-docker
|
// This ensures AWS templates are correct even when executing via local-docker
|
||||||
if (Orchestrator.validateAwsTemplates) {
|
if (Orchestrator.validateAwsTemplates) {
|
||||||
await Orchestrator.validateAwsCloudFormationTemplates();
|
await Orchestrator.validateAwsCloudFormationTemplates();
|
||||||
}
|
}
|
||||||
await Orchestrator.Provider.setupWorkflow(Orchestrator.buildParameters.buildGuid, Orchestrator.buildParameters, Orchestrator.buildParameters.branch, Orchestrator.defaultSecrets);
|
// Setup workflow with optional init timeout
|
||||||
|
await Orchestrator.setupWorkflowWithTimeout();
|
||||||
try {
|
try {
|
||||||
if (buildParameters.maxRetainedWorkspaces > 0) {
|
if (buildParameters.maxRetainedWorkspaces > 0) {
|
||||||
Orchestrator.lockedWorkspace = shared_workspace_locking_1.default.NewWorkspaceName();
|
Orchestrator.lockedWorkspace = shared_workspace_locking_1.default.NewWorkspaceName();
|
||||||
@@ -2736,6 +2764,24 @@ class Orchestrator {
|
|||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* Runs setupWorkflow with an optional timeout. If providerInitTimeout is set and the
|
||||||
|
* provider takes longer than that to initialize, throws an error that triggers
|
||||||
|
* retry-on-fallback (if enabled).
|
||||||
|
*/
|
||||||
|
static async setupWorkflowWithTimeout() {
|
||||||
|
const timeoutSeconds = Orchestrator.buildParameters.providerInitTimeout;
|
||||||
|
const setupPromise = Orchestrator.Provider.setupWorkflow(Orchestrator.buildParameters.buildGuid, Orchestrator.buildParameters, Orchestrator.buildParameters.branch, Orchestrator.defaultSecrets);
|
||||||
|
if (timeoutSeconds <= 0) {
|
||||||
|
await setupPromise;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
orchestrator_logger_1.default.log(`Provider init timeout: ${timeoutSeconds}s`);
|
||||||
|
const timeoutPromise = new Promise((_, reject) => {
|
||||||
|
setTimeout(() => reject(new Error(`Provider initialization timed out after ${timeoutSeconds}s`)), timeoutSeconds * 1000);
|
||||||
|
});
|
||||||
|
await Promise.race([setupPromise, timeoutPromise]);
|
||||||
|
}
|
||||||
static async updateStatusWithBuildParameters() {
|
static async updateStatusWithBuildParameters() {
|
||||||
const content = { ...Orchestrator.buildParameters };
|
const content = { ...Orchestrator.buildParameters };
|
||||||
content.gitPrivateToken = ``;
|
content.gitPrivateToken = ``;
|
||||||
|
|||||||
+1
-1
File diff suppressed because one or more lines are too long
@@ -58,6 +58,8 @@ class BuildParameters {
|
|||||||
public runnerCheckEnabled!: boolean;
|
public runnerCheckEnabled!: boolean;
|
||||||
public runnerCheckLabels!: string[];
|
public runnerCheckLabels!: string[];
|
||||||
public runnerCheckMinAvailable!: number;
|
public runnerCheckMinAvailable!: number;
|
||||||
|
public retryOnFallback!: boolean;
|
||||||
|
public providerInitTimeout!: number;
|
||||||
public gitPrivateToken!: string;
|
public gitPrivateToken!: string;
|
||||||
public awsStackName!: string;
|
public awsStackName!: string;
|
||||||
public awsEndpoint?: string;
|
public awsEndpoint?: string;
|
||||||
@@ -202,6 +204,8 @@ class BuildParameters {
|
|||||||
runnerCheckEnabled: OrchestratorOptions.runnerCheckEnabled,
|
runnerCheckEnabled: OrchestratorOptions.runnerCheckEnabled,
|
||||||
runnerCheckLabels: OrchestratorOptions.runnerCheckLabels,
|
runnerCheckLabels: OrchestratorOptions.runnerCheckLabels,
|
||||||
runnerCheckMinAvailable: OrchestratorOptions.runnerCheckMinAvailable,
|
runnerCheckMinAvailable: OrchestratorOptions.runnerCheckMinAvailable,
|
||||||
|
retryOnFallback: OrchestratorOptions.retryOnFallback,
|
||||||
|
providerInitTimeout: OrchestratorOptions.providerInitTimeout,
|
||||||
buildPlatform: OrchestratorOptions.buildPlatform,
|
buildPlatform: OrchestratorOptions.buildPlatform,
|
||||||
kubeConfig: OrchestratorOptions.kubeConfig,
|
kubeConfig: OrchestratorOptions.kubeConfig,
|
||||||
containerMemory: OrchestratorOptions.containerMemory,
|
containerMemory: OrchestratorOptions.containerMemory,
|
||||||
|
|||||||
@@ -156,6 +156,14 @@ class OrchestratorOptions {
|
|||||||
return Number(OrchestratorOptions.getInput('runnerCheckMinAvailable')) || 1;
|
return Number(OrchestratorOptions.getInput('runnerCheckMinAvailable')) || 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Whether a failed build should be retried on the fallback provider.
 * True only when the `retryOnFallback` input is exactly the string 'true'.
 */
static get retryOnFallback(): boolean {
  const rawValue = OrchestratorOptions.getInput('retryOnFallback');

  return rawValue === 'true';
}
|
||||||
|
|
||||||
|
/**
 * Provider initialization timeout read from the `providerInitTimeout` input.
 * The input is converted with Number(); a result of NaN or 0 (e.g. a
 * non-numeric value) yields 0.
 */
static get providerInitTimeout(): number {
  const parsedTimeout = Number(OrchestratorOptions.getInput('providerInitTimeout'));

  return parsedTimeout || 0;
}
|
||||||
|
|
||||||
static get containerCpu(): string {
|
static get containerCpu(): string {
|
||||||
return OrchestratorOptions.getInput('containerCpu') || `1024`;
|
return OrchestratorOptions.getInput('containerCpu') || `1024`;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -217,6 +217,30 @@ class Orchestrator {
|
|||||||
if (baseImage.includes(`undefined`)) {
|
if (baseImage.includes(`undefined`)) {
|
||||||
throw new Error(`baseImage is undefined`);
|
throw new Error(`baseImage is undefined`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
return await Orchestrator.runWithProvider(buildParameters, baseImage);
|
||||||
|
} catch (primaryError: any) {
|
||||||
|
// Retry on fallback provider if enabled and a fallback is configured
|
||||||
|
const fallback = buildParameters.fallbackProviderStrategy;
|
||||||
|
const alreadyOnFallback = buildParameters.providerStrategy === fallback;
|
||||||
|
if (buildParameters.retryOnFallback && fallback && !alreadyOnFallback) {
|
||||||
|
OrchestratorLogger.log(
|
||||||
|
`Primary provider '${buildParameters.providerStrategy}' failed: ${primaryError.message}`,
|
||||||
|
);
|
||||||
|
OrchestratorLogger.log(`Retrying build on fallback provider '${fallback}'...`);
|
||||||
|
buildParameters.providerStrategy = fallback;
|
||||||
|
core.setOutput('providerFallbackUsed', 'true');
|
||||||
|
core.setOutput('providerFallbackReason', `Primary provider failed: ${primaryError.message}`);
|
||||||
|
|
||||||
|
return await Orchestrator.runWithProvider(buildParameters, baseImage);
|
||||||
|
}
|
||||||
|
|
||||||
|
throw primaryError;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static async runWithProvider(buildParameters: BuildParameters, baseImage: string) {
|
||||||
await Orchestrator.setup(buildParameters);
|
await Orchestrator.setup(buildParameters);
|
||||||
|
|
||||||
// When aws-local mode is enabled, validate AWS CloudFormation templates
|
// When aws-local mode is enabled, validate AWS CloudFormation templates
|
||||||
@@ -224,12 +248,10 @@ class Orchestrator {
|
|||||||
if (Orchestrator.validateAwsTemplates) {
|
if (Orchestrator.validateAwsTemplates) {
|
||||||
await Orchestrator.validateAwsCloudFormationTemplates();
|
await Orchestrator.validateAwsCloudFormationTemplates();
|
||||||
}
|
}
|
||||||
await Orchestrator.Provider.setupWorkflow(
|
|
||||||
Orchestrator.buildParameters.buildGuid,
|
// Setup workflow with optional init timeout
|
||||||
Orchestrator.buildParameters,
|
await Orchestrator.setupWorkflowWithTimeout();
|
||||||
Orchestrator.buildParameters.branch,
|
|
||||||
Orchestrator.defaultSecrets,
|
|
||||||
);
|
|
||||||
try {
|
try {
|
||||||
if (buildParameters.maxRetainedWorkspaces > 0) {
|
if (buildParameters.maxRetainedWorkspaces > 0) {
|
||||||
Orchestrator.lockedWorkspace = SharedWorkspaceLocking.NewWorkspaceName();
|
Orchestrator.lockedWorkspace = SharedWorkspaceLocking.NewWorkspaceName();
|
||||||
@@ -310,6 +332,39 @@ class Orchestrator {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Runs the provider's setupWorkflow with an optional timeout.
 *
 * When `buildParameters.providerInitTimeout` is a positive number of seconds,
 * setup is raced against a timer and an Error is thrown if the timer fires
 * first; a caller may treat that error as a reason to retry on a fallback
 * provider. Any other value (0, negative, NaN, undefined) disables the
 * timeout and setup is simply awaited.
 *
 * Note: a timed-out setupWorkflow call is not cancelled; it keeps running in
 * the background and its eventual result is ignored.
 *
 * @throws Error when setup does not finish within the timeout, or whatever
 *   setupWorkflow itself rejects with.
 */
private static async setupWorkflowWithTimeout() {
  const timeoutSeconds = Orchestrator.buildParameters.providerInitTimeout;

  const setupPromise = Orchestrator.Provider.setupWorkflow(
    Orchestrator.buildParameters.buildGuid,
    Orchestrator.buildParameters,
    Orchestrator.buildParameters.branch,
    Orchestrator.defaultSecrets,
  );

  // `!(x > 0)` also catches NaN/undefined, which `x <= 0` would let through
  // and which setTimeout would turn into an almost immediate timeout.
  if (!(timeoutSeconds > 0)) {
    await setupPromise;

    return;
  }

  OrchestratorLogger.log(`Provider init timeout: ${timeoutSeconds}s`);

  // Node coerces delays above 2^31-1 ms to 1 ms, so clamp instead of
  // letting a very large configured value fire instantly.
  const maxTimerDelayMs = 2_147_483_647;
  const delayMs = Math.min(timeoutSeconds * 1000, maxTimerDelayMs);

  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeoutPromise = new Promise<never>((_, reject) => {
    timer = setTimeout(
      () => reject(new Error(`Provider initialization timed out after ${timeoutSeconds}s`)),
      delayMs,
    );
  });

  try {
    await Promise.race([setupPromise, timeoutPromise]);
  } finally {
    // Always release the timer so it cannot keep the process alive after
    // setup has already succeeded or failed.
    clearTimeout(timer);
  }
}
|
||||||
|
|
||||||
private static async updateStatusWithBuildParameters() {
|
private static async updateStatusWithBuildParameters() {
|
||||||
const content = { ...Orchestrator.buildParameters };
|
const content = { ...Orchestrator.buildParameters };
|
||||||
content.gitPrivateToken = ``;
|
content.gitPrivateToken = ``;
|
||||||
|
|||||||
Reference in New Issue
Block a user