feat(orchestrator): multi-storage support for GCP and Azure providers

Both providers now support four storage backends via gcpStorageType / azureStorageType:

GCP Cloud Run:
  - gcs-fuse: Mount GCS bucket as POSIX filesystem (unlimited, best for large sequential I/O)
  - gcs-copy: Copy artifacts in/out via gsutil (simpler, no FUSE overhead)
  - nfs: Filestore NFS mount (true POSIX, good random I/O, up to 100 TiB)
  - in-memory: tmpfs (fastest, volatile, up to 32 GiB)

Azure ACI:
  - azure-files: SMB file share mount (up to 100 TiB, premium throughput)
  - blob-copy: Copy artifacts in/out via az storage blob (no mount overhead)
  - azure-files-nfs: NFS 4.1 file share mount (true POSIX, no SMB lock overhead)
  - in-memory: emptyDir tmpfs (fastest, volatile, limited by container memory)

New inputs: gcpStorageType, gcpFilestoreIp, gcpFilestoreShare, azureStorageType,
azureBlobContainer. Constructor validates storage config and warns on missing
prerequisites (e.g. NFS requires VPC connector/subnet).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
frostebite
2026-03-05 07:07:20 +00:00
parent d8563369e1
commit f4bc5d20c4
5 changed files with 466 additions and 152 deletions
@@ -1,20 +1,26 @@
/**
* Google Cloud Run Jobs Provider (Experimental)
*
* Executes Unity builds as Cloud Run Jobs with Cloud Storage (GCS) for large artifact storage.
* Executes Unity builds as Cloud Run Jobs with configurable storage backends.
*
* Storage types:
* - gcs-fuse: Mount a GCS bucket as a POSIX filesystem via GCS FUSE sidecar.
* Unlimited size, best for large sequential reads/writes.
* Requires: gcpBucket
* - gcs-copy: Copy artifacts in/out of GCS before/after the build via gsutil.
* No mount overhead, simpler, works everywhere.
* Requires: gcpBucket
* - nfs: Mount a Filestore NFS share. True POSIX semantics, good random I/O,
* up to 100 TiB. Best for Library caching (many small random reads).
* Requires: gcpFilestoreIp, gcpFilestoreShare
* - in-memory: tmpfs volume (emptyDir). Fastest I/O but volatile and limited to 32 GiB.
* Good for scratch/temp space during builds.
*
* Prerequisites:
* - Google Cloud SDK authenticated (GOOGLE_APPLICATION_CREDENTIALS or gcloud auth)
* - Cloud Run Jobs API enabled
* - A GCS bucket for build artifacts
* - Service account with roles: Cloud Run Admin, Storage Admin, Logs Viewer
*
* Architecture:
* - Uses Cloud Run Jobs (not Services) for one-off build execution
* - GCS FUSE sidecar mounts a bucket as a local filesystem for large artifact I/O
* - Cloud Logging streams build output in real-time
* - Supports volumes up to 32 GiB in-memory or unlimited via GCS FUSE
*
* @experimental This provider is experimental. APIs and behavior may change.
*/
@@ -29,10 +35,15 @@ import { OrchestratorSystem } from '../../services/core/orchestrator-system';
import { Input } from '../../..';
import ResourceTracking from '../../services/core/resource-tracking';
type GcpStorageType = 'gcs-fuse' | 'gcs-copy' | 'nfs' | 'in-memory';
class GcpCloudRunProvider implements ProviderInterface {
private readonly project: string;
private readonly region: string;
private readonly storageType: GcpStorageType;
private readonly bucket: string;
private readonly filestoreIp: string;
private readonly filestoreShare: string;
private readonly machineType: string;
private readonly diskSizeGb: number;
private readonly serviceAccount: string;
@@ -43,7 +54,10 @@ class GcpCloudRunProvider implements ProviderInterface {
this.buildParameters = buildParameters;
this.project = buildParameters.gcpProject || process.env.GOOGLE_CLOUD_PROJECT || process.env.GCLOUD_PROJECT || '';
this.region = buildParameters.gcpRegion || Input.region || 'us-central1';
this.storageType = (buildParameters.gcpStorageType || 'gcs-fuse') as GcpStorageType;
this.bucket = buildParameters.gcpBucket || '';
this.filestoreIp = buildParameters.gcpFilestoreIp || '';
this.filestoreShare = buildParameters.gcpFilestoreShare || '/share1';
this.machineType = buildParameters.gcpMachineType || 'e2-standard-4';
this.diskSizeGb = Number.parseInt(buildParameters.gcpDiskSizeGb || '100', 10);
this.serviceAccount = buildParameters.gcpServiceAccount || '';
@@ -52,8 +66,43 @@ class GcpCloudRunProvider implements ProviderInterface {
OrchestratorLogger.log('[GCP Cloud Run] Provider initialized (EXPERIMENTAL)');
OrchestratorLogger.log(`[GCP Cloud Run] Project: ${this.project || '(auto-detect)'}`);
OrchestratorLogger.log(`[GCP Cloud Run] Region: ${this.region}`);
OrchestratorLogger.log(`[GCP Cloud Run] Bucket: ${this.bucket || '(none)'}`);
OrchestratorLogger.log(`[GCP Cloud Run] Disk size: ${this.diskSizeGb}GB`);
OrchestratorLogger.log(`[GCP Cloud Run] Storage: ${this.storageType}`);
this.validateStorageConfig();
}
private validateStorageConfig(): void {
switch (this.storageType) {
case 'gcs-fuse':
case 'gcs-copy':
if (!this.bucket) {
OrchestratorLogger.logWarning(
`[GCP Cloud Run] Storage type '${this.storageType}' requires gcpBucket to be set.`,
);
} else {
OrchestratorLogger.log(`[GCP Cloud Run] Bucket: gs://${this.bucket}`);
}
break;
case 'nfs':
if (!this.filestoreIp) {
OrchestratorLogger.logWarning('[GCP Cloud Run] Storage type "nfs" requires gcpFilestoreIp to be set.');
} else {
OrchestratorLogger.log(`[GCP Cloud Run] Filestore: ${this.filestoreIp}:${this.filestoreShare}`);
}
if (!this.vpcConnector) {
OrchestratorLogger.logWarning(
'[GCP Cloud Run] NFS storage usually requires gcpVpcConnector for private network access to Filestore.',
);
}
break;
case 'in-memory':
OrchestratorLogger.log(`[GCP Cloud Run] In-memory volume: ${Math.min(this.diskSizeGb, 32)} GiB (max 32)`);
break;
default:
OrchestratorLogger.logWarning(
`[GCP Cloud Run] Unknown storage type '${this.storageType}'. Valid: gcs-fuse, gcs-copy, nfs, in-memory`,
);
}
if (!this.project) {
OrchestratorLogger.logWarning(
@@ -73,8 +122,8 @@ class GcpCloudRunProvider implements ProviderInterface {
// Verify gcloud CLI is available
try {
const version = await OrchestratorSystem.Run('gcloud --version', false, true);
OrchestratorLogger.log(`[GCP Cloud Run] gcloud CLI detected`);
await OrchestratorSystem.Run('gcloud --version', false, true);
OrchestratorLogger.log('[GCP Cloud Run] gcloud CLI detected');
} catch {
throw new Error(
'[GCP Cloud Run] gcloud CLI not found. Install Google Cloud SDK: https://cloud.google.com/sdk/docs/install',
@@ -89,27 +138,84 @@ class GcpCloudRunProvider implements ProviderInterface {
false,
true,
);
} catch (error) {
} catch {
OrchestratorLogger.logWarning(
`[GCP Cloud Run] Could not verify Cloud Run API status. Ensure run.googleapis.com is enabled.`,
'[GCP Cloud Run] Could not verify Cloud Run API status. Ensure run.googleapis.com is enabled.',
);
}
// Create GCS bucket for artifacts if specified and doesn't exist
if (this.bucket) {
try {
await OrchestratorSystem.Run(`gcloud storage buckets describe gs://${this.bucket} --format="value(name)"`, false, true);
OrchestratorLogger.log(`[GCP Cloud Run] Bucket gs://${this.bucket} exists`);
} catch {
OrchestratorLogger.log(`[GCP Cloud Run] Creating bucket gs://${this.bucket}`);
const projectFlag = this.project ? `--project=${this.project}` : '';
await OrchestratorSystem.Run(
`gcloud storage buckets create gs://${this.bucket} --location=${this.region} ${projectFlag}`,
);
}
// Storage-specific setup
if ((this.storageType === 'gcs-fuse' || this.storageType === 'gcs-copy') && this.bucket) {
await this.ensureBucketExists();
}
}
private async ensureBucketExists(): Promise<void> {
try {
await OrchestratorSystem.Run(
`gcloud storage buckets describe gs://${this.bucket} --format="value(name)"`,
false,
true,
);
OrchestratorLogger.log(`[GCP Cloud Run] Bucket gs://${this.bucket} exists`);
} catch {
OrchestratorLogger.log(`[GCP Cloud Run] Creating bucket gs://${this.bucket}`);
const projectFlag = this.project ? `--project=${this.project}` : '';
await OrchestratorSystem.Run(
`gcloud storage buckets create gs://${this.bucket} --location=${this.region} ${projectFlag}`,
);
}
}
private buildVolumeFlags(mountdir: string): { volumeFlags: string; mountFlags: string } {
switch (this.storageType) {
case 'gcs-fuse':
if (!this.bucket) return { volumeFlags: '', mountFlags: '' };
return {
volumeFlags: `--add-volume=name=gcs-fuse,type=cloud-storage,bucket=${this.bucket}`,
mountFlags: `--add-volume-mount=volume=gcs-fuse,mount-path=${mountdir}`,
};
case 'nfs':
if (!this.filestoreIp) return { volumeFlags: '', mountFlags: '' };
return {
volumeFlags: `--add-volume=name=nfs-vol,type=nfs,location=${this.filestoreIp}:${this.filestoreShare}`,
mountFlags: `--add-volume-mount=volume=nfs-vol,mount-path=${mountdir}`,
};
case 'in-memory': {
const sizeGib = Math.min(this.diskSizeGb, 32);
return {
volumeFlags: `--add-volume=name=tmpfs-vol,type=in-memory,size-limit=${sizeGib}Gi`,
mountFlags: `--add-volume-mount=volume=tmpfs-vol,mount-path=${mountdir}`,
};
}
case 'gcs-copy':
// No volume mount — artifacts are copied in/out via gsutil commands
return { volumeFlags: '', mountFlags: '' };
default:
return { volumeFlags: '', mountFlags: '' };
}
}
private async copyArtifactsIn(mountdir: string): Promise<void> {
if (this.storageType !== 'gcs-copy' || !this.bucket) return;
OrchestratorLogger.log(`[GCP Cloud Run] Copying artifacts from gs://${this.bucket} to ${mountdir}`);
try {
await OrchestratorSystem.Run(`gcloud storage cp -r "gs://${this.bucket}/*" "${mountdir}/" || true`, false, true);
} catch {
OrchestratorLogger.log('[GCP Cloud Run] No existing artifacts to restore (bucket may be empty)');
}
}
private async copyArtifactsOut(mountdir: string): Promise<void> {
if (this.storageType !== 'gcs-copy' || !this.bucket) return;
OrchestratorLogger.log(`[GCP Cloud Run] Uploading artifacts from ${mountdir} to gs://${this.bucket}`);
await OrchestratorSystem.Run(`gcloud storage cp -r "${mountdir}/*" "gs://${this.bucket}/"`, false, true);
}
async runTaskInWorkflow(
buildGuid: string,
image: string,
@@ -129,21 +235,22 @@ class GcpCloudRunProvider implements ProviderInterface {
const envFlags = environment
.map((env) => `${env.name}=${env.value}`)
.concat(secrets.map((s) => `${s.EnvironmentVariable}=${s.ParameterValue}`));
const envString = envFlags.length > 0 ? `--set-env-vars="${envFlags.join(',')}"` : '';
// Build volume and mount flags for GCS FUSE
let volumeFlags = '';
let mountFlags = '';
if (this.bucket) {
volumeFlags = `--add-volume=name=gcs-fuse,type=cloud-storage,bucket=${this.bucket}`;
mountFlags = `--add-volume-mount=volume=gcs-fuse,mount-path=${mountdir}`;
// Build storage volume flags
const { volumeFlags, mountFlags } = this.buildVolumeFlags(mountdir);
// For gcs-copy, wrap the user command with copy-in/copy-out steps
let effectiveCommands = commands;
if (this.storageType === 'gcs-copy' && this.bucket && commands) {
effectiveCommands = [
`gcloud storage cp -r "gs://${this.bucket}/*" "${mountdir}/" 2>/dev/null || true`,
commands,
`gcloud storage cp -r "${mountdir}/*" "gs://${this.bucket}/"`,
].join(' && ');
}
// Service account flag
const saFlag = this.serviceAccount ? `--service-account=${this.serviceAccount}` : '';
// VPC connector for private networking
const vpcFlag = this.vpcConnector ? `--vpc-connector=${this.vpcConnector}` : '';
// Create the Cloud Run Job
@@ -152,10 +259,10 @@ class GcpCloudRunProvider implements ProviderInterface {
jobName,
`--image=${image}`,
`--region=${this.region}`,
`--task-timeout=86400s`,
`--max-retries=0`,
`--cpu=4`,
`--memory=16Gi`,
'--task-timeout=86400s',
'--max-retries=0',
'--cpu=4',
'--memory=16Gi',
volumeFlags,
mountFlags,
envString,
@@ -172,7 +279,6 @@ class GcpCloudRunProvider implements ProviderInterface {
await OrchestratorSystem.Run(createCmd);
OrchestratorLogger.log(`[GCP Cloud Run] Job ${jobName} created`);
} catch (error: any) {
// Job might already exist from a retry
if (error.message?.includes('already exists')) {
OrchestratorLogger.log(`[GCP Cloud Run] Job ${jobName} already exists, updating...`);
const updateCmd = createCmd.replace('jobs create', 'jobs update');
@@ -183,13 +289,13 @@ class GcpCloudRunProvider implements ProviderInterface {
}
// Override the command if provided
if (commands) {
if (effectiveCommands) {
const updateCmd = [
'gcloud run jobs update',
jobName,
`--region=${this.region}`,
`--command="/bin/sh"`,
`--args="-c,${commands}"`,
'--command="/bin/sh"',
`--args="-c,${effectiveCommands}"`,
projectFlag,
'--quiet',
]
@@ -200,7 +306,7 @@ class GcpCloudRunProvider implements ProviderInterface {
}
// Execute the job
OrchestratorLogger.log(`[GCP Cloud Run] Executing job ${jobName}...`);
OrchestratorLogger.log(`[GCP Cloud Run] Executing job ${jobName} (storage: ${this.storageType})...`);
const executeCmd = [
'gcloud run jobs execute',
jobName,
@@ -216,16 +322,13 @@ class GcpCloudRunProvider implements ProviderInterface {
let output = '';
try {
output = await OrchestratorSystem.Run(executeCmd);
OrchestratorLogger.log(`[GCP Cloud Run] Job execution completed`);
OrchestratorLogger.log('[GCP Cloud Run] Job execution completed');
} catch (error: any) {
// Try to get logs even on failure
await this.streamJobLogs(jobName);
throw new Error(`[GCP Cloud Run] Job execution failed: ${error.message}`);
}
// Stream logs
await this.streamJobLogs(jobName);
return output;
}
@@ -245,7 +348,7 @@ class GcpCloudRunProvider implements ProviderInterface {
}
}
} catch {
OrchestratorLogger.logWarning(`[GCP Cloud Run] Could not retrieve job logs`);
OrchestratorLogger.logWarning('[GCP Cloud Run] Could not retrieve job logs');
}
}
@@ -254,8 +357,7 @@ class GcpCloudRunProvider implements ProviderInterface {
branchName: string,
defaultSecretsArray: { ParameterKey: string; EnvironmentVariable: string; ParameterValue: string }[],
) {
OrchestratorLogger.log(`[GCP Cloud Run] Cleaning up workflow`);
// Cloud Run Jobs auto-cleanup after execution; explicit delete is optional
OrchestratorLogger.log('[GCP Cloud Run] Cleaning up workflow');
}
async garbageCollect(
@@ -265,11 +367,10 @@ class GcpCloudRunProvider implements ProviderInterface {
fullCache: boolean,
baseDependencies: boolean,
): Promise<string> {
OrchestratorLogger.log(`[GCP Cloud Run] Garbage collecting old jobs`);
OrchestratorLogger.log('[GCP Cloud Run] Garbage collecting old jobs');
const projectFlag = this.project ? `--project=${this.project}` : '';
try {
// List old jobs matching the unity-build prefix
const jobsJson = await OrchestratorSystem.Run(
`gcloud run jobs list --region=${this.region} ${projectFlag} --filter="metadata.name~unity-build-" --format="json(metadata.name,metadata.creationTimestamp)"`,
false,