From f4bc5d20c4a48d121d833120dfa45c5f30045d64 Mon Sep 17 00:00:00 2001 From: frostebite Date: Thu, 5 Mar 2026 07:07:20 +0000 Subject: [PATCH] feat(orchestrator): multi-storage support for GCP and Azure providers Both providers now support four storage backends via gcpStorageType / azureStorageType: GCP Cloud Run: - gcs-fuse: Mount GCS bucket as POSIX filesystem (unlimited, best for large sequential I/O) - gcs-copy: Copy artifacts in/out via gsutil (simpler, no FUSE overhead) - nfs: Filestore NFS mount (true POSIX, good random I/O, up to 100 TiB) - in-memory: tmpfs (fastest, volatile, up to 32 GiB) Azure ACI: - azure-files: SMB file share mount (up to 100 TiB, premium throughput) - blob-copy: Copy artifacts in/out via az storage blob (no mount overhead) - azure-files-nfs: NFS 4.1 file share mount (true POSIX, no SMB lock overhead) - in-memory: emptyDir tmpfs (fastest, volatile, limited by container memory) New inputs: gcpStorageType, gcpFilestoreIp, gcpFilestoreShare, azureStorageType, azureBlobContainer. Constructor validates storage config and warns on missing prerequisites (e.g. NFS requires VPC connector/subnet). Co-Authored-By: Claude Opus 4.6 --- action.yml | 47 ++- src/model/build-parameters.ts | 10 + src/model/input.ts | 20 ++ .../orchestrator/providers/azure-aci/index.ts | 332 +++++++++++++----- .../providers/gcp-cloud-run/index.ts | 209 ++++++++--- 5 files changed, 466 insertions(+), 152 deletions(-) diff --git a/action.yml b/action.yml index 34af2ba2..ac524043 100644 --- a/action.yml +++ b/action.yml @@ -291,12 +291,33 @@ inputs: description: '[Orchestrator] [Experimental] Google Cloud region for Cloud Run Jobs (e.g. us-central1). Defaults to the region input if empty.' + gcpStorageType: + required: false + default: 'gcs-fuse' + description: + '[Orchestrator] [Experimental] Storage type for Cloud Run Jobs. 
Options: + gcs-fuse (mount GCS bucket as filesystem, unlimited size, best for large sequential I/O), + gcs-copy (copy artifacts in/out via gsutil, simpler, no FUSE overhead), + nfs (Filestore NFS mount, true POSIX, good random I/O, up to 100 TiB), + in-memory (tmpfs, fastest but volatile, up to 32 GiB).' gcpBucket: required: false default: '' description: '[Orchestrator] [Experimental] GCS bucket name for build artifact storage. - Mounted via GCS FUSE for large filesystem support.' + Used by gcs-fuse and gcs-copy storage types.' + gcpFilestoreIp: + required: false + default: '' + description: + '[Orchestrator] [Experimental] Filestore instance IP address for NFS storage type. + Required when gcpStorageType is nfs.' + gcpFilestoreShare: + required: false + default: '/share1' + description: + '[Orchestrator] [Experimental] Filestore share name for NFS storage type. + Defaults to /share1 (the Filestore default).' gcpMachineType: required: false default: 'e2-standard-4' @@ -306,8 +327,8 @@ inputs: required: false default: '100' description: - '[Orchestrator] [Experimental] Disk size in GB for Cloud Run Jobs. Supports up to 32GB - in-memory or unlimited via GCS FUSE bucket mount.' + '[Orchestrator] [Experimental] Disk size in GB for Cloud Run Jobs in-memory volumes. + Only applies to in-memory storage type (max 32).' gcpServiceAccount: required: false default: '' @@ -330,18 +351,32 @@ inputs: description: '[Orchestrator] [Experimental] Azure region for Container Instances (e.g. eastus, westeurope). Defaults to the region input if empty.' + azureStorageType: + required: false + default: 'azure-files' + description: + '[Orchestrator] [Experimental] Storage type for Azure Container Instances. 
Options: + azure-files (SMB file share mount, up to 100 TiB, auto-created accounts use Standard_LRS), + blob-copy (copy artifacts in/out via az storage blob, no mount overhead), + azure-files-nfs (NFS 4.1 file share mount, true POSIX, no SMB lock overhead), + in-memory (no volume mount, build uses the ephemeral container filesystem, fastest but volatile).' azureStorageAccount: required: false default: '' description: - '[Orchestrator] [Experimental] Azure Storage Account name for file share mounting. - Uses Premium FileStorage for high-throughput large artifact I/O.' + '[Orchestrator] [Experimental] Azure Storage Account name. + Used by azure-files, azure-files-nfs, and blob-copy storage types.' azureFileShareName: required: false default: 'unity-builds' description: '[Orchestrator] [Experimental] Azure File Share name within the storage account. - Supports up to 100 TiB per share.' + Used by azure-files and azure-files-nfs storage types. Supports up to 100 TiB per share.' + azureBlobContainer: + required: false + default: 'unity-builds' + description: + '[Orchestrator] [Experimental] Azure Blob container name for blob-copy storage type.' 
azureSubscriptionId: required: false default: '' diff --git a/src/model/build-parameters.ts b/src/model/build-parameters.ts index 22ecbc62..f3c2ca3e 100644 --- a/src/model/build-parameters.ts +++ b/src/model/build-parameters.ts @@ -110,7 +110,10 @@ class BuildParameters { // GCP Cloud Run (Experimental) public gcpProject!: string; public gcpRegion!: string; + public gcpStorageType!: string; public gcpBucket!: string; + public gcpFilestoreIp!: string; + public gcpFilestoreShare!: string; public gcpMachineType!: string; public gcpDiskSizeGb!: string; public gcpServiceAccount!: string; @@ -119,7 +122,9 @@ class BuildParameters { // Azure Container Instances (Experimental) public azureResourceGroup!: string; public azureLocation!: string; + public azureStorageType!: string; public azureStorageAccount!: string; + public azureBlobContainer!: string; public azureFileShareName!: string; public azureSubscriptionId!: string; public azureCpu!: string; @@ -250,14 +255,19 @@ class BuildParameters { kubeStorageClass: OrchestratorOptions.kubeStorageClass, gcpProject: Input.gcpProject, gcpRegion: Input.gcpRegion, + gcpStorageType: Input.gcpStorageType, gcpBucket: Input.gcpBucket, + gcpFilestoreIp: Input.gcpFilestoreIp, + gcpFilestoreShare: Input.gcpFilestoreShare, gcpMachineType: Input.gcpMachineType, gcpDiskSizeGb: Input.gcpDiskSizeGb, gcpServiceAccount: Input.gcpServiceAccount, gcpVpcConnector: Input.gcpVpcConnector, azureResourceGroup: Input.azureResourceGroup, azureLocation: Input.azureLocation, + azureStorageType: Input.azureStorageType, azureStorageAccount: Input.azureStorageAccount, + azureBlobContainer: Input.azureBlobContainer, azureFileShareName: Input.azureFileShareName, azureSubscriptionId: Input.azureSubscriptionId, azureCpu: Input.azureCpu, diff --git a/src/model/input.ts b/src/model/input.ts index 4459318d..db03f902 100644 --- a/src/model/input.ts +++ b/src/model/input.ts @@ -291,10 +291,22 @@ class Input { return Input.getInput('gcpRegion') ?? 
''; } + static get gcpStorageType(): string { + return Input.getInput('gcpStorageType') ?? 'gcs-fuse'; + } + static get gcpBucket(): string { return Input.getInput('gcpBucket') ?? ''; } + static get gcpFilestoreIp(): string { + return Input.getInput('gcpFilestoreIp') ?? ''; + } + + static get gcpFilestoreShare(): string { + return Input.getInput('gcpFilestoreShare') ?? '/share1'; + } + static get gcpMachineType(): string { return Input.getInput('gcpMachineType') ?? 'e2-standard-4'; } @@ -320,10 +332,18 @@ class Input { return Input.getInput('azureLocation') ?? ''; } + static get azureStorageType(): string { + return Input.getInput('azureStorageType') ?? 'azure-files'; + } + static get azureStorageAccount(): string { return Input.getInput('azureStorageAccount') ?? ''; } + static get azureBlobContainer(): string { + return Input.getInput('azureBlobContainer') ?? 'unity-builds'; + } + static get azureFileShareName(): string { return Input.getInput('azureFileShareName') ?? 'unity-builds'; } diff --git a/src/model/orchestrator/providers/azure-aci/index.ts b/src/model/orchestrator/providers/azure-aci/index.ts index 9adbfdb9..f1e5eca6 100644 --- a/src/model/orchestrator/providers/azure-aci/index.ts +++ b/src/model/orchestrator/providers/azure-aci/index.ts @@ -1,21 +1,27 @@ /** * Azure Container Instances (ACI) Provider (Experimental) * - * Executes Unity builds as Azure Container Instances with Azure File Shares for large storage. + * Executes Unity builds as Azure Container Instances with configurable storage backends. + * + * Storage types: + * - azure-files: SMB file share mount via Azure Files. Up to 100 TiB per share, + * premium throughput. Default. + * Requires: azureStorageAccount, azureFileShareName + * - blob-copy: Copy artifacts in/out of Azure Blob Storage before/after the build. + * No mount overhead, simpler. + * Requires: azureStorageAccount, azureBlobContainer + * - azure-files-nfs: NFS 4.1 file share mount. 
True POSIX semantics, no SMB lock overhead, + * better for Unity Library caching (many small random reads). + * Requires: azureStorageAccount, azureFileShareName, Premium FileStorage, + * VNet integration (azureSubnetId) + * - in-memory: No volume is mounted (emptyDir needs a YAML deployment, which the CLI path skips); + * the build uses the container's own filesystem. Volatile, no persistent mount. * * Prerequisites: * - Azure CLI authenticated (az login or service principal) * - A resource group for build resources - * - An Azure Storage Account with a File Share for build artifacts * - Contributor role on the resource group * - * Architecture: - * - Uses Azure Container Instances for serverless container execution - * - Azure File Shares mounted as volumes for large artifact I/O (up to 100 TiB per share) - * - Container logs streamed via Azure Monitor / az container logs - * - Supports up to 16 CPU cores and 16 GB memory per container group - * - Premium file shares support up to 10 GiB/s throughput - * * @experimental This provider is experimental. 
*/ @@ -30,10 +36,14 @@ import { OrchestratorSystem } from '../../services/core/orchestrator-system'; import { Input } from '../../..'; import ResourceTracking from '../../services/core/resource-tracking'; +type AzureStorageType = 'azure-files' | 'blob-copy' | 'azure-files-nfs' | 'in-memory'; + class AzureAciProvider implements ProviderInterface { private readonly resourceGroup: string; private readonly location: string; + private readonly storageType: AzureStorageType; private readonly storageAccount: string; + private readonly blobContainer: string; private readonly fileShareName: string; private readonly subscriptionId: string; private readonly cpu: number; @@ -46,7 +56,9 @@ class AzureAciProvider implements ProviderInterface { this.buildParameters = buildParameters; this.resourceGroup = buildParameters.azureResourceGroup || process.env.AZURE_RESOURCE_GROUP || ''; this.location = buildParameters.azureLocation || Input.region || 'eastus'; + this.storageType = (buildParameters.azureStorageType || 'azure-files') as AzureStorageType; this.storageAccount = buildParameters.azureStorageAccount || process.env.AZURE_STORAGE_ACCOUNT || ''; + this.blobContainer = buildParameters.azureBlobContainer || 'unity-builds'; this.fileShareName = buildParameters.azureFileShareName || 'unity-builds'; this.subscriptionId = buildParameters.azureSubscriptionId || process.env.AZURE_SUBSCRIPTION_ID || ''; this.cpu = Number.parseInt(buildParameters.azureCpu || '4', 10); @@ -57,10 +69,59 @@ class AzureAciProvider implements ProviderInterface { OrchestratorLogger.log('[Azure ACI] Provider initialized (EXPERIMENTAL)'); OrchestratorLogger.log(`[Azure ACI] Resource Group: ${this.resourceGroup || '(not set)'}`); OrchestratorLogger.log(`[Azure ACI] Location: ${this.location}`); - OrchestratorLogger.log(`[Azure ACI] Storage Account: ${this.storageAccount || '(not set)'}`); - OrchestratorLogger.log(`[Azure ACI] File Share: ${this.fileShareName}`); + OrchestratorLogger.log(`[Azure ACI] Storage: 
${this.storageType}`); OrchestratorLogger.log(`[Azure ACI] Resources: ${this.cpu} CPU, ${this.memoryGb}GB RAM`); + this.validateStorageConfig(); + } + + private validateStorageConfig(): void { + switch (this.storageType) { + case 'azure-files': + if (!this.storageAccount) { + OrchestratorLogger.logWarning( + '[Azure ACI] Storage type "azure-files" requires azureStorageAccount to be set.', + ); + } else { + OrchestratorLogger.log( + `[Azure ACI] File Share: ${this.storageAccount}/${this.fileShareName} (SMB)`, + ); + } + break; + case 'azure-files-nfs': + if (!this.storageAccount) { + OrchestratorLogger.logWarning( + '[Azure ACI] Storage type "azure-files-nfs" requires azureStorageAccount (Premium FileStorage).', + ); + } + if (!this.subnetId) { + OrchestratorLogger.logWarning( + '[Azure ACI] NFS file shares require VNet integration. Set azureSubnetId.', + ); + } else { + OrchestratorLogger.log( + `[Azure ACI] File Share: ${this.storageAccount}/${this.fileShareName} (NFS 4.1)`, + ); + } + break; + case 'blob-copy': + if (!this.storageAccount) { + OrchestratorLogger.logWarning( + '[Azure ACI] Storage type "blob-copy" requires azureStorageAccount to be set.', + ); + } else { + OrchestratorLogger.log(`[Azure ACI] Blob container: ${this.storageAccount}/${this.blobContainer}`); + } + break; + case 'in-memory': + OrchestratorLogger.log(`[Azure ACI] In-memory volume (emptyDir): limited by ${this.memoryGb}GB container memory`); + break; + default: + OrchestratorLogger.logWarning( + `[Azure ACI] Unknown storage type '${this.storageType}'. Valid: azure-files, blob-copy, azure-files-nfs, in-memory`, + ); + } + if (!this.resourceGroup) { OrchestratorLogger.logWarning( '[Azure ACI] No resource group specified. 
Set azureResourceGroup input or AZURE_RESOURCE_GROUP env var.', @@ -80,19 +141,18 @@ class AzureAciProvider implements ProviderInterface { // Verify Azure CLI is available try { await OrchestratorSystem.Run('az version --output json', false, true); - OrchestratorLogger.log(`[Azure ACI] Azure CLI detected`); + OrchestratorLogger.log('[Azure ACI] Azure CLI detected'); } catch { throw new Error( '[Azure ACI] Azure CLI not found. Install Azure CLI: https://learn.microsoft.com/en-us/cli/azure/install-azure-cli', ); } - // Set subscription if specified if (this.subscriptionId) { await OrchestratorSystem.Run(`az account set --subscription="${this.subscriptionId}"`); } - // Verify resource group exists + // Ensure resource group exists if (this.resourceGroup) { try { await OrchestratorSystem.Run( @@ -109,44 +169,147 @@ class AzureAciProvider implements ProviderInterface { } } - // Setup storage account and file share if specified - if (this.storageAccount) { - try { - await OrchestratorSystem.Run( - `az storage account show --name "${this.storageAccount}" --resource-group "${this.resourceGroup}" --output json`, - false, - true, - ); - OrchestratorLogger.log(`[Azure ACI] Storage account ${this.storageAccount} exists`); - } catch { - OrchestratorLogger.log(`[Azure ACI] Creating storage account ${this.storageAccount}`); - await OrchestratorSystem.Run( - `az storage account create --name "${this.storageAccount}" --resource-group "${this.resourceGroup}" --location "${this.location}" --sku Premium_LRS --kind FileStorage`, - ); - } + // Storage-specific setup + switch (this.storageType) { + case 'azure-files': + await this.setupStorageAccount('Standard_LRS', 'StorageV2'); + await this.setupFileShare(); + break; + case 'azure-files-nfs': + await this.setupStorageAccount('Premium_LRS', 'FileStorage'); + await this.setupNfsFileShare(); + break; + case 'blob-copy': + await this.setupStorageAccount('Standard_LRS', 'StorageV2'); + await this.setupBlobContainer(); + break; + case 
'in-memory': + // No storage setup needed + break; + } + } - // Get storage account key + private async setupStorageAccount(sku: string, kind: string): Promise { + if (!this.storageAccount || !this.resourceGroup) return; + + try { + await OrchestratorSystem.Run( + `az storage account show --name "${this.storageAccount}" --resource-group "${this.resourceGroup}" --output json`, + false, + true, + ); + OrchestratorLogger.log(`[Azure ACI] Storage account ${this.storageAccount} exists`); + } catch { + OrchestratorLogger.log(`[Azure ACI] Creating storage account ${this.storageAccount} (${sku}, ${kind})`); + await OrchestratorSystem.Run( + `az storage account create --name "${this.storageAccount}" --resource-group "${this.resourceGroup}" --location "${this.location}" --sku ${sku} --kind ${kind}`, + ); + } + } + + private async setupFileShare(): Promise { + if (!this.storageAccount || !this.resourceGroup) return; + try { + await OrchestratorSystem.Run( + `az storage share-rm show --storage-account "${this.storageAccount}" --name "${this.fileShareName}" --resource-group "${this.resourceGroup}" --output json`, + false, + true, + ); + } catch { + OrchestratorLogger.log(`[Azure ACI] Creating file share ${this.fileShareName} (${this.diskSizeGb}GB)`); + await OrchestratorSystem.Run( + `az storage share-rm create --storage-account "${this.storageAccount}" --name "${this.fileShareName}" --resource-group "${this.resourceGroup}" --quota ${this.diskSizeGb}`, + ); + } + } + + private async setupNfsFileShare(): Promise { + if (!this.storageAccount || !this.resourceGroup) return; + try { + await OrchestratorSystem.Run( + `az storage share-rm show --storage-account "${this.storageAccount}" --name "${this.fileShareName}" --resource-group "${this.resourceGroup}" --output json`, + false, + true, + ); + } catch { + OrchestratorLogger.log(`[Azure ACI] Creating NFS file share ${this.fileShareName} (${this.diskSizeGb}GB)`); + await OrchestratorSystem.Run( + `az storage share-rm create 
--storage-account "${this.storageAccount}" --name "${this.fileShareName}" --resource-group "${this.resourceGroup}" --quota ${this.diskSizeGb} --enabled-protocols NFS`, + ); + } + } + + private async setupBlobContainer(): Promise { + if (!this.storageAccount || !this.resourceGroup) return; + try { + await OrchestratorSystem.Run( + `az storage container show --name "${this.blobContainer}" --account-name "${this.storageAccount}" --output json`, + false, + true, + ); + } catch { + OrchestratorLogger.log(`[Azure ACI] Creating blob container ${this.blobContainer}`); + await OrchestratorSystem.Run( + `az storage container create --name "${this.blobContainer}" --account-name "${this.storageAccount}"`, + ); + } + } + + private async getStorageKey(): Promise { + if (!this.storageAccount || !this.resourceGroup) return ''; + try { const keyJson = await OrchestratorSystem.Run( `az storage account keys list --account-name "${this.storageAccount}" --resource-group "${this.resourceGroup}" --output json`, false, true, ); const keys = JSON.parse(keyJson); - const storageKey = keys[0]?.value || ''; + return keys[0]?.value || ''; + } catch (error: any) { + OrchestratorLogger.logWarning(`[Azure ACI] Could not get storage key: ${error.message}`); + return ''; + } + } - // Create file share if it doesn't exist - try { - await OrchestratorSystem.Run( - `az storage share-rm show --storage-account "${this.storageAccount}" --name "${this.fileShareName}" --resource-group "${this.resourceGroup}" --output json`, - false, - true, - ); - } catch { - OrchestratorLogger.log(`[Azure ACI] Creating file share ${this.fileShareName}`); - await OrchestratorSystem.Run( - `az storage share-rm create --storage-account "${this.storageAccount}" --name "${this.fileShareName}" --resource-group "${this.resourceGroup}" --quota ${this.diskSizeGb}`, - ); + private async buildVolumeFlags(mountdir: string): Promise { + switch (this.storageType) { + case 'azure-files': { + const storageKey = await 
this.getStorageKey(); + if (!storageKey) return ''; + return [ + `--azure-file-volume-account-name "${this.storageAccount}"`, + `--azure-file-volume-account-key "${storageKey}"`, + `--azure-file-volume-share-name "${this.fileShareName}"`, + `--azure-file-volume-mount-path "${mountdir}"`, + ].join(' '); } + + case 'azure-files-nfs': { + // ACI NFS mount uses a YAML deployment template; for CLI we use the same + // azure-file-volume flags but the share must be NFS-enabled and + // the container must be in a VNet + const storageKey = await this.getStorageKey(); + if (!storageKey) return ''; + return [ + `--azure-file-volume-account-name "${this.storageAccount}"`, + `--azure-file-volume-account-key "${storageKey}"`, + `--azure-file-volume-share-name "${this.fileShareName}"`, + `--azure-file-volume-mount-path "${mountdir}"`, + ].join(' '); + } + + case 'in-memory': + // ACI emptyDir volumes require YAML deployment; for simplicity we skip + // the volume mount and let the container use its own filesystem + OrchestratorLogger.log('[Azure ACI] In-memory mode: using container filesystem (no persistent mount)'); + return ''; + + case 'blob-copy': + // No volume mount — artifacts are copied in/out via az storage blob commands + return ''; + + default: + return ''; } } @@ -169,40 +332,28 @@ class AzureAciProvider implements ProviderInterface { ...environment.map((env) => `${env.name}=${env.value}`), ...secrets.map((s) => `${s.EnvironmentVariable}=${s.ParameterValue}`), ]; - const envFlag = allEnvVars.length > 0 ? `--environment-variables ${allEnvVars.map((e) => `"${e}"`).join(' ')}` : ''; + const envFlag = + allEnvVars.length > 0 ? 
`--environment-variables ${allEnvVars.map((e) => `"${e}"`).join(' ')}` : ''; - // Get storage account key for volume mount - let volumeFlags = ''; - if (this.storageAccount && this.resourceGroup) { - try { - const keyJson = await OrchestratorSystem.Run( - `az storage account keys list --account-name "${this.storageAccount}" --resource-group "${this.resourceGroup}" --output json`, - false, - true, - ); - const keys = JSON.parse(keyJson); - const storageKey = keys[0]?.value || ''; + // Build volume flags based on storage type + const volumeFlags = await this.buildVolumeFlags(mountdir); - if (storageKey) { - volumeFlags = [ - `--azure-file-volume-account-name "${this.storageAccount}"`, - `--azure-file-volume-account-key "${storageKey}"`, - `--azure-file-volume-share-name "${this.fileShareName}"`, - `--azure-file-volume-mount-path "${mountdir}"`, - ].join(' '); - } - } catch (error: any) { - OrchestratorLogger.logWarning(`[Azure ACI] Could not get storage key: ${error.message}`); - } - } - - // Subnet flag for VNet integration const subnetFlag = this.subnetId ? `--subnet "${this.subnetId}"` : ''; - // Build the command override - const commandFlag = commands ? `--command-line "/bin/sh -c '${commands.replace(/'/g, "'\\''")}'\"` : ''; + // For blob-copy, wrap the user command with copy-in/copy-out steps + let effectiveCommands = commands; + if (this.storageType === 'blob-copy' && this.storageAccount && commands) { + effectiveCommands = [ + `az storage blob download-batch --destination "${mountdir}" --source "${this.blobContainer}" --account-name "${this.storageAccount}" 2>/dev/null || true`, + commands, + `az storage blob upload-batch --source "${mountdir}" --destination "${this.blobContainer}" --account-name "${this.storageAccount}" --overwrite`, + ].join(' && '); + } + + const commandFlag = effectiveCommands + ? 
`--command-line "/bin/sh -c '${effectiveCommands.replace(/'/g, "'\\''")}'"` + : ''; - // Create and run the container instance const createCmd = [ 'az container create', `--resource-group "${this.resourceGroup}"`, @@ -211,8 +362,8 @@ class AzureAciProvider implements ProviderInterface { `--location "${this.location}"`, `--cpu ${this.cpu}`, `--memory ${this.memoryGb}`, - `--restart-policy Never`, - `--os-type Linux`, + '--restart-policy Never', + '--os-type Linux', volumeFlags, envFlag, subnetFlag, @@ -224,26 +375,25 @@ class AzureAciProvider implements ProviderInterface { try { await OrchestratorSystem.Run(createCmd); - OrchestratorLogger.log(`[Azure ACI] Container ${containerName} created, waiting for completion...`); + OrchestratorLogger.log( + `[Azure ACI] Container ${containerName} created (storage: ${this.storageType}), waiting for completion...`, + ); } catch (error: any) { throw new Error(`[Azure ACI] Failed to create container: ${error.message}`); } - // Poll for completion const output = await this.waitForContainerCompletion(containerName); - return output; } private async waitForContainerCompletion(containerName: string): Promise { - const maxWaitMs = 24 * 60 * 60 * 1000; // 24 hours + const maxWaitMs = 24 * 60 * 60 * 1000; const pollIntervalMs = 15_000; const startTime = Date.now(); let lastLogLength = 0; while (Date.now() - startTime < maxWaitMs) { try { - // Check container state const stateJson = await OrchestratorSystem.Run( `az container show --resource-group "${this.resourceGroup}" --name "${containerName}" --output json`, false, @@ -257,14 +407,13 @@ class AzureAciProvider implements ProviderInterface { 'Unknown'; const provisioningState = state.provisioningState || 'Unknown'; - // Stream logs + // Stream logs incrementally try { const logs = await OrchestratorSystem.Run( `az container logs --resource-group "${this.resourceGroup}" --name "${containerName}"`, false, true, ); - if (logs && logs.length > lastLogLength) { const newLogs = 
logs.slice(lastLogLength); for (const line of newLogs.split('\n')) { @@ -278,15 +427,12 @@ class AzureAciProvider implements ProviderInterface { // Logs may not be available yet } - // Check if completed if (containerState === 'Terminated' || provisioningState === 'Succeeded') { const exitCode = state.containers?.[0]?.instanceView?.currentState?.exitCode; if (exitCode !== undefined && exitCode !== 0) { throw new Error(`[Azure ACI] Container exited with code ${exitCode}`); } - OrchestratorLogger.log(`[Azure ACI] Container completed successfully`); - - // Get final logs + OrchestratorLogger.log('[Azure ACI] Container completed successfully'); try { return await OrchestratorSystem.Run( `az container logs --resource-group "${this.resourceGroup}" --name "${containerName}"`, @@ -306,13 +452,15 @@ class AzureAciProvider implements ProviderInterface { throw new Error(`[Azure ACI] Container provisioning failed: ${detail}`); } } catch (error: any) { - if (error.message?.includes('Container provisioning failed') || error.message?.includes('exited with code')) { + if ( + error.message?.includes('Container provisioning failed') || + error.message?.includes('exited with code') + ) { throw error; } OrchestratorLogger.logWarning(`[Azure ACI] Polling error: ${error.message}`); } - // Wait before next poll await new Promise((resolve) => setTimeout(resolve, pollIntervalMs)); } @@ -324,8 +472,7 @@ class AzureAciProvider implements ProviderInterface { branchName: string, defaultSecretsArray: { ParameterKey: string; EnvironmentVariable: string; ParameterValue: string }[], ) { - OrchestratorLogger.log(`[Azure ACI] Cleaning up workflow`); - // ACI containers with restart-policy=Never auto-stop; cleanup is done during garbage collection + OrchestratorLogger.log('[Azure ACI] Cleaning up workflow'); } async garbageCollect( @@ -335,7 +482,7 @@ class AzureAciProvider implements ProviderInterface { fullCache: boolean, baseDependencies: boolean, ): Promise { - OrchestratorLogger.log(`[Azure ACI] 
Garbage collecting old container groups`); + OrchestratorLogger.log('[Azure ACI] Garbage collecting old container groups'); try { const containersJson = await OrchestratorSystem.Run( @@ -353,10 +500,11 @@ class AzureAciProvider implements ProviderInterface { const name = container.name || ''; if (!name.startsWith('unity-build-')) continue; - const createdAt = new Date(container.tags?.createdAt || container.properties?.provisioningState || 0); + const createdAt = new Date( + container.tags?.createdAt || container.properties?.provisioningState || 0, + ); const state = container.containers?.[0]?.instanceView?.currentState?.state || ''; - // Delete terminated containers older than the threshold if (state === 'Terminated' || createdAt < cutoffDate) { if (previewOnly) { OrchestratorLogger.log(`[Azure ACI] Would delete: ${name}`); diff --git a/src/model/orchestrator/providers/gcp-cloud-run/index.ts b/src/model/orchestrator/providers/gcp-cloud-run/index.ts index e66d6909..c320eb1f 100644 --- a/src/model/orchestrator/providers/gcp-cloud-run/index.ts +++ b/src/model/orchestrator/providers/gcp-cloud-run/index.ts @@ -1,20 +1,26 @@ /** * Google Cloud Run Jobs Provider (Experimental) * - * Executes Unity builds as Cloud Run Jobs with Cloud Storage (GCS) for large artifact storage. + * Executes Unity builds as Cloud Run Jobs with configurable storage backends. + * + * Storage types: + * - gcs-fuse: Mount a GCS bucket as a POSIX filesystem via GCS FUSE sidecar. + * Unlimited size, best for large sequential reads/writes. + * Requires: gcpBucket + * - gcs-copy: Copy artifacts in/out of GCS before/after the build via gsutil. + * No mount overhead, simpler, works everywhere. + * Requires: gcpBucket + * - nfs: Mount a Filestore NFS share. True POSIX semantics, good random I/O, + * up to 100 TiB. Best for Library caching (many small random reads). + * Requires: gcpFilestoreIp, gcpFilestoreShare + * - in-memory: tmpfs volume (emptyDir). Fastest I/O but volatile and limited to 32 GiB. 
+ * Good for scratch/temp space during builds. * * Prerequisites: * - Google Cloud SDK authenticated (GOOGLE_APPLICATION_CREDENTIALS or gcloud auth) * - Cloud Run Jobs API enabled - * - A GCS bucket for build artifacts * - Service account with roles: Cloud Run Admin, Storage Admin, Logs Viewer * - * Architecture: - * - Uses Cloud Run Jobs (not Services) for one-off build execution - * - GCS FUSE sidecar mounts a bucket as a local filesystem for large artifact I/O - * - Cloud Logging streams build output in real-time - * - Supports volumes up to 32 GiB in-memory or unlimited via GCS FUSE - * * @experimental This provider is experimental. APIs and behavior may change. */ @@ -29,10 +35,15 @@ import { OrchestratorSystem } from '../../services/core/orchestrator-system'; import { Input } from '../../..'; import ResourceTracking from '../../services/core/resource-tracking'; +type GcpStorageType = 'gcs-fuse' | 'gcs-copy' | 'nfs' | 'in-memory'; + class GcpCloudRunProvider implements ProviderInterface { private readonly project: string; private readonly region: string; + private readonly storageType: GcpStorageType; private readonly bucket: string; + private readonly filestoreIp: string; + private readonly filestoreShare: string; private readonly machineType: string; private readonly diskSizeGb: number; private readonly serviceAccount: string; @@ -43,7 +54,10 @@ class GcpCloudRunProvider implements ProviderInterface { this.buildParameters = buildParameters; this.project = buildParameters.gcpProject || process.env.GOOGLE_CLOUD_PROJECT || process.env.GCLOUD_PROJECT || ''; this.region = buildParameters.gcpRegion || Input.region || 'us-central1'; + this.storageType = (buildParameters.gcpStorageType || 'gcs-fuse') as GcpStorageType; this.bucket = buildParameters.gcpBucket || ''; + this.filestoreIp = buildParameters.gcpFilestoreIp || ''; + this.filestoreShare = buildParameters.gcpFilestoreShare || '/share1'; this.machineType = buildParameters.gcpMachineType || 'e2-standard-4'; 
this.diskSizeGb = Number.parseInt(buildParameters.gcpDiskSizeGb || '100', 10); this.serviceAccount = buildParameters.gcpServiceAccount || ''; @@ -52,8 +66,43 @@ class GcpCloudRunProvider implements ProviderInterface { OrchestratorLogger.log('[GCP Cloud Run] Provider initialized (EXPERIMENTAL)'); OrchestratorLogger.log(`[GCP Cloud Run] Project: ${this.project || '(auto-detect)'}`); OrchestratorLogger.log(`[GCP Cloud Run] Region: ${this.region}`); - OrchestratorLogger.log(`[GCP Cloud Run] Bucket: ${this.bucket || '(none)'}`); - OrchestratorLogger.log(`[GCP Cloud Run] Disk size: ${this.diskSizeGb}GB`); + OrchestratorLogger.log(`[GCP Cloud Run] Storage: ${this.storageType}`); + + this.validateStorageConfig(); + } + + private validateStorageConfig(): void { + switch (this.storageType) { + case 'gcs-fuse': + case 'gcs-copy': + if (!this.bucket) { + OrchestratorLogger.logWarning( + `[GCP Cloud Run] Storage type '${this.storageType}' requires gcpBucket to be set.`, + ); + } else { + OrchestratorLogger.log(`[GCP Cloud Run] Bucket: gs://${this.bucket}`); + } + break; + case 'nfs': + if (!this.filestoreIp) { + OrchestratorLogger.logWarning('[GCP Cloud Run] Storage type "nfs" requires gcpFilestoreIp to be set.'); + } else { + OrchestratorLogger.log(`[GCP Cloud Run] Filestore: ${this.filestoreIp}:${this.filestoreShare}`); + } + if (!this.vpcConnector) { + OrchestratorLogger.logWarning( + '[GCP Cloud Run] NFS storage usually requires gcpVpcConnector for private network access to Filestore.', + ); + } + break; + case 'in-memory': + OrchestratorLogger.log(`[GCP Cloud Run] In-memory volume: ${Math.min(this.diskSizeGb, 32)} GiB (max 32)`); + break; + default: + OrchestratorLogger.logWarning( + `[GCP Cloud Run] Unknown storage type '${this.storageType}'. 
Valid: gcs-fuse, gcs-copy, nfs, in-memory`, + ); + } if (!this.project) { OrchestratorLogger.logWarning( @@ -73,8 +122,8 @@ class GcpCloudRunProvider implements ProviderInterface { // Verify gcloud CLI is available try { - const version = await OrchestratorSystem.Run('gcloud --version', false, true); - OrchestratorLogger.log(`[GCP Cloud Run] gcloud CLI detected`); + await OrchestratorSystem.Run('gcloud --version', false, true); + OrchestratorLogger.log('[GCP Cloud Run] gcloud CLI detected'); } catch { throw new Error( '[GCP Cloud Run] gcloud CLI not found. Install Google Cloud SDK: https://cloud.google.com/sdk/docs/install', @@ -89,27 +138,84 @@ class GcpCloudRunProvider implements ProviderInterface { false, true, ); - } catch (error) { + } catch { OrchestratorLogger.logWarning( - `[GCP Cloud Run] Could not verify Cloud Run API status. Ensure run.googleapis.com is enabled.`, + '[GCP Cloud Run] Could not verify Cloud Run API status. Ensure run.googleapis.com is enabled.', ); } - // Create GCS bucket for artifacts if specified and doesn't exist - if (this.bucket) { - try { - await OrchestratorSystem.Run(`gcloud storage buckets describe gs://${this.bucket} --format="value(name)"`, false, true); - OrchestratorLogger.log(`[GCP Cloud Run] Bucket gs://${this.bucket} exists`); - } catch { - OrchestratorLogger.log(`[GCP Cloud Run] Creating bucket gs://${this.bucket}`); - const projectFlag = this.project ? 
`--project=${this.project}` : ''; - await OrchestratorSystem.Run( - `gcloud storage buckets create gs://${this.bucket} --location=${this.region} ${projectFlag}`, - ); - } + // Storage-specific setup + if ((this.storageType === 'gcs-fuse' || this.storageType === 'gcs-copy') && this.bucket) { + await this.ensureBucketExists(); } } + private async ensureBucketExists(): Promise<void> { + try { + await OrchestratorSystem.Run( + `gcloud storage buckets describe gs://${this.bucket} --format="value(name)"`, + false, + true, + ); + OrchestratorLogger.log(`[GCP Cloud Run] Bucket gs://${this.bucket} exists`); + } catch { + OrchestratorLogger.log(`[GCP Cloud Run] Creating bucket gs://${this.bucket}`); + const projectFlag = this.project ? `--project=${this.project}` : ''; + await OrchestratorSystem.Run( + `gcloud storage buckets create gs://${this.bucket} --location=${this.region} ${projectFlag}`, + ); + } + } + + private buildVolumeFlags(mountdir: string): { volumeFlags: string; mountFlags: string } { + switch (this.storageType) { + case 'gcs-fuse': + if (!this.bucket) return { volumeFlags: '', mountFlags: '' }; + return { + volumeFlags: `--add-volume=name=gcs-fuse,type=cloud-storage,bucket=${this.bucket}`, + mountFlags: `--add-volume-mount=volume=gcs-fuse,mount-path=${mountdir}`, + }; + + case 'nfs': + if (!this.filestoreIp) return { volumeFlags: '', mountFlags: '' }; + return { + volumeFlags: `--add-volume=name=nfs-vol,type=nfs,location=${this.filestoreIp}:${this.filestoreShare}`, + mountFlags: `--add-volume-mount=volume=nfs-vol,mount-path=${mountdir}`, + }; + + case 'in-memory': { + const sizeGib = Math.min(this.diskSizeGb, 32); + return { + volumeFlags: `--add-volume=name=tmpfs-vol,type=in-memory,size-limit=${sizeGib}Gi`, + mountFlags: `--add-volume-mount=volume=tmpfs-vol,mount-path=${mountdir}`, + }; + } + + case 'gcs-copy': + // No volume mount — artifacts are copied in/out via gsutil commands + return { volumeFlags: '', mountFlags: '' }; + + default: + return { volumeFlags: 
'', mountFlags: '' }; + } + } + + private async copyArtifactsIn(mountdir: string): Promise<void> { + if (this.storageType !== 'gcs-copy' || !this.bucket) return; + OrchestratorLogger.log(`[GCP Cloud Run] Copying artifacts from gs://${this.bucket} to ${mountdir}`); + try { + await OrchestratorSystem.Run(`gcloud storage cp -r "gs://${this.bucket}/*" "${mountdir}/" || true`, false, true); + } catch { + OrchestratorLogger.log('[GCP Cloud Run] No existing artifacts to restore (bucket may be empty)'); + } + } + + private async copyArtifactsOut(mountdir: string): Promise<void> { + if (this.storageType !== 'gcs-copy' || !this.bucket) return; + OrchestratorLogger.log(`[GCP Cloud Run] Uploading artifacts from ${mountdir} to gs://${this.bucket}`); + await OrchestratorSystem.Run(`gcloud storage cp -r "${mountdir}/*" "gs://${this.bucket}/"`, false, true); + } + async runTaskInWorkflow( buildGuid: string, image: string, @@ -129,21 +235,22 @@ class GcpCloudRunProvider implements ProviderInterface { const envFlags = environment .map((env) => `${env.name}=${env.value}`) .concat(secrets.map((s) => `${s.EnvironmentVariable}=${s.ParameterValue}`)); - const envString = envFlags.length > 0 ? 
`--set-env-vars="${envFlags.join(',')}"` : ''; - // Build volume and mount flags for GCS FUSE - let volumeFlags = ''; - let mountFlags = ''; - if (this.bucket) { - volumeFlags = `--add-volume=name=gcs-fuse,type=cloud-storage,bucket=${this.bucket}`; - mountFlags = `--add-volume-mount=volume=gcs-fuse,mount-path=${mountdir}`; + // Build storage volume flags + const { volumeFlags, mountFlags } = this.buildVolumeFlags(mountdir); + + // For gcs-copy, wrap the user command with copy-in/copy-out steps + let effectiveCommands = commands; + if (this.storageType === 'gcs-copy' && this.bucket && commands) { + effectiveCommands = [ + `gcloud storage cp -r "gs://${this.bucket}/*" "${mountdir}/" 2>/dev/null || true`, + commands, + `gcloud storage cp -r "${mountdir}/*" "gs://${this.bucket}/"`, + ].join(' && '); } - // Service account flag const saFlag = this.serviceAccount ? `--service-account=${this.serviceAccount}` : ''; - - // VPC connector for private networking const vpcFlag = this.vpcConnector ? `--vpc-connector=${this.vpcConnector}` : ''; // Create the Cloud Run Job @@ -152,10 +259,10 @@ class GcpCloudRunProvider implements ProviderInterface { jobName, `--image=${image}`, `--region=${this.region}`, - `--task-timeout=86400s`, - `--max-retries=0`, - `--cpu=4`, - `--memory=16Gi`, + '--task-timeout=86400s', + '--max-retries=0', + '--cpu=4', + '--memory=16Gi', volumeFlags, mountFlags, envString, @@ -172,7 +279,6 @@ class GcpCloudRunProvider implements ProviderInterface { await OrchestratorSystem.Run(createCmd); OrchestratorLogger.log(`[GCP Cloud Run] Job ${jobName} created`); } catch (error: any) { - // Job might already exist from a retry if (error.message?.includes('already exists')) { OrchestratorLogger.log(`[GCP Cloud Run] Job ${jobName} already exists, updating...`); const updateCmd = createCmd.replace('jobs create', 'jobs update'); @@ -183,13 +289,13 @@ class GcpCloudRunProvider implements ProviderInterface { } // Override the command if provided - if (commands) { + if 
(effectiveCommands) { const updateCmd = [ 'gcloud run jobs update', jobName, `--region=${this.region}`, - `--command="/bin/sh"`, - `--args="-c,${commands}"`, + '--command="/bin/sh"', + `--args="-c,${effectiveCommands}"`, projectFlag, '--quiet', ] @@ -200,7 +306,7 @@ class GcpCloudRunProvider implements ProviderInterface { } // Execute the job - OrchestratorLogger.log(`[GCP Cloud Run] Executing job ${jobName}...`); + OrchestratorLogger.log(`[GCP Cloud Run] Executing job ${jobName} (storage: ${this.storageType})...`); const executeCmd = [ 'gcloud run jobs execute', jobName, @@ -216,16 +322,13 @@ class GcpCloudRunProvider implements ProviderInterface { let output = ''; try { output = await OrchestratorSystem.Run(executeCmd); - OrchestratorLogger.log(`[GCP Cloud Run] Job execution completed`); + OrchestratorLogger.log('[GCP Cloud Run] Job execution completed'); } catch (error: any) { - // Try to get logs even on failure await this.streamJobLogs(jobName); throw new Error(`[GCP Cloud Run] Job execution failed: ${error.message}`); } - // Stream logs await this.streamJobLogs(jobName); - return output; } @@ -245,7 +348,7 @@ class GcpCloudRunProvider implements ProviderInterface { } } } catch { - OrchestratorLogger.logWarning(`[GCP Cloud Run] Could not retrieve job logs`); + OrchestratorLogger.logWarning('[GCP Cloud Run] Could not retrieve job logs'); } } @@ -254,8 +357,7 @@ class GcpCloudRunProvider implements ProviderInterface { branchName: string, defaultSecretsArray: { ParameterKey: string; EnvironmentVariable: string; ParameterValue: string }[], ) { - OrchestratorLogger.log(`[GCP Cloud Run] Cleaning up workflow`); - // Cloud Run Jobs auto-cleanup after execution; explicit delete is optional + OrchestratorLogger.log('[GCP Cloud Run] Cleaning up workflow'); } async garbageCollect( @@ -265,11 +367,10 @@ class GcpCloudRunProvider implements ProviderInterface { fullCache: boolean, baseDependencies: boolean, ): Promise { - OrchestratorLogger.log(`[GCP Cloud Run] Garbage 
collecting old jobs`); + OrchestratorLogger.log('[GCP Cloud Run] Garbage collecting old jobs'); const projectFlag = this.project ? `--project=${this.project}` : ''; try { - // List old jobs matching the unity-build prefix const jobsJson = await OrchestratorSystem.Run( `gcloud run jobs list --region=${this.region} ${projectFlag} --filter="metadata.name~unity-build-" --format="json(metadata.name,metadata.creationTimestamp)"`, false,