Rename Cloud Runner to Orchestrator (#775)

* Rename "Cloud Runner" to "Orchestrator" across entire codebase

Breaking change: All CloudRunner classes, options, environment variables,
and action.yml inputs have been renamed to Orchestrator equivalents.

- Renamed src/model/cloud-runner/ directory to src/model/orchestrator/
- Renamed all cloud-runner-* files to orchestrator-*
- Renamed all CloudRunner* classes to Orchestrator* (15+ classes)
- Renamed all cloudRunner* properties to orchestrator* equivalents
- Renamed CLOUD_RUNNER_* env vars to ORCHESTRATOR_*
- Updated action.yml [CloudRunner] markers to [Orchestrator]
- Updated workflow files and package.json test scripts
- Updated all runtime strings (cache paths, log messages, branch refs)
- Rebuilt dist/index.js

No backward compatibility layer is provided.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Remove tracked log/temp files and add to .gitignore

Remove $LOG_FILE and temp/job-log.txt debug artifacts that should
not be in the repository.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Frostebite
2026-03-04 21:53:47 +00:00
committed by GitHub
parent f3849ee1c9
commit 9d475434d3
117 changed files with 4756 additions and 4745 deletions
@@ -0,0 +1,460 @@
import * as k8s from '@kubernetes/client-node';
import { BuildParameters } from '../../..';
import * as core from '@actions/core';
import { ProviderInterface } from '../provider-interface';
import OrchestratorSecret from '../../options/orchestrator-secret';
import KubernetesStorage from './kubernetes-storage';
import OrchestratorEnvironmentVariable from '../../options/orchestrator-environment-variable';
import KubernetesTaskRunner from './kubernetes-task-runner';
import KubernetesSecret from './kubernetes-secret';
import KubernetesJobSpecFactory from './kubernetes-job-spec-factory';
import KubernetesServiceAccount from './kubernetes-service-account';
import OrchestratorLogger from '../../services/core/orchestrator-logger';
import { CoreV1Api } from '@kubernetes/client-node';
import Orchestrator from '../../orchestrator';
import { ProviderResource } from '../provider-resource';
import { ProviderWorkflow } from '../provider-workflow';
import { RemoteClientLogger } from '../../remote-client/remote-client-logger';
import { KubernetesRole } from './kubernetes-role';
import { OrchestratorSystem } from '../../services/core/orchestrator-system';
import ResourceTracking from '../../services/core/resource-tracking';
/**
 * Kubernetes-backed implementation of ProviderInterface.
 *
 * Holds the API clients created from the default kubeconfig plus the names of the
 * per-build resources (PVC, secret, job, pod, service account) that the workflow
 * methods below create and later delete.
 */
class Kubernetes implements ProviderInterface {
// Most recently constructed provider; assigned in the constructor.
public static Instance: Kubernetes;
// Kubeconfig and the API clients derived from it (all created in the constructor).
public kubeConfig!: k8s.KubeConfig;
public kubeClient!: k8s.CoreV1Api;
public kubeClientApps!: k8s.AppsV1Api;
public kubeClientBatch!: k8s.BatchV1Api;
public rbacAuthorizationV1Api!: k8s.RbacAuthorizationV1Api;
// Build identifier for the current task; set at the start of runTaskInWorkflow.
public buildGuid: string = '';
// Build parameters captured in setupWorkflow.
public buildParameters!: BuildParameters;
// Names of the resources created for the current task (set in runTaskInWorkflow).
public pvcName: string = '';
public secretName: string = '';
public jobName: string = '';
// Namespace all resources live in; falls back to 'default' in the constructor.
public namespace!: string;
// Pod and container backing the job; filled in by setPodNameAndContainerName.
public podName: string = '';
public containerName: string = '';
// Set in setupWorkflow.
public cleanupCronJobName: string = '';
public serviceAccountName: string = '';
// Forwarded to the job spec factory, which exposes it to the pod as LOG_SERVICE_IP.
public ip: string = '';
/**
 * Loads the default kubeconfig, creates the API clients used by this provider and
 * selects the target namespace.
 *
 * @param buildParameters Build configuration; `containerNamespace` picks the namespace,
 *   falling back to `default` when it is empty.
 */
constructor(buildParameters: BuildParameters) {
  Kubernetes.Instance = this;

  const config = new k8s.KubeConfig();
  this.kubeConfig = config;
  config.loadFromDefault();

  this.kubeClient = config.makeApiClient(k8s.CoreV1Api);
  this.kubeClientApps = config.makeApiClient(k8s.AppsV1Api);
  this.kubeClientBatch = config.makeApiClient(k8s.BatchV1Api);
  this.rbacAuthorizationV1Api = config.makeApiClient(k8s.RbacAuthorizationV1Api);

  this.namespace = buildParameters.containerNamespace || 'default';
  OrchestratorLogger.log('Loaded default Kubernetes configuration for this environment');
}
/**
 * Posts a chunk of log text to the log file server addressed by the `LOG_SERVICE_IP`
 * environment variable. Does nothing (beyond a warning) when the variable is unset.
 *
 * @param logs Raw log text; it is base64-encoded before being sent.
 */
async PushLogUpdate(logs: string) {
  const logServiceIp = process.env['LOG_SERVICE_IP'];
  if (logServiceIp === undefined) {
    RemoteClientLogger.logWarning(`LOG_SERVICE_IP not set, skipping log push`);

    return;
  }

  const endpoint = `http://${logServiceIp}/api/log`;
  RemoteClientLogger.log(`Pushing logs to ${endpoint}`);

  // Base64 keeps the payload free of characters that would break the quoted curl argument.
  const encodedLogs = Buffer.from(logs).toString('base64');
  const curlOutput = await OrchestratorSystem.Run(`curl -X POST -d "${encodedLogs}" ${endpoint}`, false, true);
  RemoteClientLogger.log(`Pushed logs to ${endpoint} ${curlOutput}`);
}
/**
 * Lists the names of all pods, service accounts, secrets and jobs in the provider's
 * namespace, in that order.
 *
 * @returns One entry per resource; `Name` is an empty string when the resource has no name.
 */
async listResources(): Promise<ProviderResource[]> {
  const podList = await this.kubeClient.listNamespacedPod(this.namespace);
  const serviceAccountList = await this.kubeClient.listNamespacedServiceAccount(this.namespace);
  const secretList = await this.kubeClient.listNamespacedSecret(this.namespace);
  const jobList = await this.kubeClientBatch.listNamespacedJob(this.namespace);

  const everything = [
    ...podList.body.items,
    ...serviceAccountList.body.items,
    ...secretList.body.items,
    ...jobList.body.items,
  ];

  return everything.map((resource) => ({ Name: resource.metadata?.name || `` }));
}
/**
 * Not supported by this provider.
 *
 * @throws Error always, synchronously.
 */
listWorkflow(): Promise<ProviderWorkflow[]> {
  const reason = 'Method not implemented.';
  throw new Error(reason);
}
/**
 * Not supported by this provider.
 *
 * @throws Error always, synchronously.
 */
watchWorkflow(): Promise<string> {
  const reason = 'Method not implemented.';
  throw new Error(reason);
}
/**
 * Garbage collection placeholder: none of the arguments are used and no resources
 * are touched.
 *
 * @returns A promise resolved with an empty string.
 */
garbageCollect(
  // eslint-disable-next-line no-unused-vars
  filter: string,
  // eslint-disable-next-line no-unused-vars
  previewOnly: boolean,
  // eslint-disable-next-line no-unused-vars
  olderThan: Number,
  // eslint-disable-next-line no-unused-vars
  fullCache: boolean,
  // eslint-disable-next-line no-unused-vars
  baseDependencies: boolean,
): Promise<string> {
  return Promise.resolve(``);
}
/**
 * Prepares the provider for a build: records the build parameters, derives the
 * cron-job and service-account names from the build GUID and creates the service account.
 *
 * Any error from service-account creation propagates to the caller unchanged.
 *
 * @param buildGuid Unused here; names are derived from `buildParameters.buildGuid`.
 * @param buildParameters Build configuration stored on the instance.
 */
public async setupWorkflow(
  buildGuid: string,
  buildParameters: BuildParameters,
  // eslint-disable-next-line no-unused-vars
  branchName: string,
  // eslint-disable-next-line no-unused-vars
  defaultSecretsArray: { ParameterKey: string; EnvironmentVariable: string; ParameterValue: string }[],
) {
  const guid = buildParameters.buildGuid;
  this.buildParameters = buildParameters;
  this.cleanupCronJobName = `unity-builder-cronjob-${guid}`;
  this.serviceAccountName = `service-account-${guid}`;
  await KubernetesServiceAccount.createServiceAccount(this.serviceAccountName, this.namespace, this.kubeClient);
}
/**
 * Runs one task as a Kubernetes Job and returns its collected output.
 *
 * Steps: create the PVC and the credentials secret, (test mode only) free disk space on
 * the k3d nodes and report whether the Unity image is cached, create the job, wait for
 * its pod to run, stream the task, then delete the job/pod/secret.
 *
 * @param buildGuid Identifier used to name the secret and job.
 * @param image Container image the job runs.
 * @param commands Shell commands executed inside the container.
 * @param mountdir Mount path of the build volume.
 * @param workingdir Working directory of the container.
 * @param environment Plain environment variables for the container.
 * @param secrets Secrets stored in a Kubernetes secret and exposed as env vars.
 * @returns The output produced by KubernetesTaskRunner.runTask.
 * @throws Rethrows any error from job creation or execution after logging pod events
 *   and cleaning up the task resources.
 */
async runTaskInWorkflow(
  buildGuid: string,
  image: string,
  commands: string,
  mountdir: string,
  workingdir: string,
  environment: OrchestratorEnvironmentVariable[],
  secrets: OrchestratorSecret[],
): Promise<string> {
  try {
    OrchestratorLogger.log('Orchestrator K8s workflow!');
    ResourceTracking.logAllocationSummary('k8s workflow');
    await ResourceTracking.logDiskUsageSnapshot('k8s workflow (host)');
    await ResourceTracking.logK3dNodeDiskUsage('k8s workflow (before job)');

    // Setup
    // Retained-workspace builds share a PVC keyed by the locked workspace instead of the build GUID.
    const id =
      BuildParameters && BuildParameters.shouldUseRetainedWorkspaceMode(this.buildParameters)
        ? Orchestrator.lockedWorkspace
        : this.buildParameters.buildGuid;
    this.pvcName = `unity-builder-pvc-${id}`;
    await KubernetesStorage.createPersistentVolumeClaim(
      this.buildParameters,
      this.pvcName,
      this.kubeClient,
      this.namespace,
    );
    this.buildGuid = buildGuid;
    this.secretName = `build-credentials-${this.buildGuid}`;
    this.jobName = `unity-builder-job-${this.buildGuid}`;
    this.containerName = `main`;
    await KubernetesSecret.createSecret(secrets, this.secretName, this.namespace, this.kubeClient);

    // For tests, clean up old images before creating job to free space for image pull
    // IMPORTANT: Preserve the Unity image to avoid re-pulling it
    if (process.env['orchestratorTests'] === 'true') {
      try {
        OrchestratorLogger.log('Cleaning up old images in k3d node before pulling new image...');
        const { OrchestratorSystem: OrchestratorSystemModule } = await import(
          '../../services/core/orchestrator-system'
        );

        // Aggressive cleanup: remove stopped containers and non-Unity images
        // IMPORTANT: Preserve Unity images (unityci/editor) to avoid re-pulling the 3.9GB image
        const K3D_NODE_CONTAINERS = ['k3d-unity-builder-agent-0', 'k3d-unity-builder-server-0'];
        const cleanupCommands: string[] = [];
        for (const NODE of K3D_NODE_CONTAINERS) {
          // Remove all stopped containers (this frees runtime space but keeps images)
          // NOTE(review): in the second command the `$(...)`, `$img`, `$repo` and the nested
          // double quotes sit inside a double-quoted `sh -c "..."` argument, so they look like
          // they are expanded by the host shell before `docker exec` runs - confirm the quoting.
          cleanupCommands.push(
            `docker exec ${NODE} sh -c "crictl rm --all 2>/dev/null || true" || true`,
            `docker exec ${NODE} sh -c "for img in $(crictl images -q 2>/dev/null); do repo=$(crictl inspecti $img --format '{{.repo}}' 2>/dev/null || echo ''); if echo "$repo" | grep -qvE 'unityci/editor|unity'; then crictl rmi $img 2>/dev/null || true; fi; done" || true`,
            `docker exec ${NODE} sh -c "crictl rmi --prune 2>/dev/null || true" || true`,
          );
        }

        for (const cmd of cleanupCommands) {
          try {
            await OrchestratorSystemModule.Run(cmd, true, true);
          } catch (cmdError) {
            // Ignore individual command failures - cleanup is best effort
            OrchestratorLogger.log(`Cleanup command failed (non-fatal): ${cmdError}`);
          }
        }
        OrchestratorLogger.log('Cleanup completed (containers and non-Unity images removed, Unity images preserved)');
      } catch (cleanupError) {
        OrchestratorLogger.logWarning(`Failed to cleanup images before job creation: ${cleanupError}`);

        // Continue anyway - image might already be cached
      }
    }

    let output = '';
    try {
      // Before creating the job, verify we have the Unity image cached on the agent node
      // If not cached, only report it: the checks below log warnings and never pull the image
      if (process.env['orchestratorTests'] === 'true' && image.includes('unityci/editor')) {
        try {
          const { OrchestratorSystem: OrchestratorSystemModule2 } = await import(
            '../../services/core/orchestrator-system'
          );

          // Check if image is cached on agent node (where pods run)
          const agentImageCheck = await OrchestratorSystemModule2.Run(
            `docker exec k3d-unity-builder-agent-0 sh -c "crictl images | grep -q unityci/editor && echo 'cached' || echo 'not_cached'" || echo 'not_cached'`,
            true,
            true,
          );

          if (agentImageCheck.includes('not_cached')) {
            // Check if image is on server node
            const serverImageCheck = await OrchestratorSystemModule2.Run(
              `docker exec k3d-unity-builder-server-0 sh -c "crictl images | grep -q unityci/editor && echo 'cached' || echo 'not_cached'" || echo 'not_cached'`,
              true,
              true,
            );

            // Check available disk space on agent node
            const diskInfo = await OrchestratorSystemModule2.Run(
              'docker exec k3d-unity-builder-agent-0 sh -c "df -h /var/lib/rancher/k3s 2>/dev/null | tail -1 || df -h / 2>/dev/null | tail -1 || echo unknown" || echo unknown',
              true,
              true,
            );

            OrchestratorLogger.logWarning(
              `Unity image not cached on agent node (where pods run). Server node: ${
                serverImageCheck.includes('cached') ? 'has image' : 'no image'
              }. Disk info: ${diskInfo.trim()}. Pod will attempt to pull image (3.9GB) which may fail due to disk pressure.`,
            );

            // If image is on server but not agent, log a warning
            // NOTE: We don't attempt to pull here because:
            // 1. Pulling a 3.9GB image can take several minutes and block the test
            // 2. If there's not enough disk space, the pull will hang indefinitely
            // 3. The pod will attempt to pull during scheduling anyway
            // 4. If the pull fails, Kubernetes will provide proper error messages
            if (serverImageCheck.includes('cached')) {
              OrchestratorLogger.logWarning(
                'Unity image exists on server node but not agent node. Pod will attempt to pull during scheduling. If pull fails due to disk pressure, ensure cleanup runs before this test.',
              );
            } else {
              // Image not on either node - check if we have enough space to pull
              const availableGB = Kubernetes.parseAvailableGigabytes(diskInfo);

              // Unity image is ~3.9GB, need at least 4.5GB to be safe
              if (availableGB !== undefined && availableGB < 4.5) {
                OrchestratorLogger.logWarning(
                  `CRITICAL: Unity image not cached and only ${availableGB.toFixed(
                    2,
                  )}GB available. Image pull (3.9GB) will likely fail. Consider running cleanup or ensuring pre-pull step succeeds.`,
                );
              }
            }
          } else {
            OrchestratorLogger.log('Unity image is cached on agent node - pod should start without pulling');
          }
        } catch (checkError) {
          // Ignore check errors - continue with job creation
          OrchestratorLogger.logWarning(`Failed to verify Unity image cache: ${checkError}`);
        }
      }

      OrchestratorLogger.log('Job does not exist');
      await this.createJob(commands, image, mountdir, workingdir, environment, secrets);
      OrchestratorLogger.log('Watching pod until running');
      await KubernetesTaskRunner.watchUntilPodRunning(this.kubeClient, this.podName, this.namespace);

      OrchestratorLogger.log('Pod is running');
      output += await KubernetesTaskRunner.runTask(
        this.kubeConfig,
        this.kubeClient,
        this.jobName,
        this.podName,
        this.containerName,
        this.namespace,
      );
    } catch (error: any) {
      OrchestratorLogger.log(`error running k8s workflow ${error}`);

      // Short pause before reading events so the ones for this failure can show up.
      await new Promise((resolve) => setTimeout(resolve, 3000));

      // Dump the namespace events whose name mentions this pod to aid debugging.
      OrchestratorLogger.log(
        JSON.stringify(
          (await this.kubeClient.listNamespacedEvent(this.namespace)).body.items
            .map((x) => {
              return {
                message: x.message || ``,
                name: x.metadata.name || ``,
                reason: x.reason || ``,
              };
            })
            .filter((x) => x.name.includes(this.podName)),
          undefined,
          4,
        ),
      );
      await this.cleanupTaskResources();
      throw error;
    }

    await this.cleanupTaskResources();

    return output;
  } catch (error) {
    OrchestratorLogger.log('Running job failed');
    core.error(JSON.stringify(error, undefined, 4));

    // await this.cleanupTaskResources();
    throw error;
  }
}
/**
 * Extracts the "Available" column from a single line of `df -h` output, in GiB.
 *
 * `df -h` prints sizes such as `3.9G` or `512M` (no trailing "B"), and the line may or
 * may not start with the filesystem name, so the value is located as the size token
 * immediately before the `Use%` column rather than by column index.
 *
 * @param dfLine One data line of `df -h` output (or any other text, e.g. "unknown").
 * @returns Available space in GiB, or `undefined` when no such token is found.
 */
private static parseAvailableGigabytes(dfLine: string): number | undefined {
  const match = dfLine.match(/(\d+(?:\.\d+)?)\s*([kmgtp]?)i?b?\s+\d+%/i);
  if (!match) {
    return undefined;
  }

  const value = Number.parseFloat(match[1]);
  switch (match[2].toUpperCase()) {
    case 'P':
      return value * 1024 * 1024;
    case 'T':
      return value * 1024;
    case 'G':
      return value;
    case 'M':
      return value / 1024;
    case 'K':
      return value / (1024 * 1024);
    default:
      // No unit suffix: plain bytes.
      return value / (1024 * 1024 * 1024);
  }
}
/**
 * Creates the build job, then looks up the pod spawned for it and records that pod's
 * name and container name on the instance.
 *
 * @throws Error when no pod carrying the job's `job-name` label can be found.
 */
private async createJob(
  commands: string,
  image: string,
  mountdir: string,
  workingdir: string,
  environment: OrchestratorEnvironmentVariable[],
  secrets: OrchestratorSecret[],
) {
  await this.createNamespacedJob(commands, image, mountdir, workingdir, environment, secrets);

  const jobPod = await Kubernetes.findPodFromJob(this.kubeClient, this.jobName, this.namespace);
  this.setPodNameAndContainerName(jobPod);
}
/**
 * Reports whether a job with the given name exists in the provider's namespace.
 *
 * @param name Job name to look for.
 */
private async doesJobExist(name: string) {
  const { body } = await this.kubeClientBatch.listNamespacedJob(this.namespace);
  const matchesName = (job: { metadata?: { name?: string } }) => job.metadata?.name === name;

  return body.items.some(matchesName);
}
/**
 * Reports whether the current pod (`this.podName`) is in the `Failed` phase.
 */
private async doesFailedJobExist() {
  const { body } = await this.kubeClient.readNamespacedPodStatus(this.podName, this.namespace);
  const currentPhase = body.status?.phase;

  return currentPhase === `Failed`;
}
/**
 * Builds the job spec for the current task and submits it to the cluster.
 *
 * The job is submitted exactly once: any failure is logged and rethrown. (The previous
 * three-iteration loop never reached a second iteration, because the first one always
 * returned or rethrew, so it has been removed.)
 *
 * @returns The name of the created job as reported by the API server, if any.
 * @throws Rethrows whatever job-spec creation or the Kubernetes API call throws.
 */
private async createNamespacedJob(
  commands: string,
  image: string,
  mountdir: string,
  workingdir: string,
  environment: OrchestratorEnvironmentVariable[],
  secrets: OrchestratorSecret[],
) {
  try {
    const jobSpec = KubernetesJobSpecFactory.getJobSpec(
      commands,
      image,
      mountdir,
      workingdir,
      environment,
      secrets,
      this.buildGuid,
      this.buildParameters,
      this.secretName,
      this.pvcName,
      this.jobName,
      k8s,
      this.containerName,
      this.ip,
    );

    // Fixed 15s pause before submitting the job.
    await new Promise((resolve) => setTimeout(resolve, 15000));

    // await KubernetesRole.createRole(this.serviceAccountName, this.namespace, this.rbacAuthorizationV1Api);

    const result = await this.kubeClientBatch.createNamespacedJob(this.namespace, jobSpec);
    OrchestratorLogger.log(`Build job created`);

    // Fixed 5s pause after submission, before callers go looking for the job's pod.
    await new Promise((resolve) => setTimeout(resolve, 5000));
    OrchestratorLogger.log('Job created');

    return result.body.metadata?.name;
  } catch (error) {
    OrchestratorLogger.log(`Error occured creating job: ${error}`);
    throw error;
  }
}
/**
 * Records the pod's name and the name of its first container on the instance.
 *
 * The pod name falls back to an empty string. The container name keeps its current
 * value when the pod reports no container statuses, including the case of an empty
 * `containerStatuses` array, which previously raised a TypeError.
 *
 * @param pod Pod backing the current job.
 */
setPodNameAndContainerName(pod: k8s.V1Pod) {
  this.podName = pod.metadata?.name || '';
  this.containerName = pod.status?.containerStatuses?.[0]?.name || this.containerName;
}
/**
 * Deletes the resources created for the current task: job, pod, admin role/binding,
 * then the credentials secret.
 *
 * Job/pod/role deletion stops at the first error; a `NotFound` error is tolerated, any
 * other error is rethrown. Secret deletion is best effort and only logs failures.
 *
 * Errors that carry no HTTP response (for example network failures) are handled
 * without dereferencing `error.response`, so the original error is what propagates.
 *
 * @throws Rethrows the first non-`NotFound` error from job/pod/role deletion.
 */
async cleanupTaskResources() {
  OrchestratorLogger.log('cleaning up');
  try {
    await this.kubeClientBatch.deleteNamespacedJob(this.jobName, this.namespace);
    await this.kubeClient.deleteNamespacedPod(this.podName, this.namespace);
    await KubernetesRole.deleteRole(this.serviceAccountName, this.namespace, this.rbacAuthorizationV1Api);
  } catch (error: any) {
    OrchestratorLogger.log(`Failed to cleanup`);

    // Not every error has `response.body`; read the reason defensively.
    const reason = error?.response?.body?.reason;
    if (reason !== `NotFound`) {
      OrchestratorLogger.log(`Wasn't a not found error: ${reason}`);
      throw error;
    }
  }
  try {
    await this.kubeClient.deleteNamespacedSecret(this.secretName, this.namespace);
  } catch (error: any) {
    OrchestratorLogger.log(`Failed to cleanup secret`);

    // Fall back to the error itself when the API supplied no reason.
    OrchestratorLogger.log(String(error?.response?.body?.reason ?? error));
  }
  OrchestratorLogger.log('cleaned up Secret, Job and Pod');
  OrchestratorLogger.log('cleaning up finished');
}
/**
 * Deletes the build's PVC and service account once the workflow is finished.
 *
 * Nothing is deleted when the build runs in retained-workspace mode.
 *
 * @param buildParameters Build configuration used to detect retained-workspace mode.
 * @throws Rethrows any deletion error after logging it.
 */
async cleanupWorkflow(
  buildParameters: BuildParameters,
  // eslint-disable-next-line no-unused-vars
  branchName: string,
  // eslint-disable-next-line no-unused-vars
  defaultSecretsArray: { ParameterKey: string; EnvironmentVariable: string; ParameterValue: string }[],
) {
  const retainWorkspace = BuildParameters && BuildParameters.shouldUseRetainedWorkspaceMode(buildParameters);
  if (retainWorkspace) {
    return;
  }

  OrchestratorLogger.log(`deleting PVC`);

  try {
    await this.kubeClient.deleteNamespacedPersistentVolumeClaim(this.pvcName, this.namespace);
    await this.kubeClient.deleteNamespacedServiceAccount(this.serviceAccountName, this.namespace);
    OrchestratorLogger.log('cleaned up PVC and Service Account');
  } catch (error: any) {
    const serialized = JSON.stringify(error, undefined, 4);
    OrchestratorLogger.log(`Cleanup failed ${serialized}`);
    throw error;
  }
}
/**
 * Finds the first pod in the namespace whose `job-name` label equals the given job name.
 *
 * @param kubeClient Core API client used to list pods.
 * @param jobName Name of the job whose pod is wanted.
 * @param namespace Namespace to search.
 * @returns The matching pod.
 * @throws Error when no pod carries the label.
 */
static async findPodFromJob(kubeClient: CoreV1Api, jobName: string, namespace: string) {
  const { body } = await kubeClient.listNamespacedPod(namespace);

  for (const candidate of body.items) {
    if (candidate.metadata?.labels?.['job-name'] === jobName) {
      return candidate;
    }
  }

  throw new Error("pod with job-name label doesn't exist");
}
}
export default Kubernetes;
@@ -0,0 +1,208 @@
import { V1EnvVar, V1EnvVarSource, V1SecretKeySelector } from '@kubernetes/client-node';
import BuildParameters from '../../../build-parameters';
import { CommandHookService } from '../../services/hooks/command-hook-service';
import OrchestratorEnvironmentVariable from '../../options/orchestrator-environment-variable';
import OrchestratorSecret from '../../options/orchestrator-secret';
import Orchestrator from '../../orchestrator';
import OrchestratorLogger from '../../services/core/orchestrator-logger';
/**
 * Builds the `batch/v1` Job manifest used to run a single orchestrator task in Kubernetes.
 */
class KubernetesJobSpecFactory {
  /**
   * Assembles the Job object for one task.
   *
   * @param command Shell commands to run; hooks are applied and a trailing `sleep 2m` is appended.
   * @param image Container image.
   * @param mountdir Mount path of the `build-mount` volume.
   * @param workingDirectory Working directory of the container.
   * @param environment Plain environment variables. Known AWS endpoint variables pointing at
   *   `http://localhost` / `http://127.0.0.1` are rewritten to the LocalStack host.
   * @param secrets Secrets exposed as env vars via `secretKeyRef` into `secretName`.
   * @param buildGuid Added as the `buildGuid` label on the job.
   * @param buildParameters Source of the configured container memory/cpu.
   * @param secretName Name of the Kubernetes secret holding `secrets`.
   * @param pvcName Name of the PVC backing the build volume.
   * @param jobName Name of the job.
   * @param k8s The `@kubernetes/client-node` module (used to construct `V1Job`).
   * @param containerName Name of the single container.
   * @param ip Value of the `LOG_SERVICE_IP` env var given to the container.
   * @returns The populated `V1Job` instance.
   */
  static getJobSpec(
    command: string,
    image: string,
    mountdir: string,
    workingDirectory: string,
    environment: OrchestratorEnvironmentVariable[],
    secrets: OrchestratorSecret[],
    buildGuid: string,
    buildParameters: BuildParameters,
    secretName: string,
    pvcName: string,
    jobName: string,
    k8s: any,
    containerName: string,
    ip: string = '',
  ) {
    const endpointEnvironmentNames = new Set([
      'AWS_S3_ENDPOINT',
      'AWS_ENDPOINT',
      'AWS_CLOUD_FORMATION_ENDPOINT',
      'AWS_ECS_ENDPOINT',
      'AWS_KINESIS_ENDPOINT',
      'AWS_CLOUD_WATCH_LOGS_ENDPOINT',
      'INPUT_AWSS3ENDPOINT',
      'INPUT_AWSENDPOINT',
    ]);

    // Determine the LocalStack hostname to use for K8s pods
    // Priority: K8S_LOCALSTACK_HOST env var > localstack-main (container name on shared network)
    // Note: Using K8S_LOCALSTACK_HOST instead of LOCALSTACK_HOST to avoid conflict with awslocal CLI
    const localstackHost = process.env['K8S_LOCALSTACK_HOST'] || 'localstack-main';
    OrchestratorLogger.log(`K8s pods will use LocalStack host: ${localstackHost}`);

    const adjustedEnvironment = environment.map((x) => {
      let value = x.value;
      if (
        typeof value === 'string' &&
        endpointEnvironmentNames.has(x.name) &&
        (value.startsWith('http://localhost') || value.startsWith('http://127.0.0.1'))
      ) {
        // Replace localhost with the LocalStack container hostname
        // When k3d and LocalStack are on the same Docker network, pods can reach LocalStack by container name
        value = value
          .replace('http://localhost', `http://${localstackHost}`)
          .replace('http://127.0.0.1', `http://${localstackHost}`);
        OrchestratorLogger.log(`Replaced localhost with ${localstackHost} for ${x.name}: ${value}`);
      }

      return { name: x.name, value } as OrchestratorEnvironmentVariable;
    });

    const job = new k8s.V1Job();
    job.apiVersion = 'batch/v1';
    job.kind = 'Job';
    job.metadata = {
      name: jobName,
      labels: {
        app: 'unity-builder',
        buildGuid,
      },
    };

    // Reduce TTL for tests to free up resources faster (default 9999s = ~2.8 hours)
    // For CI/test environments, use shorter TTL (300s = 5 minutes) to prevent disk pressure
    const isTestRun = process.env['orchestratorTests'] === 'true';
    const jobTTL = isTestRun ? 300 : 9999;

    job.spec = {
      ttlSecondsAfterFinished: jobTTL,
      backoffLimit: 0,
      template: {
        spec: {
          terminationGracePeriodSeconds: 90, // Give PreStopHook (60s sleep) time to complete
          volumes: [
            {
              name: 'build-mount',
              persistentVolumeClaim: {
                claimName: pvcName,
              },
            },
          ],
          containers: [
            {
              // `ttlSecondsAfterFinished` is a JobSpec field (set above), not a container
              // field, so it is intentionally not repeated here.
              name: containerName,
              image,
              imagePullPolicy: isTestRun ? 'IfNotPresent' : 'Always',
              command: ['/bin/sh'],
              args: [
                '-c',
                `${CommandHookService.ApplyHooksToCommands(`${command}\nsleep 2m`, Orchestrator.buildParameters)}`,
              ],
              workingDir: `${workingDirectory}`,
              resources: {
                requests: KubernetesJobSpecFactory.getResourceRequests(image, buildParameters),
              },
              env: [
                ...adjustedEnvironment.map((x) => {
                  const environmentVariable = new V1EnvVar();
                  environmentVariable.name = x.name;
                  environmentVariable.value = x.value;

                  return environmentVariable;
                }),
                ...secrets.map((x) => {
                  const secret = new V1EnvVarSource();
                  secret.secretKeyRef = new V1SecretKeySelector();
                  secret.secretKeyRef.key = x.ParameterKey;
                  secret.secretKeyRef.name = secretName;
                  const environmentVariable = new V1EnvVar();
                  environmentVariable.name = x.EnvironmentVariable;
                  environmentVariable.valueFrom = secret;

                  return environmentVariable;
                }),
                { name: 'LOG_SERVICE_IP', value: ip },
              ],
              volumeMounts: [
                {
                  name: 'build-mount',
                  mountPath: `${mountdir}`,
                },
              ],
              lifecycle: {
                preStop: {
                  exec: {
                    command: [
                      '/bin/sh',
                      '-c',
                      'sleep 60; cd /data/builder/action/steps && chmod +x /steps/return_license.sh 2>/dev/null || true; /steps/return_license.sh 2>/dev/null || true',
                    ],
                  },
                },
              },
            },
          ],
          restartPolicy: 'Never',

          // Add tolerations for CI/test environments to allow scheduling even with disk pressure
          // This is acceptable for CI where we aggressively clean up disk space
          // NOTE(review): the toleration is applied unconditionally, not only in test runs.
          tolerations: [
            {
              key: 'node.kubernetes.io/disk-pressure',
              operator: 'Exists',
              effect: 'NoSchedule',
            },
          ],
        },
      },
    };

    // Under minikube the build volume is a host directory instead of a PVC.
    if (process.env['ORCHESTRATOR_MINIKUBE']) {
      job.spec.template.spec.volumes[0] = {
        name: 'build-mount',
        hostPath: {
          path: `/data`,
          type: `Directory`,
        },
      };
    }

    // Set ephemeral-storage request to a reasonable value to prevent evictions
    // For tests, don't set a request since k3d nodes have very limited disk space;
    // Kubernetes will use whatever is available without a request.
    // For production, use 2Gi to allow for larger builds
    if (!isTestRun) {
      job.spec.template.spec.containers[0].resources.requests[`ephemeral-storage`] = '2Gi';
    }

    return job;
  }

  /**
   * Chooses the cpu/memory requests for the job's container.
   *
   * In test runs, lightweight hook images get minimal requests. Otherwise the configured
   * `containerMemory` / `containerCpu` values are divided by 1024, falling back to
   * `750M` / `1` when they are missing, non-numeric or not positive.
   */
  private static getResourceRequests(image: string, buildParameters: BuildParameters): Record<string, string> {
    // Hook containers typically use utility images like aws-cli, rclone, etc.
    const lightweightImages = ['amazon/aws-cli', 'rclone/rclone', 'steamcmd/steamcmd', 'ubuntu'];
    const isLightweightContainer = lightweightImages.some((lightImage) => image.includes(lightImage));
    if (isLightweightContainer && process.env['orchestratorTests'] === 'true') {
      // For test environments, use minimal resources for hook containers
      return {
        memory: '128Mi',
        cpu: '100m', // 0.1 CPU
      };
    }

    // For main build containers, use the configured resources
    const memoryMB = Number.parseInt(buildParameters.containerMemory);
    const cpuMB = Number.parseInt(buildParameters.containerCpu);

    return {
      memory: !Number.isNaN(memoryMB) && memoryMB > 0 ? `${memoryMB / 1024}G` : '750M',
      cpu: !Number.isNaN(cpuMB) && cpuMB > 0 ? `${cpuMB / 1024}` : '1',
    };
  }
}
export default KubernetesJobSpecFactory;
@@ -0,0 +1,194 @@
import OrchestratorLogger from '../../services/core/orchestrator-logger';
import { CoreV1Api } from '@kubernetes/client-node';
/**
 * Helpers for inspecting the state of a build pod.
 */
class KubernetesPods {
/**
 * Reports whether the named pod is still active (phase `Running` or `Pending`).
 *
 * When the pod phase is `Failed`, the container statuses, conditions and events are
 * inspected to decide whether this is a real failure. Several cases are tolerated and
 * return `false` instead of throwing: a container that exited with code 0, a
 * PreStopHook failure with no known container exit code, and exit code 137 combined
 * with PreStopHook / grace-period events.
 *
 * @param podName Name of the pod to inspect.
 * @param namespace Namespace the pod lives in.
 * @param kubeClient Core API client.
 * @returns `true` while the pod is Running or Pending; `false` otherwise (including
 *   when the pod is not found or a failure is judged non-fatal).
 * @throws Error when the pod failed for a reason not tolerated above, including
 *   eviction / disk pressure.
 */
public static async IsPodRunning(podName: string, namespace: string, kubeClient: CoreV1Api) {
const pods = (await kubeClient.listNamespacedPod(namespace)).body.items.filter((x) => podName === x.metadata?.name);
// A Pending pod counts as "running" here.
const running = pods.length > 0 && (pods[0].status?.phase === `Running` || pods[0].status?.phase === `Pending`);
const phase = pods[0]?.status?.phase || 'undefined status';
OrchestratorLogger.log(`Getting pod status: ${phase}`);
if (phase === `Failed`) {
const pod = pods[0];
const containerStatuses = pod.status?.containerStatuses || [];
const conditions = pod.status?.conditions || [];
// Events in the namespace whose involved object is this pod.
const events = (await kubeClient.listNamespacedEvent(namespace)).body.items
.filter((x) => x.involvedObject?.name === podName)
.map((x) => ({
message: x.message || '',
reason: x.reason || '',
type: x.type || '',
}));
// Human-readable diagnostics accumulated for logs and the final error message.
const errorDetails: string[] = [];
errorDetails.push(`Pod: ${podName}`, `Phase: ${phase}`);
if (conditions.length > 0) {
errorDetails.push(
`Conditions: ${JSON.stringify(
conditions.map((c) => ({ type: c.type, status: c.status, reason: c.reason, message: c.message })),
undefined,
2,
)}`,
);
}
// containerExitCode holds the exit code of the last terminated container seen;
// containerSucceeded becomes true if any terminated container exited with 0.
let containerExitCode: number | undefined;
let containerSucceeded = false;
if (containerStatuses.length > 0) {
for (const [index, cs] of containerStatuses.entries()) {
if (cs.state?.waiting) {
errorDetails.push(
`Container ${index} (${cs.name}) waiting: ${cs.state.waiting.reason} - ${cs.state.waiting.message || ''}`,
);
}
if (cs.state?.terminated) {
const exitCode = cs.state.terminated.exitCode;
containerExitCode = exitCode;
if (exitCode === 0) {
containerSucceeded = true;
}
errorDetails.push(
`Container ${index} (${cs.name}) terminated: ${cs.state.terminated.reason} - ${
cs.state.terminated.message || ''
} (exit code: ${exitCode})`,
);
}
}
}
if (events.length > 0) {
// Only the last five events are included in the diagnostics.
errorDetails.push(`Recent events: ${JSON.stringify(events.slice(-5), undefined, 2)}`);
}
// Check if only PreStopHook failed but container succeeded
const hasPreStopHookFailure = events.some((event) => event.reason === 'FailedPreStopHook');
const wasKilled = events.some((event) => event.reason === 'Killing');
const hasExceededGracePeriod = events.some((event) => event.reason === 'ExceededGracePeriod');
// If container succeeded (exit code 0), PreStopHook failure is non-critical
// Also check if pod was killed but container might have succeeded
if (containerSucceeded && containerExitCode === 0) {
// Container succeeded - PreStopHook failure is non-critical
if (hasPreStopHookFailure) {
OrchestratorLogger.logWarning(
`Pod ${podName} marked as Failed due to PreStopHook failure, but container exited successfully (exit code 0). This is non-fatal.`,
);
} else {
OrchestratorLogger.log(
`Pod ${podName} container succeeded (exit code 0), but pod phase is Failed. Checking details...`,
);
}
OrchestratorLogger.log(`Pod details: ${errorDetails.join('\n')}`);
// Don't throw error - container succeeded, PreStopHook failure is non-critical
return false; // Pod is not running, but we don't treat it as a failure
}
// If pod was killed and we have PreStopHook failure, wait for container status
// The container might have succeeded but status hasn't been updated yet
if (wasKilled && hasPreStopHookFailure && (containerExitCode === undefined || !containerSucceeded)) {
OrchestratorLogger.log(
`Pod ${podName} was killed with PreStopHook failure. Waiting for container status to determine if container succeeded...`,
);
// Wait a bit for container status to become available (up to 30 seconds: 6 polls, 5s apart)
for (let index = 0; index < 6; index++) {
await new Promise((resolve) => setTimeout(resolve, 5000));
try {
const updatedPod = (await kubeClient.listNamespacedPod(namespace)).body.items.find(
(x) => podName === x.metadata?.name,
);
// Only the first container's status is consulted while polling.
if (updatedPod?.status?.containerStatuses && updatedPod.status.containerStatuses.length > 0) {
const updatedContainerStatus = updatedPod.status.containerStatuses[0];
if (updatedContainerStatus.state?.terminated) {
const updatedExitCode = updatedContainerStatus.state.terminated.exitCode;
if (updatedExitCode === 0) {
OrchestratorLogger.logWarning(
`Pod ${podName} container succeeded (exit code 0) after waiting. PreStopHook failure is non-fatal.`,
);
return false; // Pod is not running, but container succeeded
} else {
OrchestratorLogger.log(
`Pod ${podName} container failed with exit code ${updatedExitCode} after waiting.`,
);
errorDetails.push(`Container terminated after wait: exit code ${updatedExitCode}`);
containerExitCode = updatedExitCode;
containerSucceeded = false;
break;
}
}
}
} catch (waitError) {
// A failed poll is logged and the loop simply tries again.
OrchestratorLogger.log(`Error while waiting for container status: ${waitError}`);
}
}
// If we still don't have container status after waiting, but only PreStopHook failed,
// be lenient - the container might have succeeded but status wasn't updated
if (containerExitCode === undefined && hasPreStopHookFailure && !hasExceededGracePeriod) {
OrchestratorLogger.logWarning(
`Pod ${podName} container status not available after waiting, but only PreStopHook failed (no ExceededGracePeriod). Assuming container may have succeeded.`,
);
return false; // Be lenient - PreStopHook failure alone is not fatal
}
OrchestratorLogger.log(
`Container status check completed. Exit code: ${containerExitCode}, PreStopHook failure: ${hasPreStopHookFailure}`,
);
}
// If we only have PreStopHook failure and no actual container failure, be lenient
// (this also covers pods that had no 'Killing' event and so skipped the wait above)
if (hasPreStopHookFailure && !hasExceededGracePeriod && containerExitCode === undefined) {
OrchestratorLogger.logWarning(
`Pod ${podName} has PreStopHook failure but no container failure detected. Treating as non-fatal.`,
);
return false; // PreStopHook failure alone is not fatal if container status is unclear
}
// Check if pod was evicted due to disk pressure - this is an infrastructure issue
// Detection is a case-insensitive text search over the collected diagnostics.
const wasEvicted = errorDetails.some(
(detail) => detail.toLowerCase().includes('evicted') || detail.toLowerCase().includes('diskpressure'),
);
if (wasEvicted) {
const evictionMessage = `Pod ${podName} was evicted due to disk pressure. This is a test infrastructure issue - the cluster doesn't have enough disk space.`;
OrchestratorLogger.logWarning(evictionMessage);
OrchestratorLogger.log(`Pod details: ${errorDetails.join('\n')}`);
throw new Error(
`${evictionMessage}\nThis indicates the test environment needs more disk space or better cleanup.\n${errorDetails.join(
'\n',
)}`,
);
}
// Exit code 137 (128 + 9) means SIGKILL - container was killed by system (often OOM)
// If this happened with PreStopHook failure, it might be a resource issue, not a real failure
// Be lenient if we only have PreStopHook/ExceededGracePeriod issues
if (containerExitCode === 137 && (hasPreStopHookFailure || hasExceededGracePeriod)) {
OrchestratorLogger.logWarning(
`Pod ${podName} was killed (exit code 137 - likely OOM or resource limit) with PreStopHook/grace period issues. This may be a resource constraint issue rather than a build failure.`,
);
// Still log the details but don't fail the test - the build might have succeeded before being killed
OrchestratorLogger.log(`Pod details: ${errorDetails.join('\n')}`);
return false; // Don't treat system kills as test failures if only PreStopHook issues
}
// Nothing above excused the failure: report it with all collected details.
const errorMessage = `K8s pod failed\n${errorDetails.join('\n')}`;
OrchestratorLogger.log(errorMessage);
throw new Error(errorMessage);
}
return running;
}
/**
 * Returns the phase of the named pod.
 *
 * @param podName Name of the pod to look up.
 * @param namespace Namespace the pod lives in.
 * @param kubeClient Core API client.
 * @returns The pod's phase, or the string 'undefined status' when the pod is not found
 *   or reports no phase.
 */
public static async GetPodStatus(podName: string, namespace: string, kubeClient: CoreV1Api) {
// Despite the plural name, `pods` is the single matching pod (or undefined).
const pods = (await kubeClient.listNamespacedPod(namespace)).body.items.find((x) => podName === x.metadata?.name);
const phase = pods?.status?.phase || 'undefined status';
return phase;
}
}
export default KubernetesPods;
@@ -0,0 +1,53 @@
import { RbacAuthorizationV1Api } from '@kubernetes/client-node';
/**
 * Creates and deletes the namespaced admin Role / RoleBinding pair associated with a
 * build's service account. Both objects are named `<serviceAccountName>-admin`.
 */
class KubernetesRole {
  /**
   * Creates an all-permissions Role in the namespace and binds it to the service account.
   *
   * The Role is created before the RoleBinding that references it, so a failed Role
   * creation never leaves a binding pointing at a missing Role.
   *
   * @param serviceAccountName Service account to bind; also the prefix of the object names.
   * @param namespace Namespace for the Role, the RoleBinding and the service account.
   * @param rbac RBAC API client.
   * @returns The API responses for both created objects.
   */
  static async createRole(serviceAccountName: string, namespace: string, rbac: RbacAuthorizationV1Api) {
    const adminRoleName = `${serviceAccountName}-admin`;

    // Role granting every verb on every resource of every API group within the namespace.
    const role = {
      apiVersion: 'rbac.authorization.k8s.io/v1',
      kind: 'Role',
      metadata: {
        name: adminRoleName,
        namespace,
      },
      rules: [
        {
          apiGroups: ['*'],
          resources: ['*'],
          verbs: ['*'],
        },
      ],
    };

    // Binding that grants the role above to the service account.
    const roleBinding = {
      apiVersion: 'rbac.authorization.k8s.io/v1',
      kind: 'RoleBinding',
      metadata: {
        name: adminRoleName,
        namespace,
      },
      subjects: [
        {
          kind: 'ServiceAccount',
          name: serviceAccountName,
          namespace,
        },
      ],
      roleRef: {
        apiGroup: 'rbac.authorization.k8s.io',
        kind: 'Role',
        name: adminRoleName,
      },
    };

    const roleResponse = await rbac.createNamespacedRole(namespace, role);
    const roleBindingResponse = await rbac.createNamespacedRoleBinding(namespace, roleBinding);

    return { roleBindingResponse, roleResponse };
  }

  /**
   * Deletes the RoleBinding and then the Role created by `createRole`.
   *
   * Errors from the API (including "not found") propagate to the caller; if deleting
   * the binding fails, the Role deletion is not attempted.
   *
   * @param serviceAccountName Service account whose `-admin` role objects are removed.
   * @param namespace Namespace containing the objects.
   * @param rbac RBAC API client.
   */
  public static async deleteRole(serviceAccountName: string, namespace: string, rbac: RbacAuthorizationV1Api) {
    const adminRoleName = `${serviceAccountName}-admin`;
    await rbac.deleteNamespacedRoleBinding(adminRoleName, namespace);
    await rbac.deleteNamespacedRole(adminRoleName, namespace);
  }
}
export { KubernetesRole };
@@ -0,0 +1,45 @@
import { CoreV1Api } from '@kubernetes/client-node';
import OrchestratorSecret from '../../options/orchestrator-secret';
import * as k8s from '@kubernetes/client-node';
import OrchestratorLogger from '../../services/core/orchestrator-logger';
import * as base64 from 'base-64';
/**
 * Creates the Kubernetes Secret that carries the orchestrator build secrets.
 */
class KubernetesSecret {
  /**
   * Builds an `Opaque` secret named `secretName` from `secrets` and creates it in `namespace`.
   *
   * Each entry is stored under its `ParameterKey` with the base64-encoded `ParameterValue`.
   * Before creating, the existing secrets of the namespace are listed purely for diagnostics
   * (whether the name is already taken and how many secrets exist).
   *
   * @param secrets Key/value pairs to store in the secret.
   * @param secretName Name of the secret to create.
   * @param namespace Namespace the secret is created in.
   * @param kubeClient Core API client used for the list and create requests.
   * @throws Error with the message prefix `Failed to create kubernetes secret`, followed by the
   *         underlying failure reason, when any step fails.
   */
  static async createSecret(
    secrets: OrchestratorSecret[],
    secretName: string,
    namespace: string,
    kubeClient: CoreV1Api,
  ) {
    try {
      const secret = new k8s.V1Secret();
      secret.apiVersion = 'v1';
      secret.kind = 'Secret';
      secret.type = 'Opaque';
      secret.metadata = {
        name: secretName,
      };
      secret.data = {};
      for (const buildSecret of secrets) {
        secret.data[buildSecret.ParameterKey] = base64.encode(buildSecret.ParameterValue);
      }
      OrchestratorLogger.log(`Creating secret: ${secretName}`);
      const existingSecrets = await kubeClient.listNamespacedSecret(namespace);
      const mappedSecrets = existingSecrets.body.items.map((x) => {
        return x.metadata?.name || `no name`;
      });
      OrchestratorLogger.log(
        `ExistsAlready: ${mappedSecrets.includes(secretName)} SecretsCount: ${mappedSecrets.length}`,
      );

      // NOTE(review): fixed 15 second pause before the create call; the reason is not visible
      // in this file, so it is preserved as-is - confirm whether it is still required.
      await new Promise((resolve) => setTimeout(resolve, 15000));
      await kubeClient.createNamespacedSecret(namespace, secret);
      OrchestratorLogger.log('Created secret');
    } catch (error: any) {
      OrchestratorLogger.log(`Created secret failed ${error}`);

      // Surface the underlying reason (API status message when the error carries a response
      // body, otherwise the error message) instead of discarding it.
      const reason = error?.body?.message || error?.message || String(error);
      throw new Error(`Failed to create kubernetes secret: ${reason}`);
    }
  }
}
export default KubernetesSecret;
@@ -0,0 +1,18 @@
import { CoreV1Api } from '@kubernetes/client-node';
import * as k8s from '@kubernetes/client-node';
/**
 * Creates the service account that build pods run under.
 */
class KubernetesServiceAccount {
  /**
   * Creates a service account with token auto-mounting enabled.
   *
   * @param serviceAccountName Name given to the new service account.
   * @param namespace Namespace the service account is created in.
   * @param kubeClient Core API client used to send the create request.
   * @returns The response of the create call.
   */
  static async createServiceAccount(serviceAccountName: string, namespace: string, kubeClient: CoreV1Api) {
    const account = Object.assign(new k8s.V1ServiceAccount(), {
      apiVersion: 'v1',
      kind: 'ServiceAccount',
      metadata: { name: serviceAccountName },
      automountServiceAccountToken: true,
    });

    return kubeClient.createNamespacedServiceAccount(namespace, account);
  }
}
export default KubernetesServiceAccount;
@@ -0,0 +1,276 @@
import { waitUntil } from 'async-wait-until';
import * as core from '@actions/core';
import * as k8s from '@kubernetes/client-node';
import BuildParameters from '../../../build-parameters';
import OrchestratorLogger from '../../services/core/orchestrator-logger';
import { IncomingMessage } from 'node:http';
import GitHub from '../../../github';
class KubernetesStorage {
/**
 * Ensures a PersistentVolumeClaim named `pvcName` exists in `namespace`.
 *
 * - When `buildParameters.kubeVolume` is non-empty the override is logged and nothing is created.
 * - When a claim with that name is already listed, the `volume` output is set (only if GitHub
 *   input is enabled) and nothing is created.
 * - Otherwise the claim is created and the creation result is handed to `handleResult`.
 *
 * @param buildParameters Build settings; `kubeVolume` is read here.
 * @param pvcName Name of the claim to look up or create.
 * @param kubeClient Core API client used to list claims.
 * @param namespace Namespace the claim lives in.
 */
public static async createPersistentVolumeClaim(
  buildParameters: BuildParameters,
  pvcName: string,
  kubeClient: k8s.CoreV1Api,
  namespace: string,
) {
  // A user-supplied volume short-circuits everything below.
  if (buildParameters.kubeVolume !== ``) {
    OrchestratorLogger.log(`Kube Volume was input was set ${buildParameters.kubeVolume} overriding ${pvcName}`);

    return;
  }

  const claimListing = await kubeClient.listNamespacedPersistentVolumeClaim(namespace);
  const existingNames = claimListing.body.items.map((claim) => claim.metadata?.name);
  OrchestratorLogger.log(`Current PVCs in namespace ${namespace}`);
  OrchestratorLogger.log(JSON.stringify(existingNames, undefined, 4));

  if (!existingNames.includes(pvcName)) {
    OrchestratorLogger.log(`Creating PVC ${pvcName} (does not exist)`);
    const creation = await KubernetesStorage.createPVC(pvcName, buildParameters, kubeClient, namespace);
    await KubernetesStorage.handleResult(creation, kubeClient, namespace, pvcName);

    return;
  }

  OrchestratorLogger.log(`pvc ${pvcName} already exists`);
  if (GitHub.githubInputEnabled) {
    core.setOutput('volume', pvcName);
  }
}
/**
 * Reads a PersistentVolumeClaim and returns its current phase.
 *
 * @param kubeClient Core API client used to read the claim.
 * @param name Name of the claim.
 * @param namespace Namespace of the claim.
 * @returns The claim's `status.phase`, or `undefined` when the claim has no status.
 * @throws Rethrows the read error after reporting it through `core.error`.
 */
public static async getPVCPhase(kubeClient: k8s.CoreV1Api, name: string, namespace: string) {
  try {
    const { body: claim } = await kubeClient.readNamespacedPersistentVolumeClaim(name, namespace);

    return claim.status?.phase;
  } catch (error) {
    core.error('Failed to get PVC phase');
    core.error(JSON.stringify(error, undefined, 4));
    throw error;
  }
}
/**
 * Waits until a PersistentVolumeClaim is no longer `Pending`.
 *
 * The wait is skipped entirely when the claim's StorageClass reports the
 * `WaitForFirstConsumer` binding mode, and it stops early when a claim event reports that
 * the claim is waiting for its first consumer. In both cases the claim is expected to stay
 * `Pending` until a pod uses it, so a `Pending` phase is not treated as a failure.
 *
 * Otherwise the phase is polled every 15 seconds for up to 750 seconds.
 *
 * @param kubeClient Core API client used to read the claim and its events.
 * @param name Name of the claim.
 * @param namespace Namespace of the claim.
 * @throws Rethrows any failure of the wait (including the wait timeout) after writing
 *         claim, StorageClass and event diagnostics through `core.error`.
 */
public static async watchUntilPVCNotPending(kubeClient: k8s.CoreV1Api, name: string, namespace: string) {
  let checkCount = 0;

  // Set when the wait loop stops because the claim is waiting for its first consumer.
  // The claim is still Pending in that case, so the post-wait Pending check must not run.
  let waitingForFirstConsumer = false;
  try {
    OrchestratorLogger.log(`watch Until PVC Not Pending ${name} ${namespace}`);

    // Check if storage class uses WaitForFirstConsumer binding mode
    // If so, skip waiting - PVC will bind when pod is created
    let shouldSkipWait = false;
    try {
      const pvcBody = (await kubeClient.readNamespacedPersistentVolumeClaim(name, namespace)).body;
      const storageClassName = pvcBody.spec?.storageClassName;
      if (storageClassName) {
        const kubeConfig = new k8s.KubeConfig();
        kubeConfig.loadFromDefault();
        const storageV1Api = kubeConfig.makeApiClient(k8s.StorageV1Api);
        try {
          const sc = await storageV1Api.readStorageClass(storageClassName);
          const volumeBindingMode = sc.body.volumeBindingMode;
          if (volumeBindingMode === 'WaitForFirstConsumer') {
            OrchestratorLogger.log(
              `StorageClass "${storageClassName}" uses WaitForFirstConsumer binding mode. PVC will bind when pod is created. Skipping wait.`,
            );
            shouldSkipWait = true;
          }
        } catch (scError) {
          // If we can't check the storage class, proceed with normal wait
          OrchestratorLogger.log(
            `Could not check storage class binding mode: ${scError}. Proceeding with normal wait.`,
          );
        }
      }
    } catch (pvcReadError) {
      // If we can't read PVC, proceed with normal wait
      OrchestratorLogger.log(
        `Could not read PVC to check storage class: ${pvcReadError}. Proceeding with normal wait.`,
      );
    }
    if (shouldSkipWait) {
      OrchestratorLogger.log(`Skipping PVC wait - will bind when pod is created`);

      return;
    }

    const initialPhase = await this.getPVCPhase(kubeClient, name, namespace);
    OrchestratorLogger.log(`Initial PVC phase: ${initialPhase}`);

    // Wait until PVC is NOT Pending (i.e., Bound or Available)
    await waitUntil(
      async () => {
        checkCount++;
        const phase = await this.getPVCPhase(kubeClient, name, namespace);

        // Log progress every 4 checks (every ~60 seconds)
        if (checkCount % 4 === 0) {
          OrchestratorLogger.log(`PVC ${name} still ${phase} (check ${checkCount})`);

          // Fetch and log PVC events for diagnostics
          try {
            const pvcEvents = (await KubernetesStorage.listPvcEvents(kubeClient, name, namespace)).slice(-5); // Get last 5 events
            if (pvcEvents.length > 0) {
              OrchestratorLogger.log(`PVC Events: ${JSON.stringify(pvcEvents, undefined, 2)}`);

              // Check if event indicates WaitForFirstConsumer
              const waitForConsumerEvent = pvcEvents.find(
                (event) =>
                  event.reason === 'WaitForFirstConsumer' || event.message?.includes('waiting for first consumer'),
              );
              if (waitForConsumerEvent) {
                OrchestratorLogger.log(
                  `PVC is waiting for first consumer. This is normal for WaitForFirstConsumer storage classes. Proceeding without waiting.`,
                );
                waitingForFirstConsumer = true;

                return true; // Exit wait loop - PVC will bind when pod is created
              }
            }
          } catch {
            // Ignore event fetch errors
          }
        }

        return phase !== 'Pending';
      },
      {
        timeout: 750000,
        intervalBetweenAttempts: 15000,
      },
    );

    // The loop was left on purpose while the claim is still Pending; it binds once a pod
    // consumes it, so do not fall through to the Pending check below.
    if (waitingForFirstConsumer) {
      return;
    }

    const finalPhase = await this.getPVCPhase(kubeClient, name, namespace);
    OrchestratorLogger.log(`PVC phase after wait: ${finalPhase}`);
    if (finalPhase === 'Pending') {
      throw new Error(`PVC ${name} is still Pending after timeout`);
    }
  } catch (error: any) {
    core.error('Failed to watch PVC');
    core.error(error.toString());
    try {
      const pvcBody = (await kubeClient.readNamespacedPersistentVolumeClaim(name, namespace)).body;

      // Fetch PVC events for detailed diagnostics
      let pvcEvents: { message: string; reason: string; type: string; count: number }[] = [];
      try {
        pvcEvents = await KubernetesStorage.listPvcEvents(kubeClient, name, namespace);
      } catch {
        // Ignore event fetch errors
      }

      // Check if storage class exists
      let storageClassInfo = '';
      try {
        const storageClassName = pvcBody.spec?.storageClassName;
        if (storageClassName) {
          // Create StorageV1Api from default config
          const kubeConfig = new k8s.KubeConfig();
          kubeConfig.loadFromDefault();
          const storageV1Api = kubeConfig.makeApiClient(k8s.StorageV1Api);
          try {
            const sc = await storageV1Api.readStorageClass(storageClassName);
            storageClassInfo = `StorageClass "${storageClassName}" exists. Provisioner: ${
              sc.body.provisioner || 'unknown'
            }`;
          } catch (scError: any) {
            storageClassInfo =
              scError.statusCode === 404
                ? `StorageClass "${storageClassName}" does NOT exist! This is likely why the PVC is stuck in Pending.`
                : `Failed to check StorageClass "${storageClassName}": ${scError.message || scError}`;
          }
        }
      } catch (scCheckError) {
        // Ignore storage class check errors - not critical for diagnostics
        storageClassInfo = `Could not check storage class: ${scCheckError}`;
      }
      core.error(
        `PVC Body: ${JSON.stringify(
          {
            phase: pvcBody.status?.phase,
            conditions: pvcBody.status?.conditions,
            accessModes: pvcBody.spec?.accessModes,
            storageClassName: pvcBody.spec?.storageClassName,
            storageRequest: pvcBody.spec?.resources?.requests?.storage,
          },
          undefined,
          4,
        )}`,
      );
      if (storageClassInfo) {
        core.error(storageClassInfo);
      }
      if (pvcEvents.length > 0) {
        core.error(`PVC Events: ${JSON.stringify(pvcEvents, undefined, 2)}`);
      } else {
        core.error('No PVC events found - this may indicate the storage provisioner is not responding');
      }
    } catch {
      // Ignore PVC read errors
    }
    throw error;
  }
}

/**
 * Lists the namespace events that involve the named PersistentVolumeClaim, reduced to the
 * fields used for diagnostics. Rejects when the event listing fails.
 */
private static async listPvcEvents(kubeClient: k8s.CoreV1Api, name: string, namespace: string) {
  const events = await kubeClient.listNamespacedEvent(namespace);

  return events.body.items
    .filter((x) => x.involvedObject?.kind === 'PersistentVolumeClaim' && x.involvedObject?.name === name)
    .map((x) => ({
      message: x.message || '',
      reason: x.reason || '',
      type: x.type || '',
      count: x.count || 0,
    }));
}
/**
 * Sends the create request for a `ReadWriteOnce` PersistentVolumeClaim.
 *
 * The storage class is `buildParameters.kubeStorageClass`, or `'standard'` when that value
 * is the empty string; the requested size is `buildParameters.kubeVolumeSize`.
 *
 * @param pvcName Name of the claim to create.
 * @param buildParameters Build settings supplying storage class and volume size.
 * @param kubeClient Core API client used to send the request.
 * @param namespace Namespace the claim is created in.
 * @returns The response of the create call.
 */
private static async createPVC(
  pvcName: string,
  buildParameters: BuildParameters,
  kubeClient: k8s.CoreV1Api,
  namespace: string,
) {
  const storageClassName = buildParameters.kubeStorageClass === '' ? 'standard' : buildParameters.kubeStorageClass;
  const claim = Object.assign(new k8s.V1PersistentVolumeClaim(), {
    apiVersion: 'v1',
    kind: 'PersistentVolumeClaim',
    metadata: { name: pvcName },
    spec: {
      accessModes: ['ReadWriteOnce'],
      storageClassName,
      resources: {
        requests: {
          storage: buildParameters.kubeVolumeSize,
        },
      },
    },
  });

  return kubeClient.createNamespacedPersistentVolumeClaim(namespace, claim);
}
/**
 * Follows up on a claim creation: waits via `watchUntilPVCNotPending`, then publishes the
 * claim name as the `volume` output.
 *
 * @param result Response of the create call; the claim name is read from its body metadata
 *               (empty string when the metadata has no name).
 * @param kubeClient Core API client passed on to the wait.
 * @param namespace Namespace of the claim.
 * @param pvcName Value written to the `volume` output.
 */
private static async handleResult(
  result: { response: IncomingMessage; body: k8s.V1PersistentVolumeClaim },
  kubeClient: k8s.CoreV1Api,
  namespace: string,
  pvcName: string,
) {
  const { metadata } = result.body;
  const createdName = metadata?.name || '';

  OrchestratorLogger.log(`PVC ${createdName} created`);
  await this.watchUntilPVCNotPending(kubeClient, createdName, namespace);
  OrchestratorLogger.log(`PVC ${createdName} is ready and not pending`);

  core.setOutput('volume', pvcName);
}
}
export default KubernetesStorage;
@@ -0,0 +1,763 @@
import { CoreV1Api, KubeConfig } from '@kubernetes/client-node';
import OrchestratorLogger from '../../services/core/orchestrator-logger';
import { waitUntil } from 'async-wait-until';
import { OrchestratorSystem } from '../../services/core/orchestrator-system';
import Orchestrator from '../../orchestrator';
import KubernetesPods from './kubernetes-pods';
import { FollowLogStreamService } from '../../services/core/follow-log-stream-service';
class KubernetesTaskRunner {
// Upper bound for the retry counter that runTask increments when log fetching fails or ends
// without the end-of-transmission marker.
static readonly maxRetry: number = 3;
// NOTE(review): not read or written by the code visible in this part of the file - confirm usage.
static lastReceivedMessage: string = ``;
/**
 * Streams the logs of a build container until the end-of-transmission marker is received,
 * then backfills anything the stream missed and returns the collected output.
 *
 * Phase 1 (loop): every iteration waits 3 seconds, checks whether the pod is running and runs
 * `kubectl logs` (`-f` for a running pod, `--previous` otherwise). Each non-empty line is passed
 * to `FollowLogStreamService.handleIteration`. Failures are retried, bounded by `maxRetry` and by
 * a limit of 3 consecutive "unable to retrieve container logs" failures.
 *
 * Phase 2 (fallback): when the output is empty, the pod is no longer running, or the
 * "Collected Logs" marker is missing, the job log file and the plain container logs are read
 * and any lines not already present are processed.
 *
 * Phase 3: lines containing the kubectl "unable to retrieve container logs" error are removed.
 *
 * @param kubeConfig Not used by this method.
 * @param kubeClient Core API client used to check whether the pod is running.
 * @param jobName Not used by this method.
 * @param podName Pod whose logs are read.
 * @param containerName Container whose logs are read.
 * @param namespace Namespace of the pod.
 * @returns The collected log output with kubectl error lines filtered out.
 * @throws Rethrows the log streaming error once retries are exhausted, unless the error is
 *         about a missing previous terminated container.
 */
static async runTask(
  kubeConfig: KubeConfig,
  kubeClient: CoreV1Api,
  jobName: string,
  podName: string,
  containerName: string,
  namespace: string,
) {
  let output = '';
  let shouldReadLogs = true;
  let shouldCleanup = true;
  let retriesAfterFinish = 0;
  let kubectlLogsFailedCount = 0;
  const maxKubectlLogsFailures = 3;
  // eslint-disable-next-line no-constant-condition
  while (true) {
    await new Promise((resolve) => setTimeout(resolve, 3000));
    OrchestratorLogger.log(
      `Streaming logs from pod: ${podName} container: ${containerName} namespace: ${namespace} ${Orchestrator.buildParameters.kubeVolumeSize}/${Orchestrator.buildParameters.containerCpu}/${Orchestrator.buildParameters.containerMemory}`,
    );
    const isRunning = await KubernetesPods.IsPodRunning(podName, namespace, kubeClient);
    const callback = (outputChunk: string) => {
      // Filter out kubectl error messages about being unable to retrieve container logs
      // These errors pollute the output and don't contain useful information
      const lowerChunk = outputChunk.toLowerCase();
      if (lowerChunk.includes('unable to retrieve container logs')) {
        OrchestratorLogger.log(`Filtered kubectl error: ${outputChunk.trim()}`);

        return;
      }
      output += outputChunk;

      // split output chunk and handle per line
      for (const chunk of outputChunk.split(`\n`)) {
        // Skip empty chunks and kubectl error messages (case-insensitive)
        const lowerCaseChunk = chunk.toLowerCase();
        if (chunk.trim() && !lowerCaseChunk.includes('unable to retrieve container logs')) {
          ({ shouldReadLogs, shouldCleanup, output } = FollowLogStreamService.handleIteration(
            chunk,
            shouldReadLogs,
            shouldCleanup,
            output,
          ));
        }
      }
    };
    try {
      // Always specify container name explicitly to avoid containerd:// errors
      // Use -f for running pods, --previous for terminated pods
      await OrchestratorSystem.Run(
        `kubectl logs ${podName} -c ${containerName} -n ${namespace}${isRunning ? ' -f' : ' --previous'}`,
        false,
        true,
        callback,
      );

      // Reset failure count on success
      kubectlLogsFailedCount = 0;
    } catch (error: any) {
      kubectlLogsFailedCount++;
      await new Promise((resolve) => setTimeout(resolve, 3000));
      const continueStreaming = await KubernetesPods.IsPodRunning(podName, namespace, kubeClient);
      OrchestratorLogger.log(`K8s logging error ${error} ${continueStreaming}`);

      // Detect the kubectl "unable to retrieve container logs" failure (case-insensitive; this
      // also covers the "... for containerd://" variant of the message)
      const errorMessage = error?.message || error?.toString() || '';
      const isKubectlLogsError = errorMessage.toLowerCase().includes('unable to retrieve container logs');
      if (isKubectlLogsError) {
        OrchestratorLogger.log(
          `Kubectl unable to retrieve logs, attempt ${kubectlLogsFailedCount}/${maxKubectlLogsFailures}`,
        );

        // kubectl logs has failed repeatedly and the pod was not running before or after the
        // attempt. Nothing can be read via exec from a pod that is not running, so stop the
        // loop here and return whatever logs we have; the fallback collection below still runs.
        // This prevents infinite retries when kubectl logs consistently fails.
        if (kubectlLogsFailedCount >= maxKubectlLogsFailures && !isRunning && !continueStreaming) {
          OrchestratorLogger.log(`Attempting to read log file directly from pod as fallback...`);
          OrchestratorLogger.log(`Pod is terminated, attempting to read log file via temporary pod...`);
          OrchestratorLogger.logWarning(`Cannot read log file from terminated pod via exec`);
          OrchestratorLogger.logWarning(`Log file read returned empty content, continuing with available logs`);
          break;
        }
      }

      // If pod is not running and we tried --previous but it failed, try without --previous
      if (!isRunning && !continueStreaming && error?.message?.includes('previous terminated container')) {
        OrchestratorLogger.log(`Previous container not found, trying current container logs...`);
        try {
          await OrchestratorSystem.Run(
            `kubectl logs ${podName} -c ${containerName} -n ${namespace}`,
            false,
            true,
            callback,
          );

          // If we successfully got logs, check for end of transmission
          if (FollowLogStreamService.DidReceiveEndOfTransmission) {
            OrchestratorLogger.log('end of log stream');
            break;
          }

          // If we got logs but no end marker, continue trying (might be more logs)
          if (retriesAfterFinish < KubernetesTaskRunner.maxRetry) {
            retriesAfterFinish++;
            continue;
          }

          // If we've exhausted retries, break
          break;
        } catch (fallbackError: any) {
          OrchestratorLogger.log(`Fallback log fetch also failed: ${fallbackError}`);

          // If both fail, continue retrying if we haven't exhausted retries
          if (retriesAfterFinish < KubernetesTaskRunner.maxRetry) {
            retriesAfterFinish++;
            continue;
          }

          // Only break if we've exhausted all retries
          OrchestratorLogger.logWarning(
            `Could not fetch any container logs after ${KubernetesTaskRunner.maxRetry} retries`,
          );
          break;
        }
      }
      if (continueStreaming) {
        continue;
      }
      if (retriesAfterFinish < KubernetesTaskRunner.maxRetry) {
        retriesAfterFinish++;
        continue;
      }

      // If we've exhausted retries and it's not a previous container issue, throw
      if (!error?.message?.includes('previous terminated container')) {
        throw error;
      }

      // For previous container errors, we've already tried fallback, so just break
      OrchestratorLogger.logWarning(
        `Could not fetch previous container logs after retries, but continuing with available logs`,
      );
      break;
    }
    if (FollowLogStreamService.DidReceiveEndOfTransmission) {
      OrchestratorLogger.log('end of log stream');
      break;
    }
  }

  // After kubectl logs loop ends, read log file as fallback to capture any messages
  // written after kubectl stopped reading (e.g., "Collected Logs" from post-build)
  // This ensures all log messages are included in BuildResults for test assertions
  // If output is empty, we need to be more aggressive about getting logs
  const needsFallback = output.trim().length === 0;
  const missingCollectedLogs = !output.includes('Collected Logs');
  if (needsFallback) {
    OrchestratorLogger.log('Output is empty, attempting aggressive log collection fallback...');

    // Give the pod a moment to finish writing logs before we try to read them
    await new Promise((resolve) => setTimeout(resolve, 5000));
  }

  // Always try fallback if output is empty, if pod is terminated, or if "Collected Logs" is missing
  // The "Collected Logs" check ensures we try to get post-build messages even if we have some output
  try {
    const isPodStillRunning = await KubernetesPods.IsPodRunning(podName, namespace, kubeClient);
    const shouldTryFallback = !isPodStillRunning || needsFallback || missingCollectedLogs;
    if (shouldTryFallback) {
      const reason = needsFallback
        ? 'output is empty'
        : missingCollectedLogs
        ? 'Collected Logs missing from output'
        : 'pod is terminated';
      OrchestratorLogger.log(
        `Pod is ${isPodStillRunning ? 'running' : 'terminated'} and ${reason}, reading log file as fallback...`,
      );
      try {
        let logFileContent = '';

        // Try multiple approaches to get the log file, in order: the log file in the container,
        // the copy under /data (where the original comments say the PVC is mounted), then
        // kubectl logs as a last resort.
        // Note: `--previous` is only a `kubectl logs` flag. `kubectl exec` does not accept it,
        // so exec is only attempted against the current container.
        const attempts = [
          // Read the log file from the container
          `kubectl exec ${podName} -c ${containerName} -n ${namespace} -- cat /home/job-log.txt 2>/dev/null || echo ""`,
          // Try reading from PVC (/data) in case log was copied there
          `kubectl exec ${podName} -c ${containerName} -n ${namespace} -- cat /data/job-log.txt 2>/dev/null || echo ""`,
          // Try kubectl logs as fallback (might capture stdout even if exec fails)
          `kubectl logs ${podName} -c ${containerName} -n ${namespace} --previous 2>/dev/null || echo ""`,
          `kubectl logs ${podName} -c ${containerName} -n ${namespace} 2>/dev/null || echo ""`,
        ];
        for (const attempt of attempts) {
          // If we already have content with "Collected Logs", no need to try more
          if (logFileContent && logFileContent.trim() && logFileContent.includes('Collected Logs')) {
            OrchestratorLogger.log('Found "Collected Logs" in fallback content, stopping attempts.');
            break;
          }
          try {
            OrchestratorLogger.log(`Trying fallback method: ${attempt.slice(0, 80)}...`);
            const result = await OrchestratorSystem.Run(attempt, true, true);
            if (result && result.trim()) {
              // Prefer content that has "Collected Logs" over content that doesn't
              if (!logFileContent || !logFileContent.includes('Collected Logs')) {
                logFileContent = result;
                OrchestratorLogger.log(
                  `Successfully read logs using fallback method (${logFileContent.length} chars): ${attempt.slice(
                    0,
                    50,
                  )}...`,
                );

                // If this content has "Collected Logs", we're done
                if (logFileContent.includes('Collected Logs')) {
                  OrchestratorLogger.log('Fallback method successfully captured "Collected Logs".');
                  break;
                }
              } else {
                OrchestratorLogger.log(`Skipping this result - already have content with "Collected Logs".`);
              }
            } else {
              OrchestratorLogger.log(`Fallback method returned empty result: ${attempt.slice(0, 50)}...`);
            }
          } catch (attemptError: any) {
            OrchestratorLogger.log(
              `Fallback method failed: ${attempt.slice(0, 50)}... Error: ${attemptError?.message || attemptError}`,
            );

            // Continue to next attempt
          }
        }
        if (!logFileContent || !logFileContent.trim()) {
          OrchestratorLogger.logWarning(
            'Could not read log file from pod after all fallback attempts (may be OOM-killed or pod not accessible).',
          );
        }
        if (logFileContent && logFileContent.trim()) {
          OrchestratorLogger.log(
            `Read log file from pod as fallback (${logFileContent.length} chars) to capture missing messages`,
          );

          // Get the lines we already have in output to avoid duplicates
          const existingLines = new Set(output.split('\n').map((line) => line.trim()));

          // Process the log file content line by line and add missing lines
          for (const line of logFileContent.split(`\n`)) {
            const trimmedLine = line.trim();
            const lowerLine = trimmedLine.toLowerCase();

            // Skip empty lines, kubectl errors, and lines we already have
            if (
              trimmedLine &&
              !lowerLine.includes('unable to retrieve container logs') &&
              !existingLines.has(trimmedLine)
            ) {
              // Process through FollowLogStreamService; output is taken from its return value
              // rather than appended to manually here
              ({ shouldReadLogs, shouldCleanup, output } = FollowLogStreamService.handleIteration(
                trimmedLine,
                shouldReadLogs,
                shouldCleanup,
                output,
              ));
            }
          }
        }
      } catch (logFileError: any) {
        OrchestratorLogger.logWarning(
          `Could not read log file from pod as fallback: ${logFileError?.message || logFileError}`,
        );

        // Continue with existing output - this is a best-effort fallback
      }
    }

    // If output is still empty or missing "Collected Logs" after fallback attempts, add a warning message
    // This ensures BuildResults is not completely empty, which would cause test failures
    if ((needsFallback && output.trim().length === 0) || (!output.includes('Collected Logs') && shouldTryFallback)) {
      OrchestratorLogger.logWarning(
        'Could not retrieve "Collected Logs" from pod after all attempts. Pod may have been killed before logs were written.',
      );

      // Add a minimal message so BuildResults is not completely empty
      // This helps with debugging and prevents test failures due to empty results
      if (output.trim().length === 0) {
        output = 'Pod logs unavailable - pod may have been terminated before logs could be collected.\n';
      } else if (!output.includes('Collected Logs')) {
        // We have some output but missing "Collected Logs" - append the fallback message
        output +=
          '\nPod logs incomplete - "Collected Logs" marker not found. Pod may have been terminated before post-build completed.\n';
      }
    }
  } catch (fallbackError: any) {
    OrchestratorLogger.logWarning(
      `Error checking pod status for log file fallback: ${fallbackError?.message || fallbackError}`,
    );

    // If output is empty and we hit an error, still add a message so BuildResults isn't empty
    if (needsFallback && output.trim().length === 0) {
      output = `Error retrieving logs: ${fallbackError?.message || fallbackError}\n`;
    }

    // Continue with existing output - this is a best-effort fallback
  }

  // Filter out kubectl error messages from the final output
  // These errors can be added via stderr even when kubectl fails
  // We filter them out so they don't pollute the BuildResults
  const lines = output.split('\n');
  const filteredLines = lines.filter((line) => !line.toLowerCase().includes('unable to retrieve container logs'));
  const filteredOutput = filteredLines.join('\n');

  // Log if we filtered out significant content
  const originalLineCount = lines.length;
  const filteredLineCount = filteredLines.length;
  if (originalLineCount > filteredLineCount) {
    OrchestratorLogger.log(
      `Filtered out ${originalLineCount - filteredLineCount} kubectl error message(s) from output`,
    );
  }

  return filteredOutput;
}
static async watchUntilPodRunning(kubeClient: CoreV1Api, podName: string, namespace: string) {
let waitComplete: boolean = false;
let message = ``;
let lastPhase = '';
let consecutivePendingCount = 0;
OrchestratorLogger.log(`Watching ${podName} ${namespace}`);
try {
await waitUntil(
async () => {
const status = await kubeClient.readNamespacedPodStatus(podName, namespace);
const phase = status?.body.status?.phase || 'Unknown';
const conditions = status?.body.status?.conditions || [];
const containerStatuses = status?.body.status?.containerStatuses || [];
// Log phase changes
if (phase !== lastPhase) {
OrchestratorLogger.log(`Pod ${podName} phase changed: ${lastPhase} -> ${phase}`);
lastPhase = phase;
consecutivePendingCount = 0;
}
// Check for failure conditions that mean the pod will never start (permanent failures)
// Note: We don't treat "Failed" phase as a permanent failure because the pod might have
// completed its work before being killed (OOM), and we should still try to get logs
const permanentFailureReasons = [
'Unschedulable',
'ImagePullBackOff',
'ErrImagePull',
'CreateContainerError',
'CreateContainerConfigError',
];
const hasPermanentFailureCondition = conditions.some((condition: any) =>
permanentFailureReasons.some((reason) => condition.reason?.includes(reason)),
);
const hasPermanentFailureContainerStatus = containerStatuses.some((containerStatus: any) =>
permanentFailureReasons.some((reason) => containerStatus.state?.waiting?.reason?.includes(reason)),
);
// Only treat permanent failures as errors - pods that completed (Failed/Succeeded) should continue
if (hasPermanentFailureCondition || hasPermanentFailureContainerStatus) {
// Get detailed failure information
const failureCondition = conditions.find((condition: any) =>
permanentFailureReasons.some((reason) => condition.reason?.includes(reason)),
);
const failureContainer = containerStatuses.find((containerStatus: any) =>
permanentFailureReasons.some((reason) => containerStatus.state?.waiting?.reason?.includes(reason)),
);
message = `Pod ${podName} failed to start (permanent failure):\nPhase: ${phase}\n`;
if (failureCondition) {
message += `Condition Reason: ${failureCondition.reason}\nCondition Message: ${failureCondition.message}\n`;
}
if (failureContainer) {
message += `Container Reason: ${failureContainer.state?.waiting?.reason}\nContainer Message: ${failureContainer.state?.waiting?.message}\n`;
}
// Log pod events for additional context
try {
const events = await kubeClient.listNamespacedEvent(namespace);
const podEvents = events.body.items
.filter((x) => x.involvedObject?.name === podName)
.map((x) => ({
message: x.message || ``,
reason: x.reason || ``,
type: x.type || ``,
}));
if (podEvents.length > 0) {
message += `\nRecent Events:\n${JSON.stringify(podEvents.slice(-5), undefined, 2)}`;
}
} catch {
// Ignore event fetch errors
}
OrchestratorLogger.logWarning(message);
// For permanent failures, mark as incomplete and store the error message
// We'll throw an error after the wait loop exits
waitComplete = false;
return true; // Return true to exit wait loop
}
// Pod is complete if it's not Pending or Unknown - it might be Running, Succeeded, or Failed
// For Failed/Succeeded pods, we still want to try to get logs, so we mark as complete
waitComplete = phase !== 'Pending' && phase !== 'Unknown';
// If pod completed (Succeeded/Failed), log it but don't throw - we'll try to get logs
if (waitComplete && phase !== 'Running') {
OrchestratorLogger.log(`Pod ${podName} completed with phase: ${phase}. Will attempt to retrieve logs.`);
}
if (phase === 'Pending') {
consecutivePendingCount++;
// Check for scheduling failures in events (faster than waiting for conditions)
try {
const events = await kubeClient.listNamespacedEvent(namespace);
const podEvents = events.body.items.filter((x) => x.involvedObject?.name === podName);
const failedSchedulingEvents = podEvents.filter(
(x) => x.reason === 'FailedScheduling' || x.reason === 'SchedulingGated',
);
if (failedSchedulingEvents.length > 0) {
const schedulingMessage = failedSchedulingEvents
.map((x) => `${x.reason}: ${x.message || ''}`)
.join('; ');
message = `Pod ${podName} cannot be scheduled:\n${schedulingMessage}`;
OrchestratorLogger.logWarning(message);
waitComplete = false;
return true; // Exit wait loop to throw error
}
// Check if pod is actively pulling an image - if so, allow more time
const isPullingImage = podEvents.some(
(x) => x.reason === 'Pulling' || x.reason === 'Pulled' || x.message?.includes('Pulling image'),
);
const hasImagePullError = podEvents.some(
(x) => x.reason === 'Failed' && (x.message?.includes('pull') || x.message?.includes('image')),
);
if (hasImagePullError) {
message = `Pod ${podName} failed to pull image. Check image availability and credentials.`;
OrchestratorLogger.logWarning(message);
waitComplete = false;
return true; // Exit wait loop to throw error
}
// If actively pulling image, reset pending count to allow more time
// Large images (like Unity 3.9GB) can take 3-5 minutes to pull
if (isPullingImage && consecutivePendingCount > 4) {
OrchestratorLogger.log(
`Pod ${podName} is pulling image (check ${consecutivePendingCount}). This may take several minutes for large images.`,
);
// Don't increment consecutivePendingCount if we're actively pulling
consecutivePendingCount = Math.max(4, consecutivePendingCount - 1);
}
} catch {
// Ignore event fetch errors
}
// For tests, allow more time if image is being pulled (large images need 5+ minutes)
// Otherwise fail faster if stuck in Pending (2 minutes = 8 checks at 15s interval)
const isTest = process.env['orchestratorTests'] === 'true';
const isPullingImage =
containerStatuses.some(
(cs: any) => cs.state?.waiting?.reason === 'ImagePull' || cs.state?.waiting?.reason === 'ErrImagePull',
) || conditions.some((c: any) => c.reason?.includes('Pulling'));
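// Pending-check budget (one check per 15s poll): in tests, 80 checks (20 minutes) while an
// image pull is detected and 8 checks (2 minutes) otherwise; outside tests it is always 80 checks.
// NOTE(review): the test-mode wait timeout below is 300000 ms (about 20 polls), so the 80-check
// budget is probably never reached in tests - confirm which limit is intended.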
const maxPendingChecks = isTest && isPullingImage ? 80 : isTest ? 8 : 80;
if (consecutivePendingCount >= maxPendingChecks) {
message = `Pod ${podName} stuck in Pending state for too long (${consecutivePendingCount} checks). This indicates a scheduling problem.`;
// Get events for context
try {
const events = await kubeClient.listNamespacedEvent(namespace);
const podEvents = events.body.items
.filter((x) => x.involvedObject?.name === podName)
.slice(-10)
.map((x) => `${x.type}: ${x.reason} - ${x.message}`);
if (podEvents.length > 0) {
message += `\n\nRecent Events:\n${podEvents.join('\n')}`;
}
// Get pod details to check for scheduling issues
try {
const podStatus = await kubeClient.readNamespacedPodStatus(podName, namespace);
const podSpec = podStatus.body.spec;
const podStatusDetails = podStatus.body.status;
// Check container resource requests
if (podSpec?.containers?.[0]?.resources?.requests) {
const requests = podSpec.containers[0].resources.requests;
message += `\n\nContainer Resource Requests:\n CPU: ${requests.cpu || 'not set'}\n Memory: ${
requests.memory || 'not set'
}\n Ephemeral Storage: ${requests['ephemeral-storage'] || 'not set'}`;
}
// Check node selector and tolerations
if (podSpec?.nodeSelector && Object.keys(podSpec.nodeSelector).length > 0) {
message += `\n\nNode Selector: ${JSON.stringify(podSpec.nodeSelector)}`;
}
if (podSpec?.tolerations && podSpec.tolerations.length > 0) {
message += `\n\nTolerations: ${JSON.stringify(podSpec.tolerations)}`;
}
// Check pod conditions for scheduling issues
if (podStatusDetails?.conditions) {
const allConditions = podStatusDetails.conditions.map(
(c: any) =>
`${c.type}: ${c.status}${c.reason ? ` (${c.reason})` : ''}${
c.message ? ` - ${c.message}` : ''
}`,
);
message += `\n\nPod Conditions:\n${allConditions.join('\n')}`;
const unschedulable = podStatusDetails.conditions.find(
(c: any) => c.type === 'PodScheduled' && c.status === 'False',
);
if (unschedulable) {
message += `\n\nScheduling Issue: ${unschedulable.reason || 'Unknown'} - ${
unschedulable.message || 'No message'
}`;
}
// Check if pod is assigned to a node
message += podStatusDetails?.hostIP
? `\n\nPod assigned to node: ${podStatusDetails.hostIP}`
: `\n\nPod not yet assigned to a node (scheduling pending)`;
}
// Check node resources if pod is assigned
if (podStatusDetails?.hostIP) {
try {
const nodes = await kubeClient.listNode();
const hostIP = podStatusDetails.hostIP;
const assignedNode = nodes.body.items.find((n: any) =>
n.status?.addresses?.some((a: any) => a.address === hostIP),
);
if (assignedNode?.status && assignedNode.metadata?.name) {
const allocatable = assignedNode.status.allocatable || {};
message += `\n\nNode Resources (${assignedNode.metadata.name}):\n Allocatable CPU: ${
allocatable.cpu || 'unknown'
}\n Allocatable Memory: ${allocatable.memory || 'unknown'}\n Allocatable Ephemeral Storage: ${
allocatable['ephemeral-storage'] || 'unknown'
}`;
// Check for taints that might prevent scheduling
if (assignedNode.spec?.taints && assignedNode.spec.taints.length > 0) {
const taints = assignedNode.spec.taints
.map((t: any) => `${t.key}=${t.value}:${t.effect}`)
.join(', ');
message += `\n Node Taints: ${taints}`;
}
}
} catch {
// Ignore node check errors
}
}
} catch {
// Ignore pod status fetch errors
}
} catch {
// Ignore event fetch errors
}
OrchestratorLogger.logWarning(message);
waitComplete = false;
return true; // Exit wait loop to throw error
}
// Log diagnostic info every 4 checks (1 minute) if still pending
if (consecutivePendingCount % 4 === 0) {
const pendingMessage = `Pod ${podName} still Pending (check ${consecutivePendingCount}/${maxPendingChecks}). Phase: ${phase}`;
const conditionMessages = conditions
.map((c: any) => `${c.type}: ${c.reason || 'N/A'} - ${c.message || 'N/A'}`)
.join('; ');
OrchestratorLogger.log(`${pendingMessage}. Conditions: ${conditionMessages || 'None'}`);
// Log events periodically to help diagnose
if (consecutivePendingCount % 8 === 0) {
try {
const events = await kubeClient.listNamespacedEvent(namespace);
const podEvents = events.body.items
.filter((x) => x.involvedObject?.name === podName)
.slice(-3)
.map((x) => `${x.type}: ${x.reason} - ${x.message}`)
.join('; ');
if (podEvents) {
OrchestratorLogger.log(`Recent pod events: ${podEvents}`);
}
} catch {
// Ignore event fetch errors
}
}
}
}
message = `Phase:${phase} \n Reason:${conditions[0]?.reason || ''} \n Message:${
conditions[0]?.message || ''
}`;
if (waitComplete || phase !== 'Pending') return true;
return false;
},
{
timeout: process.env['orchestratorTests'] === 'true' ? 300000 : 2000000, // 5 minutes for tests, ~33 minutes for production
intervalBetweenAttempts: 15000, // 15 seconds
},
);
} catch (waitError: any) {
// If waitUntil times out or throws, get final pod status
try {
const finalStatus = await kubeClient.readNamespacedPodStatus(podName, namespace);
const phase = finalStatus?.body.status?.phase || 'Unknown';
const conditions = finalStatus?.body.status?.conditions || [];
message = `Pod ${podName} timed out waiting to start.\nFinal Phase: ${phase}\n`;
message += conditions.map((c: any) => `${c.type}: ${c.reason} - ${c.message}`).join('\n');
// Get events for context
try {
const events = await kubeClient.listNamespacedEvent(namespace);
const podEvents = events.body.items
.filter((x) => x.involvedObject?.name === podName)
.slice(-5)
.map((x) => `${x.type}: ${x.reason} - ${x.message}`);
if (podEvents.length > 0) {
message += `\n\nRecent Events:\n${podEvents.join('\n')}`;
}
} catch {
// Ignore event fetch errors
}
OrchestratorLogger.logWarning(message);
} catch {
message = `Pod ${podName} timed out and could not retrieve final status: ${waitError?.message || waitError}`;
OrchestratorLogger.logWarning(message);
}
throw new Error(`Pod ${podName} failed to start within timeout. ${message}`);
}
// Only throw if we detected a permanent failure condition
// If the pod completed (Failed/Succeeded), we should still try to get logs
if (!waitComplete) {
// Check the final phase to see if it's a permanent failure or just completed
try {
const finalStatus = await kubeClient.readNamespacedPodStatus(podName, namespace);
const finalPhase = finalStatus?.body.status?.phase || 'Unknown';
if (finalPhase === 'Failed' || finalPhase === 'Succeeded') {
OrchestratorLogger.logWarning(
`Pod ${podName} completed with phase ${finalPhase} before reaching Running state. Will attempt to retrieve logs.`,
);
return true; // Allow workflow to continue and try to get logs
}
} catch {
// If we can't check status, fall through to throw error
}
OrchestratorLogger.logWarning(`Pod ${podName} did not reach running state: ${message}`);
throw new Error(`Pod ${podName} did not start successfully: ${message}`);
}
return waitComplete;
}
}
export default KubernetesTaskRunner;