Cloud runner develop - Stabilizes kubernetes provider (#531)

* fixes

* fixes

* fixes

* fixes

* fixes

* check for startup message in workflows

* check for startup message in workflows

* check for startup message in workflows

* check for startup message in workflows

* check for startup message in workflows

* check for startup message in workflows

* Update cloud-runner-ci-pipeline.yml

* Update cloud-runner-ci-pipeline.yml

* no storage class specified

* log file path

* log file path

* log file path

* log file path

* log file path

* log file path

* log file path

* log file path

* updates

* log file path

* latest develop

* log file path

* log file path

* Update package.json

* log file path

* log file path

* log file path

* log file path

* log file path

* log file path

* log file path

* log file path

* log file path

* log file path

* log file path

* log file path

* log file path

* log file path

* stream logs through standard input and new remote client cli command

* stream logs through standard input and new remote client cli command

* stream logs through standard input and new remote client cli command

* stream logs through standard input and new remote client cli command

* stream logs through standard input and new remote client cli command

* stream logs through standard input and new remote client cli command

* stream logs through standard input and new remote client cli command

* stream logs through standard input and new remote client cli command

* stream logs through standard input and new remote client cli command

* stream logs through standard input and new remote client cli command

* stream logs through standard input and new remote client cli command

* stream logs through standard input and new remote client cli command

* stream logs through standard input and new remote client cli command

* stream logs through standard input and new remote client cli command

* stream logs through standard input and new remote client cli command

* update pipeline to use k3s

* version: 'latest'

* fixes

* disable aws pipe for now

* disable aws pipe for now

* disable aws pipe for now

* disable aws pipe for now

* disable aws pipe for now

* disable aws pipe for now

* disable aws pipe for now

* disable aws pipe for now

* disable aws pipe for now

* disable aws pipe for now

* push k8s logs to LOG SERVICE IP

* push k8s logs to LOG SERVICE IP

* push k8s logs to LOG SERVICE IP

* push k8s logs to LOG SERVICE IP

* push k8s logs to LOG SERVICE IP

* push k8s logs to LOG SERVICE IP

* push k8s logs to LOG SERVICE IP

* push k8s logs to LOG SERVICE IP

* tests

* tests

* tests

* tests

* tests

* tests

* tests

* tests

* tests

* tests

* tests

* tests

* tests

* tests

* tests

* tests

* tests

* podname logs for log service

* podname logs for log service

* podname logs for log service

* podname logs for log service

* podname logs for log service

* podname logs for log service

* podname logs for log service

* podname logs for log service

* podname logs for log service

* hashed logs

* hashed logs

* hashed logs

* hashed logs

* hashed logs

* hashed logs

* no wait, just repeat logs

* no wait, just repeat logs

* remove typo - double await

* test fix - kubernetes - name typo in github yaml

* test fix - kubernetes - name typo in github yaml

* check missing log file

* check missing log file

* Push to steam test

* Push to steam test

* Fix path

* k8s reliable log hashing

* k8s reliable log hashing

* k8s reliable log hashing

* hashed logging k8s

* hashed logging k8s

* hashed logging k8s

* hashed logging k8s

* hashed logging k8s

* hashed logging k8s

* Include log chunk when task runner sees log update, clarify if we can pull logs from same line or next line

* Include log chunk when task runner sees log update, clarify if we can pull logs from same line or next line

* Include log chunk when task runner sees log update, clarify if we can pull logs from same line or next line

* Include log chunk when task runner sees log update, clarify if we can pull logs from same line or next line

* Include log chunk when task runner sees log update, clarify if we can pull logs from same line or next line

* Fix exit flow for k8s job

* hash comparison logging for log complete in k8s flow

* Interrupt k8s logs when logs found

* cleanup async parameter

* cleanup async parameter

* cleanup async parameter

* fixes

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix
This commit is contained in:
Frostebite
2024-02-06 23:46:31 +00:00
committed by GitHub
parent 2800d14403
commit e73b48fb38
47 changed files with 1456 additions and 856 deletions
+36 -9
View File
@@ -14,12 +14,17 @@ import { CoreV1Api } from '@kubernetes/client-node';
import CloudRunner from '../../cloud-runner';
import { ProviderResource } from '../provider-resource';
import { ProviderWorkflow } from '../provider-workflow';
import { RemoteClientLogger } from '../../remote-client/remote-client-logger';
import { KubernetesRole } from './kubernetes-role';
import { CloudRunnerSystem } from '../../services/core/cloud-runner-system';
class Kubernetes implements ProviderInterface {
public static Instance: Kubernetes;
public kubeConfig!: k8s.KubeConfig;
public kubeClient!: k8s.CoreV1Api;
public kubeClientApps!: k8s.AppsV1Api;
public kubeClientBatch!: k8s.BatchV1Api;
public rbacAuthorizationV1Api!: k8s.RbacAuthorizationV1Api;
public buildGuid: string = '';
public buildParameters!: BuildParameters;
public pvcName: string = '';
@@ -30,6 +35,7 @@ class Kubernetes implements ProviderInterface {
public containerName: string = '';
public cleanupCronJobName: string = '';
public serviceAccountName: string = '';
public ip: string = '';
// eslint-disable-next-line no-unused-vars
constructor(buildParameters: BuildParameters) {
@@ -37,11 +43,30 @@ class Kubernetes implements ProviderInterface {
this.kubeConfig = new k8s.KubeConfig();
this.kubeConfig.loadFromDefault();
this.kubeClient = this.kubeConfig.makeApiClient(k8s.CoreV1Api);
this.kubeClientApps = this.kubeConfig.makeApiClient(k8s.AppsV1Api);
this.kubeClientBatch = this.kubeConfig.makeApiClient(k8s.BatchV1Api);
this.rbacAuthorizationV1Api = this.kubeConfig.makeApiClient(k8s.RbacAuthorizationV1Api);
this.namespace = 'default';
CloudRunnerLogger.log('Loaded default Kubernetes configuration for this environment');
}
async PushLogUpdate(logs: string) {
// push logs to nginx file server via 'LOG_SERVICE_IP' env var
const ip = process.env[`LOG_SERVICE_IP`];
if (ip === undefined) {
RemoteClientLogger.logWarning(`LOG_SERVICE_IP not set, skipping log push`);
return;
}
const url = `http://${ip}/api/log`;
RemoteClientLogger.log(`Pushing logs to ${url}`);
// logs to base64
logs = Buffer.from(logs).toString('base64');
const response = await CloudRunnerSystem.Run(`curl -X POST -d "${logs}" ${url}`, false, true);
RemoteClientLogger.log(`Pushed logs to ${url} ${response}`);
}
async listResources(): Promise<ProviderResource[]> {
const pods = await this.kubeClient.listNamespacedPod(this.namespace);
const serviceAccounts = await this.kubeClient.listNamespacedServiceAccount(this.namespace);
@@ -115,9 +140,10 @@ class Kubernetes implements ProviderInterface {
CloudRunnerLogger.log('Cloud Runner K8s workflow!');
// Setup
const id = BuildParameters.shouldUseRetainedWorkspaceMode(this.buildParameters)
? CloudRunner.lockedWorkspace
: this.buildParameters.buildGuid;
const id =
BuildParameters && BuildParameters.shouldUseRetainedWorkspaceMode(this.buildParameters)
? CloudRunner.lockedWorkspace
: this.buildParameters.buildGuid;
this.pvcName = `unity-builder-pvc-${id}`;
await KubernetesStorage.createPersistentVolumeClaim(
this.buildParameters,
@@ -137,10 +163,7 @@ class Kubernetes implements ProviderInterface {
CloudRunnerLogger.log('Watching pod until running');
await KubernetesTaskRunner.watchUntilPodRunning(this.kubeClient, this.podName, this.namespace);
CloudRunnerLogger.log('Pod running, streaming logs');
CloudRunnerLogger.log(
`Starting logs follow for pod: ${this.podName} container: ${this.containerName} namespace: ${this.namespace} pvc: ${this.pvcName} ${CloudRunner.buildParameters.kubeVolumeSize}/${CloudRunner.buildParameters.containerCpu}/${CloudRunner.buildParameters.containerMemory}`,
);
CloudRunnerLogger.log('Pod is running');
output += await KubernetesTaskRunner.runTask(
this.kubeConfig,
this.kubeClient,
@@ -232,8 +255,12 @@ class Kubernetes implements ProviderInterface {
this.jobName,
k8s,
this.containerName,
this.ip,
);
await new Promise((promise) => setTimeout(promise, 15000));
// await KubernetesRole.createRole(this.serviceAccountName, this.namespace, this.rbacAuthorizationV1Api);
const result = await this.kubeClientBatch.createNamespacedJob(this.namespace, jobSpec);
CloudRunnerLogger.log(`Build job created`);
await new Promise((promise) => setTimeout(promise, 5000));
@@ -257,6 +284,7 @@ class Kubernetes implements ProviderInterface {
try {
await this.kubeClientBatch.deleteNamespacedJob(this.jobName, this.namespace);
await this.kubeClient.deleteNamespacedPod(this.podName, this.namespace);
await KubernetesRole.deleteRole(this.serviceAccountName, this.namespace, this.rbacAuthorizationV1Api);
} catch (error: any) {
CloudRunnerLogger.log(`Failed to cleanup`);
if (error.response.body.reason !== `NotFound`) {
@@ -275,14 +303,13 @@ class Kubernetes implements ProviderInterface {
}
async cleanupWorkflow(
buildGuid: string,
buildParameters: BuildParameters,
// eslint-disable-next-line no-unused-vars
branchName: string,
// eslint-disable-next-line no-unused-vars
defaultSecretsArray: { ParameterKey: string; EnvironmentVariable: string; ParameterValue: string }[],
) {
if (BuildParameters.shouldUseRetainedWorkspaceMode(buildParameters)) {
if (BuildParameters && BuildParameters.shouldUseRetainedWorkspaceMode(buildParameters)) {
return;
}
CloudRunnerLogger.log(`deleting PVC`);