Files
unity-builder/src/model/cloud-runner/providers/aws/aws-task-runner.ts
Frostebite 8abce48a48 Cloud runner v0.2 - continued quality of life improvements (#387)
* Update cloud-runner-aws-pipeline.yml

* Update cloud-runner-k8s-pipeline.yml

* yarn build

* yarn build

* correct branch ref

* correct branch ref passed to target repo

* Create k8s-tests.yml

* Delete k8s-tests.yml

* correct branch ref passed to target repo

* correct branch ref passed to target repo

* Always describe AWS tasks for now, because unstable error handling

* Remove unused tree commands

* Use lfs guid sum

* Simple override cache push

* Simple override cache push and pull override to allow pure cloud storage driven caching

* Removal of early branch (breaks lfs caching)

* Remove unused tree commands

* Update action.yml

* Update action.yml

* Support cache and input override commands as input + full support custom hooks

* Increase k8s timeout

* replace filename being appended for unknclear reason

* cache key should not contain whitespaces

* Always try and deploy rook for k8s

* Apply k8s files for rook

* Update action.yml

* Apply k8s files for rook

* Apply k8s files for rook

* cache test and action description for kuber storage class

* Correct test and implement dependency health check and start

* GCP-secret run, cache key

* lfs smudge set explicit and undo explicit

* Run using external secret provider to speed up input

* Update cloud-runner-aws-pipeline.yml

* Add nodejs as build step dependency

* Add nodejs as build step dependency

* Cloud Runner Tests must be specified to capture logs from cloud runner for tests

* Cloud Runner Tests must be specified to capture logs from cloud runner for tests

* Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs

* Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs

* Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs

* Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs

* Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs

* better defaults for new inputs

* better defaults

* merge latest

* force build update

* use npm n to update node in unity builder

* use npm n to update node in unity builder

* use npm n to update node in unity builder

* correct new line

* quiet zipping

* quiet zipping

* default secrets for unity username and password

* default secrets for unity username and password

* ls active directory before lfs install

* Get cloud runner secrets from

* Get cloud runner secrets from

* Cleanup setup of default secrets

* Various fixes

* Cleanup setup of default secrets

* Various fixes

* Various fixes

* Various fixes

* Various fixes

* Various fixes

* Various fixes

* Various fixes

* Various fixes

* Various fixes

* Various fixes

* Various fixes

* Various fixes

* Various fixes

* Various fixes

* AWS secrets manager support

* less caching logs

* default k8s storage class to pd-standard

* more readable build commands

* Capture aws exit code 1 reliably

* Always replace /head from branch

* k8s default storage class to standard-rwo

* cleanup

* further cleanup input

* further cleanup input

* further cleanup input

* further cleanup input

* further cleanup input

* folder sizes to inspect caching

* dir command for local cloud runner test

* k8s wait for pending because pvc will not create earlier

* prefer k8s standard storage

* handle empty string as cloud runner cluster input

* local-system is now used for cloud runner test implementation AND correctly unset test CLI input

* local-system is now used for cloud runner test implementation AND correctly unset test CLI input

* fix unterminated quote

* fix unterminated quote

* do not share build parameters in tests - in cloud runner this will cause conflicts with resouces of the same name

* remove head and heads from branch prefix

* fix reversed caching direction of cache-push

* fixes

* fixes

* fixes

* cachePull cli

* fixes

* fixes

* fixes

* fixes

* fixes

* order cache test to be first

* order cache test to be first

* fixes

* populate cache key instead of using branch

* cleanup cli

* garbage-collect-aws cli can iterate over aws resources and cli scans all ts files

* import cli methods

* import cli files explicitly

* import cli files explicitly

* import cli files explicitly

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* import cli methods

* log parameters in cloud runner parameter test

* log parameters in cloud runner parameter test

* log parameters in cloud runner parameter test

* Cloud runner param test before caching because we have a fast local cache test now

* Using custom build path relative to repo root rather than project root

* aws-garbage-collect at end of pipeline

* aws-garbage-collect do not actually delete anything for now - just list

* remove some legacy du commands

* Update cloud-runner-aws-pipeline.yml

* log contents after cache pull and fix some scenarios with duplicate secrets

* log contents after cache pull and fix some scenarios with duplicate secrets

* log contents after cache pull and fix some scenarios with duplicate secrets

* PR comments

* Replace guid with uuid package

* use fileExists lambda instead of stat to check file exists in caching

* build failed results in core error message

* Delete sample.txt

* cloud-runner-system prefix changed to cloud-runner

* Update cloud-runner-aws-pipeline.yml

* remove du from caching, should run manually if interested in size, adds too much runtime to job to include by default

* github ephemeral pipeline support

* github ephemeral pipeline support

* Merge remote-tracking branch 'origin/main' into cloud-runner-develop

# Conflicts:
#	dist/index.js.map
#	src/model/cloud-runner/providers/aws/aws-task-runner.ts
#	src/model/cloud-runner/providers/aws/index.ts

* garbage collection

* garbage collection

* self hosted runner pipeline

* self hosted runner pipeline

* self hosted runner pipeline

* self hosted runner pipeline

* self hosted runner pipeline

* self hosted runner pipeline

* self hosted runner pipeline

* self hosted runner pipeline

* self hosted runner pipeline

* self hosted runner pipeline

* ephemeral runner pipeline

* ephemeral runner pipeline

* ephemeral runner pipeline

* download runner each time

* download runner each time

* download runner each time

* garbage collect all older than 1d as part of cleanup

* download runner each time

* number container cpu and memory for aws

* per provider container defaults

* per provider container defaults

* per provider container defaults

* per provider container defaults

* Skip printing size unless cloudRunnerIntegrationTests is true

* transition zip usage in cache to uncompressed tar for speed

* transition zip usage in cache to uncompressed tar for speed

* transition zip usage in cache to uncompressed tar for speed

* transition zip usage in cache to uncompressed tar for speed

* per provider container defaults

* per provider container defaults

* per provider container defaults

* per provider container defaults

* per provider container defaults

* per provider container defaults

* per provider container defaults

* per provider container defaults

* baked in cloud formation template

* baked in cloud formation template

* baked in cloud formation template

* baked in cloud formation template

* baked in cloud formation template

* baked in cloud formation template

* baked in cloud formation template

* baked in cloud formation template

* better aws commands

* better aws commands

* parse number for cloud formation template

* remove container resource defaults from actions yaml

* remove container resource defaults from actions yaml

* skip all input readers when cloud runner is local

* prefer fs/promises

* actually set aws cloud runner step as failure if unity build fails

* default to 3gb of ram - webgl fails on 2
2022-04-22 00:47:45 +01:00

267 lines
9.7 KiB
TypeScript

import * as AWS from 'aws-sdk';
import CloudRunnerEnvironmentVariable from '../../services/cloud-runner-environment-variable';
import * as core from '@actions/core';
import CloudRunnerAWSTaskDef from './cloud-runner-aws-task-def';
import * as zlib from 'zlib';
import CloudRunnerLogger from '../../services/cloud-runner-logger';
import { Input } from '../../..';
import CloudRunner from '../../cloud-runner';
import { CloudRunnerStatics } from '../../cloud-runner-statics';
import { CloudRunnerBuildCommandProcessor } from '../../services/cloud-runner-build-command-process';
class AWSTaskRunner {
static async runTask(
taskDef: CloudRunnerAWSTaskDef,
ECS: AWS.ECS,
CF: AWS.CloudFormation,
environment: CloudRunnerEnvironmentVariable[],
buildGuid: string,
commands: string,
) {
const cluster = taskDef.baseResources?.find((x) => x.LogicalResourceId === 'ECSCluster')?.PhysicalResourceId || '';
const taskDefinition =
taskDef.taskDefResources?.find((x) => x.LogicalResourceId === 'TaskDefinition')?.PhysicalResourceId || '';
const SubnetOne =
taskDef.baseResources?.find((x) => x.LogicalResourceId === 'PublicSubnetOne')?.PhysicalResourceId || '';
const SubnetTwo =
taskDef.baseResources?.find((x) => x.LogicalResourceId === 'PublicSubnetTwo')?.PhysicalResourceId || '';
const ContainerSecurityGroup =
taskDef.baseResources?.find((x) => x.LogicalResourceId === 'ContainerSecurityGroup')?.PhysicalResourceId || '';
const streamName =
taskDef.taskDefResources?.find((x) => x.LogicalResourceId === 'KinesisStream')?.PhysicalResourceId || '';
const task = await ECS.runTask({
cluster,
taskDefinition,
platformVersion: '1.4.0',
overrides: {
containerOverrides: [
{
name: taskDef.taskDefStackName,
environment,
command: ['-c', CloudRunnerBuildCommandProcessor.ProcessCommands(commands, CloudRunner.buildParameters)],
},
],
},
launchType: 'FARGATE',
networkConfiguration: {
awsvpcConfiguration: {
subnets: [SubnetOne, SubnetTwo],
assignPublicIp: 'ENABLED',
securityGroups: [ContainerSecurityGroup],
},
},
}).promise();
const taskArn = task.tasks?.[0].taskArn || '';
CloudRunnerLogger.log('Cloud runner job is starting');
await AWSTaskRunner.waitUntilTaskRunning(ECS, taskArn, cluster);
CloudRunnerLogger.log(
`Cloud runner job status is running ${(await AWSTaskRunner.describeTasks(ECS, cluster, taskArn))?.lastStatus}`,
);
const { output, shouldCleanup } = await this.streamLogsUntilTaskStops(
ECS,
CF,
taskDef,
cluster,
taskArn,
streamName,
);
const taskData = await AWSTaskRunner.describeTasks(ECS, cluster, taskArn);
const exitCode = taskData.containers?.[0].exitCode;
const wasSuccessful = exitCode === 0 || (exitCode === undefined && taskData.lastStatus === 'RUNNING');
if (wasSuccessful) {
CloudRunnerLogger.log(`Cloud runner job has finished successfully`);
return { output, shouldCleanup };
} else {
if (taskData.stoppedReason === 'Essential container in task exited' && exitCode === 1) {
throw new Error('Container exited with code 1');
}
const message = `Cloud runner job exit code ${exitCode}`;
taskData.overrides = undefined;
taskData.attachments = undefined;
CloudRunnerLogger.log(`${message} ${JSON.stringify(taskData, undefined, 4)}`);
throw new Error(message);
}
}
private static async waitUntilTaskRunning(ECS: AWS.ECS, taskArn: string, cluster: string) {
try {
await ECS.waitFor('tasksRunning', { tasks: [taskArn], cluster }).promise();
} catch (error_) {
const error = error_ as Error;
await new Promise((resolve) => setTimeout(resolve, 3000));
CloudRunnerLogger.log(
`Cloud runner job has ended ${
(await AWSTaskRunner.describeTasks(ECS, cluster, taskArn)).containers?.[0].lastStatus
}`,
);
core.setFailed(error);
core.error(error);
}
}
static async describeTasks(ECS: AWS.ECS, clusterName: string, taskArn: string) {
const tasks = await ECS.describeTasks({
cluster: clusterName,
tasks: [taskArn],
}).promise();
if (tasks.tasks?.[0]) {
return tasks.tasks?.[0];
} else {
throw new Error('No task found');
}
}
static async streamLogsUntilTaskStops(
ECS: AWS.ECS,
CF: AWS.CloudFormation,
taskDef: CloudRunnerAWSTaskDef,
clusterName: string,
taskArn: string,
kinesisStreamName: string,
) {
const kinesis = new AWS.Kinesis();
const stream = await AWSTaskRunner.getLogStream(kinesis, kinesisStreamName);
let iterator = await AWSTaskRunner.getLogIterator(kinesis, stream);
const logBaseUrl = `https://${Input.region}.console.aws.amazon.com/cloudwatch/home?region=${CF.config.region}#logsV2:log-groups/log-group/${taskDef.taskDefStackName}`;
CloudRunnerLogger.log(`You can also see the logs at AWS Cloud Watch: ${logBaseUrl}`);
let shouldReadLogs = true;
let shouldCleanup = true;
let timestamp: number = 0;
let output = '';
while (shouldReadLogs) {
await new Promise((resolve) => setTimeout(resolve, 1500));
const taskData = await AWSTaskRunner.describeTasks(ECS, clusterName, taskArn);
({ timestamp, shouldReadLogs } = AWSTaskRunner.checkStreamingShouldContinue(taskData, timestamp, shouldReadLogs));
({ iterator, shouldReadLogs, output, shouldCleanup } = await AWSTaskRunner.handleLogStreamIteration(
kinesis,
iterator,
shouldReadLogs,
taskDef,
output,
shouldCleanup,
));
}
return { output, shouldCleanup };
}
private static async handleLogStreamIteration(
kinesis: AWS.Kinesis,
iterator: string,
shouldReadLogs: boolean,
taskDef: CloudRunnerAWSTaskDef,
output: string,
shouldCleanup: boolean,
) {
const records = await kinesis
.getRecords({
ShardIterator: iterator,
})
.promise();
iterator = records.NextShardIterator || '';
({ shouldReadLogs, output, shouldCleanup } = AWSTaskRunner.logRecords(
records,
iterator,
taskDef,
shouldReadLogs,
output,
shouldCleanup,
));
return { iterator, shouldReadLogs, output, shouldCleanup };
}
private static checkStreamingShouldContinue(taskData: AWS.ECS.Task, timestamp: number, shouldReadLogs: boolean) {
if (taskData?.lastStatus === 'UNKNOWN') {
CloudRunnerLogger.log('## Cloud runner job unknwon');
}
if (taskData?.lastStatus !== 'RUNNING') {
if (timestamp === 0) {
CloudRunnerLogger.log('## Cloud runner job stopped, streaming end of logs');
timestamp = Date.now();
}
if (timestamp !== 0 && Date.now() - timestamp > 30000) {
CloudRunnerLogger.log('## Cloud runner status is not RUNNING for 30 seconds, last query for logs');
shouldReadLogs = false;
}
CloudRunnerLogger.log(`## Status of job: ${taskData.lastStatus}`);
}
return { timestamp, shouldReadLogs };
}
private static logRecords(
records,
iterator: string,
taskDef: CloudRunnerAWSTaskDef,
shouldReadLogs: boolean,
output: string,
shouldCleanup: boolean,
) {
if (records.Records.length > 0 && iterator) {
for (let index = 0; index < records.Records.length; index++) {
const json = JSON.parse(
zlib.gunzipSync(Buffer.from(records.Records[index].Data as string, 'base64')).toString('utf8'),
);
if (json.messageType === 'DATA_MESSAGE') {
for (let logEventsIndex = 0; logEventsIndex < json.logEvents.length; logEventsIndex++) {
let message = json.logEvents[logEventsIndex].message;
if (json.logEvents[logEventsIndex].message.includes(`---${CloudRunner.buildParameters.logId}`)) {
CloudRunnerLogger.log('End of log transmission received');
shouldReadLogs = false;
} else if (message.includes('Rebuilding Library because the asset database could not be found!')) {
core.warning('LIBRARY NOT FOUND!');
core.setOutput('library-found', 'false');
} else if (message.includes('Build succeeded')) {
core.setOutput('build-result', 'success');
} else if (message.includes('Build fail')) {
core.setOutput('build-result', 'failed');
core.setFailed('unity build failed');
core.error('BUILD FAILED!');
} else if (message.includes(': Listening for Jobs')) {
core.setOutput('cloud runner stop watching', 'true');
shouldReadLogs = false;
shouldCleanup = false;
core.warning('cloud runner stop watching');
}
message = `[${CloudRunnerStatics.logPrefix}] ${message}`;
if (CloudRunner.buildParameters.cloudRunnerIntegrationTests) {
output += message;
}
CloudRunnerLogger.log(message);
}
}
}
}
return { shouldReadLogs, output, shouldCleanup };
}
private static async getLogStream(kinesis: AWS.Kinesis, kinesisStreamName: string) {
return await kinesis
.describeStream({
StreamName: kinesisStreamName,
})
.promise();
}
private static async getLogIterator(kinesis: AWS.Kinesis, stream) {
return (
(
await kinesis
.getShardIterator({
ShardIteratorType: 'TRIM_HORIZON',
StreamName: stream.StreamDescription.StreamName,
ShardId: stream.StreamDescription.Shards[0].ShardId,
})
.promise()
).ShardIterator || ''
);
}
}
export default AWSTaskRunner;