mirror of
https://github.com/game-ci/unity-builder.git
synced 2026-06-15 12:36:48 -07:00
fixes
This commit is contained in:
@@ -30,6 +30,8 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
env:
|
env:
|
||||||
K3D_NODE_CONTAINERS: 'k3d-unity-builder-agent-0'
|
K3D_NODE_CONTAINERS: 'k3d-unity-builder-agent-0'
|
||||||
|
AWS_FORCE_PROVIDER: aws
|
||||||
|
RESOURCE_TRACKING: 'true'
|
||||||
steps:
|
steps:
|
||||||
# ==========================================
|
# ==========================================
|
||||||
# SETUP SECTION
|
# SETUP SECTION
|
||||||
@@ -223,66 +225,6 @@ jobs:
|
|||||||
kubectl run test-localstack --image=curlimages/curl --rm -i --restart=Never --timeout=10s -- \
|
kubectl run test-localstack --image=curlimages/curl --rm -i --restart=Never --timeout=10s -- \
|
||||||
curl -v --max-time 5 http://host.k3d.internal:4566/_localstack/health 2>&1 | head -20 || \
|
curl -v --max-time 5 http://host.k3d.internal:4566/_localstack/health 2>&1 | head -20 || \
|
||||||
echo "Cluster connectivity test - if this fails, LocalStack may not be accessible from k3d"
|
echo "Cluster connectivity test - if this fails, LocalStack may not be accessible from k3d"
|
||||||
- name: Pre-pull Unity image into k3d cluster
|
|
||||||
timeout-minutes: 15
|
|
||||||
run: |
|
|
||||||
# Pre-pull the Unity image into the k3d cluster before running tests
|
|
||||||
# This ensures it's cached in the k3d node's containerd and won't need to be pulled during test execution
|
|
||||||
UNITY_IMAGE="unityci/editor:ubuntu-2021.3.45f1-base-3"
|
|
||||||
|
|
||||||
# Check disk space before pulling
|
|
||||||
echo "Checking disk space before pre-pulling Unity image..."
|
|
||||||
K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0 k3d-unity-builder-server-0}"
|
|
||||||
for NODE in $K3D_NODE_CONTAINERS; do
|
|
||||||
echo "Disk space in $NODE:"
|
|
||||||
docker exec "$NODE" sh -c "df -h /var/lib/rancher/k3s 2>/dev/null || df -h / 2>/dev/null || true" || true
|
|
||||||
done
|
|
||||||
|
|
||||||
# Clean up before pulling to ensure we have space
|
|
||||||
echo "Cleaning up before pre-pulling image..."
|
|
||||||
for NODE in $K3D_NODE_CONTAINERS; do
|
|
||||||
docker exec "$NODE" sh -c "crictl rm --all 2>/dev/null || true" || true
|
|
||||||
# Only remove non-Unity images to preserve space while keeping Unity image if already cached
|
|
||||||
docker exec "$NODE" sh -c "for img in \$(crictl images -q 2>/dev/null); do repo=\$(crictl inspecti \$img --format '{{.repo}}' 2>/dev/null || echo ''); if echo \"\$repo\" | grep -qvE 'unityci/editor|unity'; then crictl rmi \$img 2>/dev/null || true; fi; done" || true
|
|
||||||
done || true
|
|
||||||
|
|
||||||
# Explicitly pull the image on BOTH nodes to ensure it's cached wherever pods might be scheduled
|
|
||||||
# This prevents "no space left" errors when pods are scheduled on nodes without the cached image
|
|
||||||
echo "Pulling Unity image directly on each node to ensure it's cached..."
|
|
||||||
for NODE in $K3D_NODE_CONTAINERS; do
|
|
||||||
echo "Checking if image already exists on $NODE..."
|
|
||||||
IMAGE_EXISTS=$(docker exec "$NODE" sh -c "crictl images | grep -q unityci/editor && echo 'yes' || echo 'no'" || echo "no")
|
|
||||||
if [ "$IMAGE_EXISTS" = "yes" ]; then
|
|
||||||
echo "Unity image already cached on $NODE, skipping pull"
|
|
||||||
else
|
|
||||||
echo "Pulling Unity image on $NODE (this may take several minutes for 3.9GB image)..."
|
|
||||||
# Use crictl pull directly in the node's containerd
|
|
||||||
# This ensures the image is cached in the node's local storage
|
|
||||||
# Use timeout to prevent hanging indefinitely (10 minutes max)
|
|
||||||
if timeout 600 docker exec "$NODE" sh -c "crictl pull $UNITY_IMAGE 2>&1"; then
|
|
||||||
echo "Successfully pulled image on $NODE"
|
|
||||||
# Verify it's cached
|
|
||||||
docker exec "$NODE" sh -c "crictl images | grep unityci/editor || echo 'Warning: Image not found after pull'" || true
|
|
||||||
else
|
|
||||||
PULL_EXIT_CODE=$?
|
|
||||||
if [ $PULL_EXIT_CODE -eq 124 ]; then
|
|
||||||
echo "Warning: Image pull on $NODE timed out after 10 minutes. Checking if partially cached..."
|
|
||||||
else
|
|
||||||
echo "Warning: Image pull on $NODE failed (exit code: $PULL_EXIT_CODE). Checking if partially cached..."
|
|
||||||
fi
|
|
||||||
docker exec "$NODE" sh -c "crictl images | grep unityci/editor || echo 'Image not found on $NODE'" || true
|
|
||||||
echo "Note: Pods scheduled on $NODE will attempt to pull the image during runtime, which may fail if disk space is insufficient."
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
# Verify image is cached
|
|
||||||
echo "Checking if Unity image is cached..."
|
|
||||||
for NODE in $K3D_NODE_CONTAINERS; do
|
|
||||||
docker exec "$NODE" sh -c "crictl images | grep unityci/editor || echo 'Image not found in $NODE'" || true
|
|
||||||
done
|
|
||||||
|
|
||||||
echo "Image pre-pull completed. Image should be cached in k3d node."
|
|
||||||
- name: Clean up K8s test resources before tests
|
- name: Clean up K8s test resources before tests
|
||||||
run: |
|
run: |
|
||||||
echo "Cleaning up K8s test resources..."
|
echo "Cleaning up K8s test resources..."
|
||||||
|
|||||||
@@ -194,6 +194,10 @@ inputs:
|
|||||||
description:
|
description:
|
||||||
'[CloudRunner] Either local, k8s or aws can be used to run builds on a remote cluster. Additional parameters must
|
'[CloudRunner] Either local, k8s or aws can be used to run builds on a remote cluster. Additional parameters must
|
||||||
be configured.'
|
be configured.'
|
||||||
|
resourceTracking:
|
||||||
|
default: 'false'
|
||||||
|
required: false
|
||||||
|
description: '[CloudRunner] Enable resource tracking logs for disk usage and allocation summaries.'
|
||||||
containerCpu:
|
containerCpu:
|
||||||
default: ''
|
default: ''
|
||||||
required: false
|
required: false
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ import SharedWorkspaceLocking from './services/core/shared-workspace-locking';
|
|||||||
import { FollowLogStreamService } from './services/core/follow-log-stream-service';
|
import { FollowLogStreamService } from './services/core/follow-log-stream-service';
|
||||||
import CloudRunnerResult from './services/core/cloud-runner-result';
|
import CloudRunnerResult from './services/core/cloud-runner-result';
|
||||||
import CloudRunnerOptions from './options/cloud-runner-options';
|
import CloudRunnerOptions from './options/cloud-runner-options';
|
||||||
|
import ResourceTracking from './services/core/resource-tracking';
|
||||||
|
|
||||||
class CloudRunner {
|
class CloudRunner {
|
||||||
public static Provider: ProviderInterface;
|
public static Provider: ProviderInterface;
|
||||||
@@ -37,6 +38,8 @@ class CloudRunner {
|
|||||||
CloudRunnerLogger.setup();
|
CloudRunnerLogger.setup();
|
||||||
CloudRunnerLogger.log(`Setting up cloud runner`);
|
CloudRunnerLogger.log(`Setting up cloud runner`);
|
||||||
CloudRunner.buildParameters = buildParameters;
|
CloudRunner.buildParameters = buildParameters;
|
||||||
|
ResourceTracking.logAllocationSummary('setup');
|
||||||
|
await ResourceTracking.logDiskUsageSnapshot('setup');
|
||||||
if (CloudRunner.buildParameters.githubCheckId === ``) {
|
if (CloudRunner.buildParameters.githubCheckId === ``) {
|
||||||
CloudRunner.buildParameters.githubCheckId = await GitHub.createGitHubCheck(CloudRunner.buildParameters.buildGuid);
|
CloudRunner.buildParameters.githubCheckId = await GitHub.createGitHubCheck(CloudRunner.buildParameters.buildGuid);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -287,6 +287,10 @@ class CloudRunnerOptions {
|
|||||||
return CloudRunnerOptions.getInput('asyncCloudRunner') === 'true';
|
return CloudRunnerOptions.getInput('asyncCloudRunner') === 'true';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static get resourceTracking(): boolean {
|
||||||
|
return CloudRunnerOptions.getInput('resourceTracking') === 'true';
|
||||||
|
}
|
||||||
|
|
||||||
public static get useLargePackages(): boolean {
|
public static get useLargePackages(): boolean {
|
||||||
return CloudRunnerOptions.getInput(`useLargePackages`) === `true`;
|
return CloudRunnerOptions.getInput(`useLargePackages`) === `true`;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ import { ProviderWorkflow } from '../provider-workflow';
|
|||||||
import { TaskService } from './services/task-service';
|
import { TaskService } from './services/task-service';
|
||||||
import CloudRunnerOptions from '../../options/cloud-runner-options';
|
import CloudRunnerOptions from '../../options/cloud-runner-options';
|
||||||
import { AwsClientFactory } from './aws-client-factory';
|
import { AwsClientFactory } from './aws-client-factory';
|
||||||
|
import ResourceTracking from '../../services/core/resource-tracking';
|
||||||
|
|
||||||
class AWSBuildEnvironment implements ProviderInterface {
|
class AWSBuildEnvironment implements ProviderInterface {
|
||||||
private baseStackName: string;
|
private baseStackName: string;
|
||||||
@@ -90,6 +91,8 @@ class AWSBuildEnvironment implements ProviderInterface {
|
|||||||
secrets: CloudRunnerSecret[],
|
secrets: CloudRunnerSecret[],
|
||||||
): Promise<string> {
|
): Promise<string> {
|
||||||
process.env.AWS_REGION = Input.region;
|
process.env.AWS_REGION = Input.region;
|
||||||
|
ResourceTracking.logAllocationSummary('aws workflow');
|
||||||
|
await ResourceTracking.logDiskUsageSnapshot('aws workflow (host)');
|
||||||
AwsClientFactory.getECS();
|
AwsClientFactory.getECS();
|
||||||
const CF = AwsClientFactory.getCloudFormation();
|
const CF = AwsClientFactory.getCloudFormation();
|
||||||
AwsClientFactory.getKinesis();
|
AwsClientFactory.getKinesis();
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ import { ProviderWorkflow } from '../provider-workflow';
|
|||||||
import { RemoteClientLogger } from '../../remote-client/remote-client-logger';
|
import { RemoteClientLogger } from '../../remote-client/remote-client-logger';
|
||||||
import { KubernetesRole } from './kubernetes-role';
|
import { KubernetesRole } from './kubernetes-role';
|
||||||
import { CloudRunnerSystem } from '../../services/core/cloud-runner-system';
|
import { CloudRunnerSystem } from '../../services/core/cloud-runner-system';
|
||||||
|
import ResourceTracking from '../../services/core/resource-tracking';
|
||||||
|
|
||||||
class Kubernetes implements ProviderInterface {
|
class Kubernetes implements ProviderInterface {
|
||||||
public static Instance: Kubernetes;
|
public static Instance: Kubernetes;
|
||||||
@@ -137,6 +138,9 @@ class Kubernetes implements ProviderInterface {
|
|||||||
): Promise<string> {
|
): Promise<string> {
|
||||||
try {
|
try {
|
||||||
CloudRunnerLogger.log('Cloud Runner K8s workflow!');
|
CloudRunnerLogger.log('Cloud Runner K8s workflow!');
|
||||||
|
ResourceTracking.logAllocationSummary('k8s workflow');
|
||||||
|
await ResourceTracking.logDiskUsageSnapshot('k8s workflow (host)');
|
||||||
|
await ResourceTracking.logK3dNodeDiskUsage('k8s workflow (before job)');
|
||||||
|
|
||||||
// Setup
|
// Setup
|
||||||
const id =
|
const id =
|
||||||
|
|||||||
@@ -14,11 +14,13 @@ import GitHub from '../../github';
|
|||||||
import BuildParameters from '../../build-parameters';
|
import BuildParameters from '../../build-parameters';
|
||||||
import { Cli } from '../../cli/cli';
|
import { Cli } from '../../cli/cli';
|
||||||
import CloudRunnerOptions from '../options/cloud-runner-options';
|
import CloudRunnerOptions from '../options/cloud-runner-options';
|
||||||
|
import ResourceTracking from '../services/core/resource-tracking';
|
||||||
|
|
||||||
export class RemoteClient {
|
export class RemoteClient {
|
||||||
@CliFunction(`remote-cli-pre-build`, `sets up a repository, usually before a game-ci build`)
|
@CliFunction(`remote-cli-pre-build`, `sets up a repository, usually before a game-ci build`)
|
||||||
static async setupRemoteClient() {
|
static async setupRemoteClient() {
|
||||||
CloudRunnerLogger.log(`bootstrap game ci cloud runner...`);
|
CloudRunnerLogger.log(`bootstrap game ci cloud runner...`);
|
||||||
|
await ResourceTracking.logDiskUsageSnapshot('remote-cli-pre-build (start)');
|
||||||
if (!(await RemoteClient.handleRetainedWorkspace())) {
|
if (!(await RemoteClient.handleRetainedWorkspace())) {
|
||||||
await RemoteClient.bootstrapRepository();
|
await RemoteClient.bootstrapRepository();
|
||||||
}
|
}
|
||||||
@@ -206,6 +208,7 @@ export class RemoteClient {
|
|||||||
// that read from the log file rather than stdout
|
// that read from the log file rather than stdout
|
||||||
RemoteClientLogger.log(successMessage);
|
RemoteClientLogger.log(successMessage);
|
||||||
CloudRunnerLogger.log(successMessage);
|
CloudRunnerLogger.log(successMessage);
|
||||||
|
await ResourceTracking.logDiskUsageSnapshot('remote-cli-post-build (end)');
|
||||||
|
|
||||||
return new Promise((result) => result(``));
|
return new Promise((result) => result(``));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,84 @@
|
|||||||
|
import CloudRunnerLogger from './cloud-runner-logger';
|
||||||
|
import CloudRunnerOptions from '../../options/cloud-runner-options';
|
||||||
|
import CloudRunner from '../../cloud-runner';
|
||||||
|
import { CloudRunnerSystem } from './cloud-runner-system';
|
||||||
|
|
||||||
|
/**
 * Opt-in diagnostics that write resource allocation and disk usage information
 * to the cloud runner log.
 *
 * Every public method is a no-op unless {@link ResourceTracking.isEnabled}
 * returns true. The methods are best-effort: a failing shell command is logged
 * and swallowed so that diagnostics never fail the build they are observing.
 */
class ResourceTracking {
  /** k3d node containers inspected by {@link ResourceTracking.logK3dNodeDiskUsage}. */
  private static readonly k3dNodes: readonly string[] = ['k3d-unity-builder-agent-0', 'k3d-unity-builder-server-0'];

  /** Shell commands run, in order, for a disk usage snapshot. Each command doubles as its log label. */
  private static readonly diskUsageCommands: readonly string[] = [
    'df -h',
    'du -sh .',
    'du -sh ./cloud-runner-cache',
    'du -sh ./temp',
    'du -sh ./logs',
  ];

  /**
   * @returns true when the `resourceTracking` option is on, when cloud runner
   * debug is on, or when the `cloudRunnerTests` environment variable is `'true'`.
   */
  static isEnabled(): boolean {
    return (
      CloudRunnerOptions.resourceTracking ||
      CloudRunnerOptions.cloudRunnerDebug ||
      process.env['cloudRunnerTests'] === 'true'
    );
  }

  /**
   * Logs the resource-related build parameters as pretty-printed JSON.
   *
   * @param context Free-form label identifying the call site; included in the log heading.
   */
  static logAllocationSummary(context: string): void {
    if (!ResourceTracking.isEnabled()) {
      return;
    }

    const buildParameters = CloudRunner.buildParameters;

    // Diagnostics must not crash the build: if this runs before
    // CloudRunner.buildParameters has been assigned, report that and bail out
    // instead of throwing on the property reads below.
    if (!buildParameters) {
      CloudRunnerLogger.log(`[ResourceTracking] Allocation summary (${context}): build parameters not available`);

      return;
    }

    const allocations = {
      providerStrategy: buildParameters.providerStrategy,
      containerCpu: buildParameters.containerCpu,
      containerMemory: buildParameters.containerMemory,
      dockerCpuLimit: buildParameters.dockerCpuLimit,
      dockerMemoryLimit: buildParameters.dockerMemoryLimit,
      kubeVolumeSize: buildParameters.kubeVolumeSize,
      kubeStorageClass: buildParameters.kubeStorageClass,
      kubeVolume: buildParameters.kubeVolume,
      containerNamespace: buildParameters.containerNamespace,
      storageProvider: buildParameters.storageProvider,
      rcloneRemote: buildParameters.rcloneRemote,
      dockerWorkspacePath: buildParameters.dockerWorkspacePath,
      cacheKey: buildParameters.cacheKey,
      maxRetainedWorkspaces: buildParameters.maxRetainedWorkspaces,
      useCompressionStrategy: buildParameters.useCompressionStrategy,
      useLargePackages: buildParameters.useLargePackages,

      // NOTE(review): this value is hard-coded here, not read from the job
      // spec - confirm it matches what the Kubernetes job actually requests.
      ephemeralStorageRequest: process.env['cloudRunnerTests'] === 'true' ? 'not set' : '2Gi',
    };

    CloudRunnerLogger.log(`[ResourceTracking] Allocation summary (${context}):`);
    CloudRunnerLogger.log(JSON.stringify(allocations, undefined, 2));
  }

  /**
   * Logs overall filesystem usage plus the size of the working directory and
   * of its `cloud-runner-cache`, `temp` and `logs` sub-directories.
   *
   * @param context Free-form label identifying the call site; included in the log heading.
   */
  static async logDiskUsageSnapshot(context: string): Promise<void> {
    if (!ResourceTracking.isEnabled()) {
      return;
    }

    CloudRunnerLogger.log(`[ResourceTracking] Disk usage snapshot (${context})`);

    // Run sequentially so the log output keeps a stable, readable order.
    for (const command of ResourceTracking.diskUsageCommands) {
      await ResourceTracking.runAndLog(command, command);
    }
  }

  /**
   * Logs disk usage from inside each known k3d node container via `docker exec`.
   *
   * @param context Free-form label identifying the call site; included in the log heading.
   */
  static async logK3dNodeDiskUsage(context: string): Promise<void> {
    if (!ResourceTracking.isEnabled()) {
      return;
    }

    CloudRunnerLogger.log(`[ResourceTracking] K3d node disk usage (${context})`);
    for (const node of ResourceTracking.k3dNodes) {
      // The trailing `|| true` keeps the command's exit status zero when the
      // container does not exist or neither `df` call succeeds.
      await ResourceTracking.runAndLog(
        `k3d node ${node}`,
        `docker exec ${node} sh -c "df -h /var/lib/rancher/k3s 2>/dev/null || df -h / 2>/dev/null || true" || true`,
      );
    }
  }

  /**
   * Runs a shell command and logs its trimmed output under the given label.
   * Failures are logged and swallowed.
   *
   * @param label Human-readable name used in the log line.
   * @param command Shell command passed to `CloudRunnerSystem.Run`.
   */
  private static async runAndLog(label: string, command: string): Promise<void> {
    try {
      // NOTE(review): the two `true` flags are presumably suppressError and
      // suppressLogs - confirm against CloudRunnerSystem.Run.
      const output = await CloudRunnerSystem.Run(command, true, true);
      const trimmed = output.trim();
      CloudRunnerLogger.log(`[ResourceTracking] ${label}:\n${trimmed || 'no output'}`);
    } catch (error: unknown) {
      // Prefer the Error message; fall back to the stringified value for
      // non-Error throws or an empty message.
      const message = error instanceof Error ? error.message : undefined;
      CloudRunnerLogger.log(`[ResourceTracking] ${label} failed: ${message || String(error)}`);
    }
  }
}
|
||||||
|
|
||||||
|
export default ResourceTracking;
|
||||||
Reference in New Issue
Block a user