From 04d38f649f8d3d968fab9958e273f53f21543fe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dean=20Qui=C3=B1anola?= Date: Thu, 16 Apr 2026 12:21:17 -0700 Subject: [PATCH] docs(serverless): document missing fitness-check env vars - Add RUNPOD_GPU_MAX_ERROR_MESSAGES to advanced GPU check config - Document RUNPOD_SKIP_AUTO_SYSTEM_CHECKS and RUNPOD_SKIP_GPU_CHECK under a new "Disabling Built-in Checks" subsection, noting that user-registered checks still run when these flags are set These flags exist in rp_fitness.py and rp_gpu_fitness.py but were previously undocumented. --- docs/serverless/worker_fitness_checks.md | 27 ++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/docs/serverless/worker_fitness_checks.md b/docs/serverless/worker_fitness_checks.md index a255045a..705910b4 100644 --- a/docs/serverless/worker_fitness_checks.md +++ b/docs/serverless/worker_fitness_checks.md @@ -199,6 +199,12 @@ os.environ["RUNPOD_GPU_TEST_TIMEOUT"] = "60" # Override binary path (for custom/patched versions) os.environ["RUNPOD_BINARY_GPU_TEST_PATH"] = "/custom/path/gpu_test" + +# Cap the number of error messages parsed from gpu_test output (default: 10) +os.environ["RUNPOD_GPU_MAX_ERROR_MESSAGES"] = "20" + +# Skip auto-registration of this check (primarily for testing) +os.environ["RUNPOD_SKIP_GPU_CHECK"] = "true" ``` **What it tests**: @@ -374,6 +380,27 @@ os.environ["RUNPOD_MIN_MEMORY_GB"] = "8.0" os.environ["RUNPOD_MIN_DISK_PERCENT"] = "15.0" ``` +### Disabling Built-in Checks + +For testing or specialized deployments, built-in checks can be disabled via environment variables. Disabling built-in checks is not recommended for production use.
+ +| Env var | Effect | +|---|---| +| `RUNPOD_SKIP_AUTO_SYSTEM_CHECKS=true` | Skips auto-registration of memory, disk, network, CUDA version, CUDA init, and GPU benchmark checks | +| `RUNPOD_SKIP_GPU_CHECK=true` | Skips auto-registration of the native GPU memory allocation test (`gpu_test` binary) | + +```python +import os + +# Disable all auto-registered system checks (testing only) +os.environ["RUNPOD_SKIP_AUTO_SYSTEM_CHECKS"] = "true" + +# Disable the automatic GPU memory allocation test +os.environ["RUNPOD_SKIP_GPU_CHECK"] = "true" +``` + +Checks registered by users via `@register_fitness_check` still run regardless of these flags. + ## Behavior ### Execution Timing