diff --git a/.server-changes/supervisor-pod-dns-ndots.md b/.server-changes/supervisor-pod-dns-ndots.md new file mode 100644 index 0000000000..48d3563f14 --- /dev/null +++ b/.server-changes/supervisor-pod-dns-ndots.md @@ -0,0 +1,8 @@ +--- +area: supervisor +type: feature +--- + +Add `KUBERNETES_POD_DNS_NDOTS_OVERRIDE_ENABLED` flag (off by default) that overrides the cluster default and sets `dnsConfig.options.ndots` on runner pods (defaulting to 2, configurable via `KUBERNETES_POD_DNS_NDOTS`). Kubernetes defaults pods to `ndots: 5`, so any name with fewer than 5 dots — including typical external domains like `api.example.com` — is first walked through every entry in the cluster search list (`.svc.cluster.local`, `svc.cluster.local`, `cluster.local`) before being tried as-is, turning one resolution into 4+ CoreDNS queries (×2 with A+AAAA). Using a lower `ndots` value reduces DNS query amplification in the `cluster.local` zone. + +Note: before enabling, make sure no code path relies on search-list expansion for names with dots ≥ the configured value — those names will hit their as-is form first and could resolve externally before falling back to the cluster search path. diff --git a/apps/supervisor/src/env.ts b/apps/supervisor/src/env.ts index b69fb24d73..f2d54741ee 100644 --- a/apps/supervisor/src/env.ts +++ b/apps/supervisor/src/env.ts @@ -121,6 +121,16 @@ const Env = z KUBERNETES_MEMORY_OVERHEAD_GB: z.coerce.number().min(0).optional(), // Optional memory overhead to add to the limit in GB KUBERNETES_SCHEDULER_NAME: z.string().optional(), // Custom scheduler name for pods + + // Pod DNS config — override the cluster default ndots to `KUBERNETES_POD_DNS_NDOTS`. + // Default k8s ndots is 5: any name with fewer than 5 dots (e.g. `api.example.com`, 2 dots) is first walked + // through every entry in the cluster search list (`.svc.cluster.local`, `svc.cluster.local`, `cluster.local`) + // before being tried as-is, turning one resolution into 4+ CoreDNS queries (×2 with A+AAAA). + // Overriding the default can be useful to cut CoreDNS query amplification for external domains. + // Note: before enabling, make sure no code path relies on search-list expansion for names with dots ≥ the value + // set here — those names will now hit their as-is form first and could resolve externally before falling back. + KUBERNETES_POD_DNS_NDOTS_OVERRIDE_ENABLED: BoolEnv.default(false), + KUBERNETES_POD_DNS_NDOTS: z.coerce.number().int().min(1).max(15).default(2), // Large machine affinity settings - large-* presets prefer a dedicated pool KUBERNETES_LARGE_MACHINE_AFFINITY_ENABLED: BoolEnv.default(false), KUBERNETES_LARGE_MACHINE_AFFINITY_POOL_LABEL_KEY: z @@ -189,7 +199,9 @@ const Env = z if (!validEffects.includes(effect)) { ctx.addIssue({ code: z.ZodIssueCode.custom, - message: `Invalid toleration effect "${effect}" in "${entry}". Must be one of: ${validEffects.join(", ")}`, + message: `Invalid toleration effect "${effect}" in "${entry}". Must be one of: ${validEffects.join( + ", " + )}`, }); return z.NEVER; } diff --git a/apps/supervisor/src/workloadManager/kubernetes.ts b/apps/supervisor/src/workloadManager/kubernetes.ts index ec08926721..b2ed05c9f1 100644 --- a/apps/supervisor/src/workloadManager/kubernetes.ts +++ b/apps/supervisor/src/workloadManager/kubernetes.ts @@ -321,6 +321,13 @@ export class KubernetesWorkloadManager implements WorkloadManager { }, } : {}), + ...(env.KUBERNETES_POD_DNS_NDOTS_OVERRIDE_ENABLED + ? { + dnsConfig: { + options: [{ name: "ndots", value: `${env.KUBERNETES_POD_DNS_NDOTS}` }], + }, + } + : {}), }; }