diff --git a/.claude/agents/security-reviewer.md b/.claude/agents/security-reviewer.md index a5625045..6ae10889 100644 --- a/.claude/agents/security-reviewer.md +++ b/.claude/agents/security-reviewer.md @@ -4,7 +4,7 @@ Apply these rules from CLAUDE.md exactly: **Safe File Operations**: Use safeDelete()/safeDeleteSync() from @socketsecurity/lib/fs. NEVER fs.rm(), fs.rmSync(), or rm -rf. Use os.tmpdir() + fs.mkdtemp() for temp dirs. NEVER use fetch() — use httpJson/httpText/httpRequest from @socketsecurity/lib/http-request. -**Absolute Rules**: NEVER use npx, pnpm dlx, or yarn dlx. Use pnpm exec or pnpm run with pinned devDeps. +**Absolute Rules**: NEVER use npx, pnpm dlx, or yarn dlx. Use pnpm exec or pnpm run with pinned devDeps. # zizmor: documentation-prohibition **Work Safeguards**: Scripts modifying multiple files must have backup/rollback. Git operations that rewrite history require explicit confirmation. @@ -12,7 +12,7 @@ Apply these rules from CLAUDE.md exactly: 1. **Secrets**: Hardcoded API keys, passwords, tokens, private keys in code or config 2. **Injection**: Command injection via shell: true or string interpolation in spawn/exec. Path traversal in file operations. -3. **Dependencies**: npx/dlx usage. Unpinned versions (^ or ~). Missing minimumReleaseAge bypass justification. +3. **Dependencies**: npx/dlx usage. Unpinned versions (^ or ~). Missing minimumReleaseAge bypass justification. # zizmor: documentation-checklist 4. **File operations**: fs.rm without safeDelete. process.chdir usage. fetch() usage (must use lib's httpRequest). 5. **GitHub Actions**: Unpinned action versions (must use full SHA). Secrets outside env blocks. Template injection from untrusted inputs. 6. **Error handling**: Sensitive data in error messages. Stack traces exposed to users. 
diff --git a/.claude/hooks/check-new-deps/README.md b/.claude/hooks/check-new-deps/README.md index 25fb1128..5be7f3a6 100644 --- a/.claude/hooks/check-new-deps/README.md +++ b/.claude/hooks/check-new-deps/README.md @@ -8,9 +8,10 @@ When Claude edits a file like `package.json`, `requirements.txt`, `Cargo.toml`, 1. **Detects the file type** and extracts dependency names from the content 2. **Diffs against the old content** (for edits) so only *newly added* deps are checked -3. **Queries the Socket.dev API** to check for malware -4. **Blocks the edit** (exit code 2) if malware is detected -5. **Allows** (exit code 0) if everything is clean or the file isn't a manifest +3. **Queries the Socket.dev API** to check for malware and critical security alerts +4. **Blocks the edit** (exit code 2) if malware or critical alerts are found +5. **Warns** (but allows) if a package has a low quality score +6. **Allows** (exit code 0) if everything is clean or the file isn't a manifest ## How it works @@ -29,8 +30,11 @@ Build Package URLs (PURLs) for each dep │ ▼ Call sdk.checkMalware(components) + - ≤5 deps: parallel firewall API (fast, full data) + - >5 deps: batch PURL API (efficient) │ - ├── Malware detected → EXIT 2 (blocked) + ├── Malware/critical alert → EXIT 2 (blocked) + ├── Low score → warn, EXIT 0 (allowed) └── Clean → EXIT 0 (allowed) ``` diff --git a/.claude/hooks/check-new-deps/index.mts b/.claude/hooks/check-new-deps/index.mts index 0118aa87..180037e6 100644 --- a/.claude/hooks/check-new-deps/index.mts +++ b/.claude/hooks/check-new-deps/index.mts @@ -86,6 +86,7 @@ interface CheckResult { reason?: string } + // A cached API lookup result with expiration timestamp. interface CacheEntry { result: CheckResult | undefined @@ -159,23 +160,46 @@ const extractors: Record = { (m): Dep => ({ type: 'cargo', name: m[1] }) ), 'Cargo.toml': (content: string): Dep[] => { - // Rust: only extract from [dependencies], [dev-dependencies], [build-dependencies] sections. 
- // Skip [package], [lib], [bin], [workspace], [profile] metadata sections. + // Rust: extract crate names from dep lines. + // + // Two-mode strategy because the hook receives either a full + // Cargo.toml (Write) or a fragment (Edit's new_string, often just + // the added line with no section header): + // + // Full file — scan only [dependencies] / [dev-dependencies] / + // [build-dependencies] (incl. target-specific + // [target.*.dependencies] via the `.` suffix) + // and skip [package], [features], [profile], etc. + // Fragment — no section headers at all → treat the whole + // content as an implicit [dependencies] body and + // match any `name = "..."` or `name = { version = "..." }`. + // + // The lineRe requires the value to look like a version spec + // (string or table with a `version` key), so `[features]`-style + // `key = ["derive"]` array values don't match even in fragment mode. const deps: Dep[] = [] - const depSectionRe = /^\[(?:(?:dev-|build-)?dependencies(?:\.[^\]]+)?)\]\s*$/gm + const depSectionRe = /^\[(?:(?:dev-|build-)?dependencies(?:\.[^\]]+)?|target\.[^\]]+\.(?:dev-|build-)?dependencies(?:\.[^\]]+)?)\]\s*$/gm const anySectionRe = /^\[/gm + const lineRe = /^(\w[\w-]*)\s*=\s*(?:\{[^}]*version\s*=\s*"[^"]*"|\s*"[^"]*")/gm + const push = (section: string) => { + let m + while ((m = lineRe.exec(section)) !== null) { + deps.push({ type: 'cargo', name: m[1] }) + } + lineRe.lastIndex = 0 + } + const hasAnySection = /^\[/m.test(content) + if (!hasAnySection) { + push(content) + return deps + } let sectionMatch while ((sectionMatch = depSectionRe.exec(content)) !== null) { const sectionStart = sectionMatch.index + sectionMatch[0].length anySectionRe.lastIndex = sectionStart const nextSection = anySectionRe.exec(content) const sectionEnd = nextSection ? 
nextSection.index : content.length - const sectionText = content.slice(sectionStart, sectionEnd) - const lineRe = /^(\w[\w-]*)\s*=\s*(?:\{[^}]*version\s*=\s*"[^"]*"|\s*"[^"]*")/gm - let m - while ((m = lineRe.exec(sectionText)) !== null) { - deps.push({ type: 'cargo', name: m[1] }) - } + push(content.slice(sectionStart, sectionEnd)) } return deps }, @@ -280,21 +304,6 @@ const extractors: Record = { 'yarn.lock': extractNpmLockfile, } -// --- main (only when executed directly, not imported) --- - -if (fileURLToPath(import.meta.url) === path.resolve(process.argv[1])) { - // Read the full JSON blob from stdin (piped by Claude Code). - let input = '' - for await (const chunk of process.stdin) input += chunk - const hook: HookInput = JSON.parse(input) - - if (hook.tool_name !== 'Edit' && hook.tool_name !== 'Write') { - process.exitCode = 0 - } else { - process.exitCode = await check(hook) - } -} - // --- core --- // Orchestrates the full check: extract deps, diff against old, query API. @@ -728,3 +737,26 @@ export { extractTerraform, findExtractor, } + +// --- main (only when executed directly, not imported) --- +// +// Kept at the bottom because the module uses top-level await +// (`for await (const chunk of process.stdin)`) to read the hook payload. +// Top-level await suspends module evaluation at the suspension point, so +// any `const` declared AFTER the suspending block is still in the TDZ +// when the awaited work calls back into the module (e.g. extractNpm → +// PACKAGE_JSON_METADATA_KEYS). Placing main last guarantees every +// module-level declaration is initialized before main runs. + +if (fileURLToPath(import.meta.url) === path.resolve(process.argv[1])) { + // Read the full JSON blob from stdin (piped by Claude Code). 
+ let input = '' + for await (const chunk of process.stdin) input += chunk + const hook: HookInput = JSON.parse(input) + + if (hook.tool_name !== 'Edit' && hook.tool_name !== 'Write') { + process.exitCode = 0 + } else { + process.exitCode = await check(hook) + } +} diff --git a/.claude/hooks/check-new-deps/package.json b/.claude/hooks/check-new-deps/package.json index 1da79119..4d8496b9 100644 --- a/.claude/hooks/check-new-deps/package.json +++ b/.claude/hooks/check-new-deps/package.json @@ -1,5 +1,5 @@ { - "name": "@socketsecurity/hook-check-new-deps", + "name": "hook-check-new-deps", "private": true, "type": "module", "main": "./index.mts", @@ -11,7 +11,7 @@ }, "dependencies": { "@socketregistry/packageurl-js": "1.4.2", - "@socketsecurity/lib": "5.21.0", + "@socketsecurity/lib": "5.24.0", "@socketsecurity/sdk": "4.0.1" }, "devDependencies": { diff --git a/.claude/hooks/path-guard/README.md b/.claude/hooks/path-guard/README.md new file mode 100644 index 00000000..523a31b4 --- /dev/null +++ b/.claude/hooks/path-guard/README.md @@ -0,0 +1,66 @@ +# path-guard + +Claude Code `PreToolUse` hook that refuses `Edit`/`Write` tool calls that would *construct* a multi-segment build/output path inline in a `.mts` or `.cts` file. Mandatory across the Socket fleet — every repo ships this file byte-for-byte via `scripts/sync-scaffolding.mjs`. + +**Mantra: 1 path, 1 reference.** + +Construct a path *once* in the canonical `paths.mts` (or a build-infra helper); reference the computed value everywhere else. 
+ +## What it blocks + +| Rule | Example | Fix | +|------|---------|-----| +| **A** — Multi-stage path constructed inline | `path.join(PKG, 'build', mode, 'out', 'Final', name)` | Construct in the package's `scripts/paths.mts` (or use `getFinalBinaryPath` from `build-infra/lib/paths`); import the computed value here | +| **B** — Cross-package path traversal | `path.join(PKG, '..', 'lief-builder', 'build', ...)` | Add `lief-builder: workspace:*` as a dep; import its `paths.mts` via the workspace `exports` field | + +The hook fires on `Edit` and `Write` tool calls when the target path ends in `.mts` or `.cts`. Other extensions (`.ts`, `.mjs`, `.js`, `.yml`, `.json`, `.md`) pass through — TS path code lives in `.mts` per CLAUDE.md, and other file types are covered by the `scripts/check-paths.mts` gate at commit time. + +## What it allows + +- Edits to a `paths.mts` (canonical constructor — every package's source of truth). +- Edits to `scripts/check-paths.mts` (the gate, which legitimately enumerates patterns). +- Edits to this hook's own files (the test suite has to enumerate the same patterns). +- Edits to `scripts/check-consistency.mts` (existing path-scanning gate). +- `path.join` calls with a single stage segment (e.g. `path.join(packageRoot, 'build', 'temp')`) — that's a one-off helper path, not a multi-stage build output. +- `path.join` calls with no stage segments at all (most general-purpose joins). +- Any string concatenation that doesn't go through `path.join` — the hook is regex-based and intentionally narrow; the gate runs a deeper scan at commit time. + +## Stage segments the hook recognizes + +These come from `build-infra/lib/constants.mts` `BUILD_STAGES` plus the lowercase directory-name siblings used by some builders: + +`Final`, `Release`, `Stripped`, `Compressed`, `Optimized`, `Synced`, `wasm`, `downloaded` + +Two or more in the same `path.join` call (or one stage + one of `'build'`/`'out'` + one mode `'dev'`/`'prod'`) triggers Rule A. 
+ +## Known sibling packages (for Rule B) + +The hook recognizes Rule B traversals only when the next segment after `..` is a known fleet package name: + +`binflate`, `binject`, `binpress`, `bin-infra`, `build-infra`, `codet5-models-builder`, `curl-builder`, `iocraft-builder`, `ink-builder`, `libpq-builder`, `lief-builder`, `minilm-builder`, `models`, `napi-go`, `node-smol-builder`, `onnxruntime-builder`, `opentui-builder`, `stubs-builder`, `ultraviolet-builder`, `yoga-layout-builder` + +When a new package joins the workspace, add it here. + +## Control flow + +The hook reads the tool-use payload from stdin, type-checks `tool_name === 'Edit'` or `'Write'`, filters to `.mts`/`.cts` files, and runs `check(source)`. Any rule violation `throw`s a typed `BlockError`; a single top-level `try/catch` in `main()` writes the block message to stderr and sets `process.exitCode = 2`. + +Hook bugs fail **open** — a crash in the hook writes a log line and returns exit 0 so legitimate work isn't blocked on a bad deploy. The companion `scripts/check-paths.mts` gate runs a thorough whole-repo scan at `pnpm check` time, catching anything the hook misses. + +## Testing + +```bash +pnpm --filter hook-path-guard test +``` + +Adding a new detection pattern: update `STAGE_SEGMENTS` (or `KNOWN_SIBLING_PACKAGES`) in `index.mts`, add a positive and negative test in `test/path-guard.test.mts`. + +## Updating across the fleet + +This file is in `IDENTICAL_FILES` in `scripts/sync-scaffolding.mjs` (in `socket-repo-template`). After editing, run from `socket-repo-template`: + +```bash +node scripts/sync-scaffolding.mjs --all --fix +``` + +to propagate the change to every fleet repo. diff --git a/.claude/hooks/path-guard/index.mts b/.claude/hooks/path-guard/index.mts new file mode 100644 index 00000000..ced9fcfc --- /dev/null +++ b/.claude/hooks/path-guard/index.mts @@ -0,0 +1,339 @@ +#!/usr/bin/env node +// Claude Code PreToolUse hook — path-guard firewall. +// +// Mantra: 1 path, 1 reference. 
+// +// Blocks Edit/Write tool calls that would *construct* a multi-segment +// build/output path inline in a `.mts` or `.cts` file, instead of +// importing the constructed value from the canonical `paths.mts` (or a +// build-infra helper). This fires BEFORE the write lands; exit code 2 +// makes Claude Code refuse the tool call so the diff never touches the +// repo. The model sees the rejection reason on stderr and retries with +// an import-based approach. +// +// What the hook checks (subset of the gate's rules — diff-local only): +// +// Rule A — Multi-stage path construction: a `path.join(...)` call or +// string-template that stitches together two or more "stage" segments +// like `'Final'`, `'Release'`, `'Stripped'`, `'Compressed'`, +// `'Optimized'`, `'Synced'`, `'wasm'`, `'downloaded'` together with +// `'build'` / `'out'` / a mode (`'dev'`/`'prod'`) or platform-arch. +// Outside a `paths.mts` file, this is always a violation: the +// construction belongs in a helper, every consumer imports the +// computed value. +// +// Rule B — Cross-package traversal: `path.join(*, '..', '', 'build', ...)` reaches into a sibling's build output +// without going through its `exports`. Forces consumers to declare a +// workspace dep and import the sibling's `paths.mts`. The R28 yoga/ +// ink bug — ink hand-building yoga's wasm path and missing the +// `wasm/` segment — is exactly the failure mode this prevents. +// +// What the hook does NOT check (the gate handles repo-wide concerns): +// +// Rule C — workflow YAML repetition (gate scans .yml files). +// Rule D — comment-encoded paths (gate scans comments + JSDoc). +// Rule F — same path reconstructed in multiple files (needs whole- +// repo state). +// Rule G — Makefile / Dockerfile / shell-script paths (different +// tool, gate covers). +// +// Scope: +// +// - Fires only on `Edit` and `Write` tool calls. +// - Skips files NOT ending in `.mts` or `.cts`. 
TS path code lives +// there; .ts/.mjs/.js sources in `additions/` have different +// constraints per CLAUDE.md. +// - Skips when the target itself is a `paths.mts` (canonical +// constructor), the gate (`scripts/check-paths.mts`), or this hook +// — those files legitimately enumerate stage segments. +// +// Control flow uses a `BlockError` thrown from check helpers so every +// short-circuit path goes through a single `process.exitCode = 2` drop +// at the top-level catch — no scattered `process.exit(2)` that can race +// with buffered stderr. The hook fails OPEN on its own bugs (exit 0 + +// log) so a bad deploy of the hook can't brick the session. + +import process from 'node:process' + +import { + BUILD_ROOT_SEGMENTS, + KNOWN_SIBLING_PACKAGES, + MODE_SEGMENTS, + STAGE_SEGMENTS, +} from './segments.mts' + +// File-path patterns that are exempt from the hook entirely. Edits to +// these files legitimately need to enumerate path segments. +const EXEMPT_FILE_PATTERNS: RegExp[] = [ + // Any paths.mts is the canonical constructor. + /(^|\/)paths\.(mts|cts)$/, + // The gate itself and this hook — both enumerate the patterns to + // detect them. + /scripts\/check-paths\.mts$/, + /\.claude\/hooks\/path-guard\/index\.(mts|cts)$/, + /\.claude\/hooks\/path-guard\/test\//, + // Existing path-scanning gates that intentionally enumerate. + /scripts\/check-consistency\.mts$/, +] + +class BlockError extends Error { + public readonly rule: string + public readonly suggestion: string + public readonly snippet: string + constructor(rule: string, suggestion: string, snippet: string) { + super(rule) + this.name = 'BlockError' + this.rule = rule + this.suggestion = suggestion + this.snippet = snippet.slice(0, 240) + (snippet.length > 240 ? 
'…' : '') + } +} + +const stdin = (): Promise => + new Promise(resolve => { + let buf = '' + process.stdin.setEncoding('utf8') + process.stdin.on('data', chunk => (buf += chunk)) + process.stdin.on('end', () => resolve(buf)) + }) + +type ToolInput = { + tool_name?: string + tool_input?: { + file_path?: string + new_string?: string + content?: string + } +} + +const isInScope = (filePath: string): boolean => { + if (!filePath) { + return false + } + // Only inspect TypeScript-Module / CommonJS-Module sources. Per + // the user's directive, allowlist by extension. + if (!filePath.endsWith('.mts') && !filePath.endsWith('.cts')) { + return false + } + return !EXEMPT_FILE_PATTERNS.some(re => re.test(filePath)) +} + +// Extract every `path.join(...)` and `path.resolve(...)` call from +// the diff and return its argument substring. Uses paren-balancing so +// deeply nested arguments like `path.join(getDir(child(x)), 'Final')` +// are captured correctly — a regex-only approach silently missed any +// argument with 2+ levels of nested parentheses. +const extractPathCalls = ( + source: string, +): Array<{ snippet: string; literals: string[] }> => { + const calls: Array<{ snippet: string; literals: string[] }> = [] + const callRe = /\bpath\.(?:join|resolve)\s*\(/g + let m: RegExpExecArray | null + while ((m = callRe.exec(source)) !== null) { + const callStart = m.index + const argsStart = callRe.lastIndex + let depth = 1 + let i = argsStart + let inString: '"' | "'" | '`' | null = null + while (i < source.length && depth > 0) { + const ch = source[i]! 
+ if (inString) { + if (ch === '\\') { + i += 2 + continue + } + if (ch === inString) { + inString = null + } + } else { + if (ch === '"' || ch === "'" || ch === '`') { + inString = ch + } else if (ch === '(') { + depth += 1 + } else if (ch === ')') { + depth -= 1 + if (depth === 0) { + break + } + } + } + i += 1 + } + if (depth !== 0) { + continue + } + const args = source.slice(argsStart, i) + const litRe = /(['"])((?:\\.|(?!\1)[^\\])*)\1/g + const literals: string[] = [] + let lit: RegExpExecArray | null + while ((lit = litRe.exec(args)) !== null) { + const value = lit[2] + if (value !== undefined) { + literals.push(value) + } + } + calls.push({ snippet: source.slice(callStart, i + 1), literals }) + callRe.lastIndex = i + 1 + } + return calls +} + +const checkRuleA = (calls: ReturnType): void => { + for (const call of calls) { + const stages = call.literals.filter(l => STAGE_SEGMENTS.has(l)) + const buildRoots = call.literals.filter(l => BUILD_ROOT_SEGMENTS.has(l)) + const modes = call.literals.filter(l => MODE_SEGMENTS.has(l)) + // Trigger if: 2+ stage segments OR (1 stage + 1 build-root + 1 mode). + // Both shapes indicate a hand-built build-output path. + const twoStages = stages.length >= 2 + const stagePlusContext = + stages.length >= 1 && buildRoots.length >= 1 && modes.length >= 1 + if (twoStages || stagePlusContext) { + throw new BlockError( + 'A — multi-stage path constructed inline', + 'Construct this path in the owning `paths.mts` (or a build-infra helper like `getFinalBinaryPath`) and import the computed value here. 1 path, 1 reference.', + call.snippet, + ) + } + } +} + +const checkRuleB = (calls: ReturnType): void => { + for (const call of calls) { + // A sibling package name *immediately after* a `..` literal (no + // path segment in between) plus build context elsewhere in the + // call indicates cross-package traversal. The previous "sticky + // sawDotDot" form fired falsely when '..' 
appeared early and an + // unrelated sibling-named segment appeared much later. + const hasBuildContext = call.literals.some( + l => BUILD_ROOT_SEGMENTS.has(l) || STAGE_SEGMENTS.has(l), + ) + if (!hasBuildContext) { + continue + } + for (let i = 0; i < call.literals.length - 1; i++) { + if ( + call.literals[i] === '..' && + KNOWN_SIBLING_PACKAGES.has(call.literals[i + 1]!) + ) { + const sibling = call.literals[i + 1]! + throw new BlockError( + 'B — cross-package path traversal', + `Don't reach into '${sibling}'s build output via \`..\`. Add \`${sibling}: workspace:*\` as a dep and import its \`paths.mts\` via the \`exports\` field. 1 path, 1 reference.`, + call.snippet, + ) + } + } + } +} + +// Backtick template-literal detection. Path construction via +// `${buildDir}/out/Final/${binary}` follows the same shape as +// path.join() and constitutes the same Rule A violation. Placeholders +// (${...}) are stripped to a sentinel that won't match any segment +// set, so segments composed entirely of interpolation contribute +// nothing to the trigger. +const TEMPLATE_LITERAL_RE = /`((?:\\.|(?:\$\{(?:[^{}]|\{[^{}]*\})*\})|(?!`)[^\\])*)`/g + +const checkRuleATemplate = (source: string): void => { + TEMPLATE_LITERAL_RE.lastIndex = 0 + let m: RegExpExecArray | null + while ((m = TEMPLATE_LITERAL_RE.exec(source)) !== null) { + const body = m[1] ?? '' + if (!body.includes('/')) { + continue + } + const stripped = body.replace(/\$\{(?:[^{}]|\{[^{}]*\})*\}/g, '\x00') + const segments = stripped + .split('/') + .filter(s => s.length > 0 && s !== '\x00') + const stages = segments.filter(s => STAGE_SEGMENTS.has(s)) + const buildRoots = segments.filter(s => BUILD_ROOT_SEGMENTS.has(s)) + const modes = segments.filter(s => MODE_SEGMENTS.has(s)) + // Template literal trigger is tighter than path.join() because + // backtick strings often appear in patch fixtures, error messages, + // and other multi-line content that incidentally contains stage + // tokens like `wasm`. 
Require the canonical build-output shape. + const hasBuildAndOut = + buildRoots.includes('build') && buildRoots.includes('out') + const hasOut = buildRoots.includes('out') + const hasBuild = buildRoots.includes('build') + const triggers = + (hasBuildAndOut && stages.length >= 1) || + (stages.length >= 2 && hasOut) || + (hasBuild && stages.length >= 1 && modes.length >= 1) + if (triggers) { + throw new BlockError( + 'A — multi-stage path constructed inline via template literal', + 'Construct this path in the owning `paths.mts` (or a build-infra helper) and import the computed value here. 1 path, 1 reference.', + m[0], + ) + } + } +} + +const check = (source: string): void => { + const calls = extractPathCalls(source) + if (calls.length > 0) { + checkRuleA(calls) + checkRuleB(calls) + } + checkRuleATemplate(source) +} + +const emitBlock = (filePath: string, err: BlockError): void => { + process.stderr.write( + `\n[path-guard] Blocked: ${err.rule}\n` + + ` Mantra: 1 path, 1 reference\n` + + ` File: ${filePath}\n` + + ` Snippet: ${err.snippet}\n` + + ` Fix: ${err.suggestion}\n\n`, + ) +} + +const main = async (): Promise => { + const raw = await stdin() + if (!raw) { + return + } + let payload: ToolInput + try { + payload = JSON.parse(raw) as ToolInput + } catch { + return + } + if (payload.tool_name !== 'Edit' && payload.tool_name !== 'Write') { + return + } + const filePath = payload.tool_input?.file_path ?? '' + if (!isInScope(filePath)) { + return + } + // Edit tool sends `new_string` (the replacement); Write sends + // `content` (the full file). Either is the text we'd be putting on + // disk. + const source = + payload.tool_input?.new_string ?? payload.tool_input?.content ?? '' + if (!source) { + return + } + + try { + check(source) + } catch (e) { + if (e instanceof BlockError) { + emitBlock(filePath, e) + process.exitCode = 2 + return + } + throw e + } +} + +main().catch(e => { + // Never block a tool call due to a bug in the hook itself. 
Log it + // so we notice, but fail open. + process.stderr.write(`[path-guard] hook error (allowing): ${e}\n`) + process.exitCode = 0 +}) diff --git a/.claude/hooks/path-guard/package.json b/.claude/hooks/path-guard/package.json new file mode 100644 index 00000000..a7cb5039 --- /dev/null +++ b/.claude/hooks/path-guard/package.json @@ -0,0 +1,12 @@ +{ + "name": "hook-path-guard", + "private": true, + "type": "module", + "main": "./index.mts", + "exports": { + ".": "./index.mts" + }, + "scripts": { + "test": "node --test test/*.test.mts" + } +} diff --git a/.claude/hooks/path-guard/segments.mts b/.claude/hooks/path-guard/segments.mts new file mode 100644 index 00000000..891d0b8b --- /dev/null +++ b/.claude/hooks/path-guard/segments.mts @@ -0,0 +1,80 @@ +// Canonical path-segment vocabulary shared by the path-guard hook +// (.claude/hooks/path-guard/index.mts) and gate (scripts/check-paths.mts). +// +// Mantra: 1 path, 1 reference. This module is the *one* place stage, +// build-root, mode, and sibling-package vocabulary is defined. Both +// consumers import from here so they can never drift apart. +// +// Synced byte-identically across the Socket fleet via +// socket-repo-template/scripts/sync-scaffolding.mjs (IDENTICAL_FILES). +// When adding a new stage/build-root/mode/sibling, edit this file in +// the template and re-sync. + +// "Stage" segments — Rule A core. Two of these spread via `path.join` +// or interpolated into a template literal is a finding outside a +// canonical `paths.mts`. Sourced from build-infra/lib/constants.mts +// `BUILD_STAGES` plus their lowercase directory-name siblings used by +// some builders. +export const STAGE_SEGMENTS = new Set([ + 'Compressed', + 'downloaded', + 'Final', + 'Optimized', + 'Release', + 'Stripped', + 'Synced', + 'wasm', +]) + +// "Build-root" segments — at least one must be present together with +// a stage segment to confirm we're constructing a build output path +// rather than something coincidental. 
Example: a join that yields +// `//` doesn't fire if no build-root segment is +// present; `/build//out/` does. +export const BUILD_ROOT_SEGMENTS = new Set(['build', 'out']) + +// Build-mode segments — a stage segment plus one of these is also a +// finding (`build///out/` is the canonical shape). +export const MODE_SEGMENTS = new Set(['dev', 'prod', 'shared']) + +// Sibling fleet packages (Rule B). Union of all packages across the +// Socket fleet — the gate is byte-identical via sync-scaffolding, so +// listing every fleet package keeps Rule B firing in any repo. When a +// new package joins the workspace, add it here and propagate via +// `node scripts/sync-scaffolding.mjs --all --fix` from +// socket-repo-template. +export const KNOWN_SIBLING_PACKAGES = new Set([ + // socket-btm + 'bin-infra', + 'binflate', + 'binject', + 'binpress', + 'build-infra', + 'codet5-models-builder', + 'curl-builder', + 'ink-builder', + 'iocraft-builder', + 'libpq-builder', + 'lief-builder', + 'minilm-builder', + 'models', + 'napi-go', + 'node-smol-builder', + 'onnxruntime-builder', + 'opentui-builder', + 'stubs-builder', + 'ultraviolet-builder', + 'yoga-layout-builder', + // socket-cli + 'cli', + 'package-builder', + // socket-tui + 'core', + 'react', + 'renderer', + 'ultraviolet', + 'yoga', + // socket-registry / ultrathink + 'acorn', + 'npm', +]) diff --git a/.claude/hooks/path-guard/test/path-guard.test.mts b/.claude/hooks/path-guard/test/path-guard.test.mts new file mode 100644 index 00000000..a3c3a132 --- /dev/null +++ b/.claude/hooks/path-guard/test/path-guard.test.mts @@ -0,0 +1,378 @@ +// Tests for the path-guard hook. Each `node:test` block writes a +// mock PreToolUse payload to the hook's stdin and asserts on its exit +// code + stderr. Exit 2 = blocked; exit 0 = allowed. 
+// +// Run: pnpm --filter hook-path-guard test +// (or directly: node --test test/*.test.mts) + +import { spawnSync } from 'node:child_process' +import path from 'node:path' +import process from 'node:process' +import { fileURLToPath } from 'node:url' + +import { describe, it } from 'node:test' +import assert from 'node:assert/strict' + +const __filename = fileURLToPath(import.meta.url) +const __dirname = path.dirname(__filename) +const HOOK = path.resolve(__dirname, '..', 'index.mts') + +const runHook = ( + toolName: string, + filePath: string, + source: string, +): { code: number; stderr: string } => { + const payload = JSON.stringify({ + tool_name: toolName, + tool_input: + toolName === 'Edit' + ? { file_path: filePath, new_string: source } + : { file_path: filePath, content: source }, + }) + const result = spawnSync(process.execPath, [HOOK], { + encoding: 'utf8', + input: payload, + }) + return { + code: result.status ?? -1, + stderr: result.stderr, + } +} + +describe('path-guard — Rule A (multi-stage construction)', () => { + it('blocks two stage segments in path.join', () => { + const source = ` + const p = path.join(PACKAGE_ROOT, 'wasm', 'out', 'Final', 'bin') + ` + const { code, stderr } = runHook( + 'Write', + 'packages/foo/scripts/build.mts', + source, + ) + assert.equal(code, 2) + assert.match(stderr, /Blocked: A/) + assert.match(stderr, /1 path, 1 reference/) + }) + + it('blocks build + mode + stage', () => { + const source = ` + const p = path.join(PKG, 'build', 'dev', 'out', 'Final', 'binary') + ` + const { code } = runHook( + 'Edit', + 'packages/foo/scripts/build.mts', + source, + ) + assert.equal(code, 2) + }) + + it('blocks Release + Stripped together', () => { + const source = ` + const p = path.join(buildDir, 'Release', 'Stripped') + ` + const { code } = runHook( + 'Write', + 'packages/foo/scripts/release.mts', + source, + ) + assert.equal(code, 2) + }) + + it('allows single stage segment with one build root', () => { + // 'build' + 'temp' → no 
stage segment at all → pass + const source = ` + const tmp = path.join(packageRoot, 'build', 'temp') + ` + const { code } = runHook( + 'Write', + 'packages/foo/scripts/build.mts', + source, + ) + assert.equal(code, 0) + }) + + it('allows path.join with no stage segments', () => { + const source = ` + const cfg = path.join(packageRoot, 'config', 'settings.json') + ` + const { code } = runHook( + 'Write', + 'packages/foo/scripts/build.mts', + source, + ) + assert.equal(code, 0) + }) +}) + +describe('path-guard — Rule B (cross-package traversal)', () => { + it('blocks .. + sibling package + build context', () => { + const source = ` + const lief = path.join(PKG, '..', 'lief-builder', 'build', 'Final') + ` + const { code, stderr } = runHook( + 'Write', + 'packages/binject/scripts/build.mts', + source, + ) + assert.equal(code, 2) + assert.match(stderr, /Blocked: B/) + assert.match(stderr, /lief-builder/) + }) + + it('allows .. + sibling without build context', () => { + // Reaching into a sibling for a non-build asset is allowed; the + // gate may still flag it but the hook is scoped to build paths. + const source = ` + const cfg = path.join(PKG, '..', 'lief-builder', 'config.json') + ` + const { code } = runHook( + 'Write', + 'packages/binject/scripts/build.mts', + source, + ) + assert.equal(code, 0) + }) + + it('does not fire on traversal to unknown directory', () => { + const source = ` + const x = path.join(PKG, '..', 'fixtures', 'build', 'Final') + ` + const { code } = runHook( + 'Write', + 'packages/foo/test/test.mts', + source, + ) + assert.equal(code, 0) + }) + + it('does not fire when .. and sibling are non-adjacent (regression)', () => { + // Earlier regex ran with sticky sawDotDot — once it saw `..` it + // would flag any later sibling-named segment. The fix requires + // the sibling to appear *immediately* after `..`. 
+ const source = ` + const x = path.join(PKG, '..', 'cache', 'lief-builder', 'config.json') + ` + const { code } = runHook( + 'Write', + 'packages/foo/scripts/build.mts', + source, + ) + assert.equal(code, 0) + }) +}) + +describe('path-guard — paren-balance correctness', () => { + it('detects A through nested function-call args (regression)', () => { + // Old regex used \\([^()]*\\) which only handled one nesting + // level — `path.join(getDir(child(x)), 'build', 'dev', 'Final')` + // silently slipped through. The paren-balancing scanner catches it. + const source = ` + const p = path.join(getDir(child(x)), 'build', 'dev', 'out', 'Final') + ` + const { code } = runHook( + 'Write', + 'packages/foo/scripts/build.mts', + source, + ) + assert.equal(code, 2) + }) + + it('detects A in path.resolve() too', () => { + const source = ` + const p = path.resolve(PKG, 'build', 'dev', 'out', 'Final', 'bin') + ` + const { code } = runHook( + 'Write', + 'packages/foo/scripts/build.mts', + source, + ) + assert.equal(code, 2) + }) +}) + +describe('path-guard — template literals', () => { + it('detects A in fully-literal template path', () => { + const source = '\n const p = `build/dev/out/Final/binary`\n ' + const { code } = runHook( + 'Write', + 'packages/foo/scripts/build.mts', + source, + ) + assert.equal(code, 2) + }) + + it('detects A in template with placeholders', () => { + const source = + '\n const p = `${PKG}/build/${mode}/${arch}/out/Final/${name}`\n ' + const { code } = runHook( + 'Write', + 'packages/foo/scripts/build.mts', + source, + ) + assert.equal(code, 2) + }) + + it('allows template with single non-stage segment', () => { + const source = '\n const url = `https://example.com/path`\n ' + const { code } = runHook( + 'Write', + 'packages/foo/scripts/build.mts', + source, + ) + assert.equal(code, 0) + }) + + it('allows template with no stage segments', () => { + const source = '\n const tmp = `${packageRoot}/build/temp/cache`\n ' + const { code } = runHook( + 
'Write', + 'packages/foo/scripts/build.mts', + source, + ) + assert.equal(code, 0) + }) + + it('allows template that is purely interpolation', () => { + // `${a}/${b}/${c}` has no literal stage segments. + const source = '\n const p = `${a}/${b}/${c}`\n ' + const { code } = runHook( + 'Write', + 'packages/foo/scripts/build.mts', + source, + ) + assert.equal(code, 0) + }) +}) + +describe('path-guard — file-type filter', () => { + it('skips .ts files', () => { + const source = ` + const p = path.join(PKG, 'build', 'dev', 'out', 'Final', 'bin') + ` + const { code } = runHook('Write', 'packages/foo/src/index.ts', source) + assert.equal(code, 0) + }) + + it('skips .mjs files', () => { + const source = ` + const p = path.join(PKG, 'build', 'dev', 'out', 'Final', 'bin') + ` + const { code } = runHook('Write', 'additions/foo.mjs', source) + assert.equal(code, 0) + }) + + it('skips .yml files', () => { + const source = ` + run: | + FINAL="build/\${MODE}/\${ARCH}/out/Final" + ` + const { code } = runHook( + 'Write', + '.github/workflows/foo.yml', + source, + ) + assert.equal(code, 0) + }) + + it('inspects .mts files', () => { + const source = ` + const p = path.join(PKG, 'build', 'dev', 'out', 'Final', 'bin') + ` + const { code } = runHook( + 'Write', + 'packages/foo/scripts/build.mts', + source, + ) + assert.equal(code, 2) + }) + + it('inspects .cts files', () => { + const source = ` + const p = path.join(PKG, 'build', 'dev', 'out', 'Final', 'bin') + ` + const { code } = runHook( + 'Write', + 'packages/foo/scripts/build.cts', + source, + ) + assert.equal(code, 2) + }) +}) + +describe('path-guard — exempt files', () => { + it('allows edits to paths.mts', () => { + const source = ` + export const FINAL_DIR = path.join(PKG, 'build', 'dev', 'out', 'Final') + ` + const { code } = runHook( + 'Write', + 'packages/foo/scripts/paths.mts', + source, + ) + assert.equal(code, 0) + }) + + it('allows edits to check-paths.mts (the gate)', () => { + const source = ` + const PATTERNS = 
[path.join('build', 'Final', 'wasm')] + ` + const { code } = runHook('Write', 'scripts/check-paths.mts', source) + assert.equal(code, 0) + }) + + it('allows edits to the path-guard hook itself', () => { + const source = ` + const STAGES = ['Final', 'Release', 'Stripped'] + ` + const { code } = runHook( + 'Write', + '.claude/hooks/path-guard/index.mts', + source, + ) + assert.equal(code, 0) + }) + + it('allows edits to path-guard tests', () => { + const source = ` + const fixture = path.join('build', 'dev', 'out', 'Final') + ` + const { code } = runHook( + 'Write', + '.claude/hooks/path-guard/test/path-guard.test.mts', + source, + ) + assert.equal(code, 0) + }) +}) + +describe('path-guard — tool-name filter', () => { + it('skips Bash', () => { + const source = `path.join(PKG, 'build', 'dev', 'out', 'Final', 'bin')` + const { code } = runHook('Bash', '', source) + assert.equal(code, 0) + }) + + it('skips Read', () => { + const source = '' + const { code } = runHook('Read', 'packages/foo/scripts/build.mts', source) + assert.equal(code, 0) + }) +}) + +describe('path-guard — bug-tolerance (fails open)', () => { + it('passes through invalid JSON payload', () => { + const result = spawnSync(process.execPath, [HOOK], { + encoding: 'utf8', + input: 'not json at all', + }) + assert.equal(result.status, 0) + }) + + it('passes through empty stdin', () => { + const result = spawnSync(process.execPath, [HOOK], { + encoding: 'utf8', + input: '', + }) + assert.equal(result.status, 0) + }) +}) diff --git a/.claude/hooks/path-guard/tsconfig.json b/.claude/hooks/path-guard/tsconfig.json new file mode 100644 index 00000000..53c5c847 --- /dev/null +++ b/.claude/hooks/path-guard/tsconfig.json @@ -0,0 +1,15 @@ +{ + "compilerOptions": { + "declarationMap": false, + "erasableSyntaxOnly": true, + "module": "nodenext", + "moduleResolution": "nodenext", + "noEmit": true, + "rewriteRelativeImportExtensions": true, + "skipLibCheck": true, + "sourceMap": false, + "strict": true, + "target": 
"esnext", + "verbatimModuleSyntax": true + } +} diff --git a/.claude/hooks/private-name-guard/README.md b/.claude/hooks/private-name-guard/README.md new file mode 100644 index 00000000..a15d4412 --- /dev/null +++ b/.claude/hooks/private-name-guard/README.md @@ -0,0 +1,59 @@ +# private-name-guard + +`PreToolUse` hook that **never blocks**. On every `Bash` command that +would publish text to a public Git/GitHub surface, writes a short +reminder to stderr so the model re-reads the command with the rule +freshly in mind: + +> No private repos or internal project names in public surfaces. Omit +> the reference entirely — don't substitute a placeholder. The +> placeholder itself is a tell. + +Attention priming, not enforcement. The model is responsible for +applying the rule — the hook just ensures the rule is in the active +context at the moment the command is about to fire. + +Sibling to `public-surface-reminder`, which covers customer/company +names and internal work-item IDs. The two hooks compose: both fire on +the same public-surface commands, each priming a distinct slice of the +rule set. + +## What counts as "public surface" + +- `git commit` (including `--amend`) +- `git push` +- `gh pr (create|edit|comment|review)` +- `gh issue (create|edit|comment)` +- `gh api -X POST|PATCH|PUT` +- `gh release (create|edit)` + +Any other `Bash` command passes through silently. + +## Why no denylist + +Because a denylist is itself a leak. A file named `private-projects.txt` +that enumerates "these are our internal repos" is worse than no list at +all — anyone who finds it gets the org's full internal map for free. +Recognition happens at write time, every time, by the model reading the +text it's about to send. The hook just makes sure that read happens. 
+ +## Wiring + +`.claude/settings.json`: + +```json +{ + "hooks": { + "PreToolUse": [ + { + "matcher": "Bash", + "hooks": [{ "type": "command", "command": "node .claude/hooks/private-name-guard/index.mts" }] + } + ] + } +} +``` + +## Exit code + +Always `0`. The hook never blocks; it only prints to stderr. diff --git a/.claude/hooks/private-name-guard/index.mts b/.claude/hooks/private-name-guard/index.mts new file mode 100644 index 00000000..fa8ea323 --- /dev/null +++ b/.claude/hooks/private-name-guard/index.mts @@ -0,0 +1,89 @@ +#!/usr/bin/env node +// Claude Code PreToolUse hook — private-name guard. +// +// Never blocks. On every Bash command that would publish text to a public +// Git/GitHub surface (git commit, git push, gh pr/issue/api/release write), +// writes a short reminder to stderr so the model re-reads the command with +// the rule freshly in mind: +// +// No private repos or internal project names in public surfaces. +// Omit the reference entirely — don't substitute a placeholder. +// +// Exit code is always 0. This is attention priming, not enforcement. The +// model is responsible for applying the rule — the hook just makes sure +// the rule is in the active context at the moment the command is about +// to fire. +// +// Deliberately carries no enumerated denylist. Recognition and replacement +// happen at write time, not via a list of names. A denylist is itself a +// leak — a file named `private-projects.txt` would be the very thing it +// tries to prevent. +// +// Reads a Claude Code PreToolUse JSON payload from stdin: +// { "tool_name": "Bash", "tool_input": { "command": "..." } } + +import { readFileSync } from 'node:fs' + +type ToolInput = { + tool_name?: string + tool_input?: { + command?: string + } +} + +// Commands that can publish content outside the local machine. +// Keep broad — better to remind on an extra read than miss a write. 
+const PUBLIC_SURFACE_PATTERNS: RegExp[] = [ + /\bgit\s+commit\b/, + /\bgit\s+push\b/, + /\bgh\s+pr\s+(create|edit|comment|review)\b/, + /\bgh\s+issue\s+(create|edit|comment)\b/, + /\bgh\s+api\b[^|]*-X\s*(POST|PATCH|PUT)\b/i, + /\bgh\s+release\s+(create|edit)\b/, +] + +function isPublicSurface(command: string): boolean { + const normalized = command.replace(/\s+/g, ' ') + return PUBLIC_SURFACE_PATTERNS.some(re => re.test(normalized)) +} + +function main(): void { + let raw = '' + try { + raw = readFileSync(0, 'utf8') + } catch { + return + } + + let input: ToolInput + try { + input = JSON.parse(raw) + } catch { + return + } + + if (input.tool_name !== 'Bash') { + return + } + const command = input.tool_input?.command + if (!command || typeof command !== 'string') { + return + } + if (!isPublicSurface(command)) { + return + } + + const lines = [ + '[private-name-guard] This command writes to a public Git/GitHub surface.', + ' • Re-read the commit message / PR body / comment BEFORE it sends.', + ' • No private repo names. No internal project codenames. No unreleased', + ' product names. No internal-only tooling repos absent from the public', + ' org page. No customer/partner names.', + ' • Omit the reference entirely. 
Do not substitute a placeholder — the', + ' placeholder itself is a tell.', + ' • If you spot one, cancel and rewrite the text first.', + ] + process.stderr.write(lines.join('\n') + '\n') +} + +main() diff --git a/.claude/hooks/private-name-guard/package.json b/.claude/hooks/private-name-guard/package.json new file mode 100644 index 00000000..64c972ff --- /dev/null +++ b/.claude/hooks/private-name-guard/package.json @@ -0,0 +1,12 @@ +{ + "name": "hook-private-name-guard", + "private": true, + "type": "module", + "main": "./index.mts", + "exports": { + ".": "./index.mts" + }, + "devDependencies": { + "@types/node": "24.9.2" + } +} diff --git a/.claude/hooks/public-surface-reminder/README.md b/.claude/hooks/public-surface-reminder/README.md new file mode 100644 index 00000000..4b66ee1e --- /dev/null +++ b/.claude/hooks/public-surface-reminder/README.md @@ -0,0 +1,36 @@ +# public-surface-reminder + +`PreToolUse` hook that **never blocks**. On every `Bash` command that would +publish text to a public Git/GitHub surface, writes a short reminder to +stderr so the model re-reads the command with the two rules freshly in +mind: + +1. **No real customer or company names.** Use `Acme Inc`. No exceptions. +2. **No internal work-item IDs or tracker URLs.** No `SOC-123` / + `ENG-456` / `ASK-789` / similar, no `linear.app` / `sentry.io` URLs. + +Attention priming, not enforcement. The model is responsible for actually +applying the rule — the hook just ensures the rule is in the active +context at the moment the command is about to fire. + +## What counts as "public surface" + +- `git commit` (including `--amend`) +- `git push` +- `gh pr (create|edit|comment|review)` +- `gh issue (create|edit|comment)` +- `gh api -X POST|PATCH|PUT` +- `gh release (create|edit)` + +Any other `Bash` command passes through silently. + +## Why no denylist + +Because a denylist is itself a customer leak. 
A file named +`customers.txt` that enumerates "these are our customers" is worse than +the bug it tries to prevent. Recognition and replacement happen at write +time, done by the model, every time. + +## Exit code + +Always `0`. The hook prints a reminder and steps aside. diff --git a/.claude/hooks/public-surface-reminder/index.mts b/.claude/hooks/public-surface-reminder/index.mts new file mode 100644 index 00000000..a3eaed7e --- /dev/null +++ b/.claude/hooks/public-surface-reminder/index.mts @@ -0,0 +1,85 @@ +#!/usr/bin/env node +// Claude Code PreToolUse hook — public-surface reminder. +// +// Never blocks. On every Bash command that would publish text to a public +// Git/GitHub surface (git commit, git push, gh pr/issue/api/release write), +// writes a short reminder to stderr so the model re-reads the command with +// the two rules freshly in mind: +// +// 1. No real customer/company names — ever. Use `Acme Inc` instead. +// 2. No internal work-item IDs or tracker URLs — no `SOC-123`, `ENG-456`, +// `ASK-789`, `linear.app`, `sentry.io`, etc. +// +// Exit code is always 0. This is attention priming, not enforcement. The +// model is responsible for actually applying the rule — the hook just makes +// sure the rule is in the active context at the moment the command is about +// to fire. +// +// Deliberately carries no list of customer names. Recognition and +// replacement happen at write time, not via enumeration. +// +// Reads a Claude Code PreToolUse JSON payload from stdin: +// { "tool_name": "Bash", "tool_input": { "command": "..." } } + +import { readFileSync } from 'node:fs' + +type ToolInput = { + tool_name?: string + tool_input?: { + command?: string + } +} + +// Commands that can publish content outside the local machine. +// Keep broad — better to remind on an extra read than miss a write. 
+const PUBLIC_SURFACE_PATTERNS: RegExp[] = [ + /\bgit\s+commit\b/, + /\bgit\s+push\b/, + /\bgh\s+pr\s+(create|edit|comment|review)\b/, + /\bgh\s+issue\s+(create|edit|comment)\b/, + /\bgh\s+api\b[^|]*-X\s*(POST|PATCH|PUT)\b/i, + /\bgh\s+release\s+(create|edit)\b/, +] + +function isPublicSurface(command: string): boolean { + const normalized = command.replace(/\s+/g, ' ') + return PUBLIC_SURFACE_PATTERNS.some(re => re.test(normalized)) +} + +function main(): void { + let raw = '' + try { + raw = readFileSync(0, 'utf8') + } catch { + return + } + + let input: ToolInput + try { + input = JSON.parse(raw) + } catch { + return + } + + if (input.tool_name !== 'Bash') { + return + } + const command = input.tool_input?.command + if (!command || typeof command !== 'string') { + return + } + if (!isPublicSurface(command)) { + return + } + + const lines = [ + '[public-surface-reminder] This command writes to a public Git/GitHub surface.', + ' • Re-read the commit message / PR body / comment BEFORE it sends.', + ' • No real customer or company names — use `Acme Inc`. 
No exceptions.', + ' • No internal work-item IDs or tracker URLs (linear.app, sentry.io, SOC-/ENG-/ASK-/etc.).', + ' • If you spot one, cancel and rewrite the text first.', + ] + process.stderr.write(lines.join('\n') + '\n') +} + +main() diff --git a/.claude/hooks/public-surface-reminder/package.json b/.claude/hooks/public-surface-reminder/package.json new file mode 100644 index 00000000..09cf63ac --- /dev/null +++ b/.claude/hooks/public-surface-reminder/package.json @@ -0,0 +1,12 @@ +{ + "name": "hook-public-surface-reminder", + "private": true, + "type": "module", + "main": "./index.mts", + "exports": { + ".": "./index.mts" + }, + "devDependencies": { + "@types/node": "24.9.2" + } +} diff --git a/.claude/hooks/release-workflow-guard/README.md b/.claude/hooks/release-workflow-guard/README.md new file mode 100644 index 00000000..6f274ae5 --- /dev/null +++ b/.claude/hooks/release-workflow-guard/README.md @@ -0,0 +1,68 @@ +# release-workflow-guard + +`PreToolUse` hook that **blocks** every Bash command that would +dispatch a GitHub Actions workflow. Exit code `2`; the model never +gets to fire the command. + +> Workflow dispatches are irrevocable. Publish workflows push npm +> versions (unpublishable after 24h). Build/Release workflows pin +> GitHub releases by SHA. Container workflows push immutable image +> tags. Even build workflows with a `dry_run` input still treat the +> dispatch itself as the prod trigger — the user runs them +> manually, never Claude. + +## What gets blocked + +- `gh workflow run ` +- `gh workflow dispatch ` (alias of `run`) +- `gh api .../actions/workflows//dispatches` POST/PUT + +Any other `Bash` command passes through silently. + +## Why no per-workflow allowlist + +Because allowlists drift. A "benign" CI dispatch today becomes a +prod-touching dispatch tomorrow when someone wires a publish step +behind it; the allowlist hasn't updated. The cost of an extra +block is one re-prompt (the user runs the command in their own +terminal). 
The cost of a missed prod dispatch is irreversible. +Block all dispatches; let the user judge. + +## Override + +There is no opt-out. If a real workflow id needs dispatching during +a Claude session, the user runs it themselves — either in a plain +shell, via the GitHub Actions UI, or by typing `! gh workflow run +...` outside of a Claude prompt where the hook doesn't fire. + +## Wiring + +`.claude/settings.json`: + +```json +{ + "hooks": { + "PreToolUse": [ + { + "matcher": "Bash", + "hooks": [{ "type": "command", "command": "node .claude/hooks/release-workflow-guard/index.mts" }] + } + ] + } +} +``` + +## Exit code + +- `0` — command is not a workflow dispatch; pass through +- `2` — command is a workflow dispatch; block + write reason to stderr + +## Sibling hooks + +- `private-name-guard` — primes the model on private repo / project names +- `public-surface-reminder` — primes on customer / company names +- `token-guard` — blocks token-leaking shell shapes + +`release-workflow-guard` is the third hook that **blocks** rather +than primes (alongside `token-guard` and `path-guard`). The shared +rule: block when the harm of a wrong fire is irreversible. diff --git a/.claude/hooks/release-workflow-guard/index.mts b/.claude/hooks/release-workflow-guard/index.mts new file mode 100644 index 00000000..b824c3f5 --- /dev/null +++ b/.claude/hooks/release-workflow-guard/index.mts @@ -0,0 +1,137 @@ +#!/usr/bin/env node +// Claude Code PreToolUse hook — release-workflow-guard. +// +// BLOCKS every Bash command that would dispatch a GitHub Actions +// workflow. The user runs workflow_dispatch jobs manually after +// reviewing the release commit and waiting for CI to pass — +// auto-triggering is irrevocable in the short term: +// +// - Publish workflows push npm versions (unpublishable after 24h). +// - Build/Release workflows publish GitHub releases pinned by SHA. +// - Container workflows push immutable image tags. 
+// +// Even nominally-CI workflow_dispatches often carry prod side +// effects (the socket-btm binary builders gate prod releases on a +// `dry_run` input, but the dispatch itself is the trigger). The +// safe default is "block all dispatches and ask the user to run +// them themselves." Cost of an extra block: one re-prompt. Cost +// of a missed prod publish: irreversible. +// +// Exit code 2 with a clear stderr message stops the tool call. The +// model never gets to fire the command. The user re-runs it from +// their own terminal (or via the GitHub Actions UI) when ready. +// +// Blocked patterns: +// - `gh workflow run ` +// - `gh workflow dispatch ` (alias of `run`) +// - `gh api ... actions/workflows//dispatches` POST/PUT +// +// This hook is the enforcement layer paired with the CLAUDE.md +// rule. The rule documents the policy; the hook makes it +// mechanical so the model can't accidentally dispatch a workflow +// even when reasoning about urgent release work. +// +// Reads a Claude Code PreToolUse JSON payload from stdin: +// { "tool_name": "Bash", "tool_input": { "command": "..." } } + +import { readFileSync } from 'node:fs' +import process from 'node:process' + +type ToolInput = { + tool_name?: string + tool_input?: { + command?: string + } +} + +// `gh workflow run ` / `gh workflow dispatch `. +// The captured workflow argument is reported back so the user can +// see what was blocked. +const GH_WORKFLOW_DISPATCH_RE = + /\bgh\s+workflow\s+(?:run|dispatch)\b(?:\s+(?:--repo|--ref|-f|--field)\s+\S+)*\s+(['"]?)([^\s'"]+)\1/ + +// `gh api .../actions/workflows//dispatches` (POST/PUT). +// The path component implies dispatch — no need to also match -X. 
+const GH_API_WORKFLOW_DISPATCH_RE = + /\bgh\s+api\b[^|]*?\/actions\/workflows\/([^/\s]+)\/dispatches\b/ + +function detectDispatch(command: string): { + blocked: boolean + workflow?: string + shape?: string +} { + const normalized = command.replace(/\s+/g, ' ') + + const cliMatch = GH_WORKFLOW_DISPATCH_RE.exec(normalized) + if (cliMatch) { + return { + blocked: true, + workflow: cliMatch[2], + shape: 'gh workflow run/dispatch', + } + } + + const apiMatch = GH_API_WORKFLOW_DISPATCH_RE.exec(normalized) + if (apiMatch) { + return { + blocked: true, + workflow: apiMatch[1], + shape: 'gh api .../dispatches', + } + } + + return { blocked: false } +} + +function main(): void { + let raw = '' + try { + raw = readFileSync(0, 'utf8') + } catch { + return + } + + let input: ToolInput + try { + input = JSON.parse(raw) + } catch { + return + } + + if (input.tool_name !== 'Bash') { + return + } + const command = input.tool_input?.command + if (!command || typeof command !== 'string') { + return + } + + const { blocked, workflow, shape } = detectDispatch(command) + if (!blocked) { + return + } + + const lines = [ + '[release-workflow-guard] BLOCKED: this command would dispatch a', + ` GitHub Actions workflow (${shape}, target: ${workflow ?? ''}).`, + '', + ' Workflow dispatches often have irreversible prod side effects:', + ' - Publish workflows push npm versions (unpublishable after 24h).', + ' - Build/Release workflows create GitHub releases pinned by SHA.', + ' - Container workflows push immutable image tags.', + " - Even build workflows with a 'dry_run' input still treat the", + ' dispatch itself as the prod trigger.', + '', + ' The user runs workflow_dispatch jobs manually — never Claude.', + ' Tell the user to run the command in their own terminal (or', + ' via the GitHub Actions UI), then resume.', + '', + ' This hook has no opt-out. If you genuinely need to run a', + ' benign dispatch (e.g. 
a debug-only utility workflow), ask', + " the user to invoke it themselves; don't seek a bypass here.", + ] + process.stderr.write(lines.join('\n') + '\n') + process.exitCode = 2 +} + +main() diff --git a/.claude/hooks/release-workflow-guard/package.json b/.claude/hooks/release-workflow-guard/package.json new file mode 100644 index 00000000..7e19aa13 --- /dev/null +++ b/.claude/hooks/release-workflow-guard/package.json @@ -0,0 +1,12 @@ +{ + "name": "hook-release-workflow-guard", + "private": true, + "type": "module", + "main": "./index.mts", + "exports": { + ".": "./index.mts" + }, + "devDependencies": { + "@types/node": "24.9.2" + } +} diff --git a/.claude/hooks/token-guard/README.md b/.claude/hooks/token-guard/README.md new file mode 100644 index 00000000..9cba28a5 --- /dev/null +++ b/.claude/hooks/token-guard/README.md @@ -0,0 +1,57 @@ +# token-guard + +Claude Code `PreToolUse` hook that refuses Bash tool calls that would leak secrets to tool output. Mandatory across the Socket fleet — every repo ships this file byte-for-byte via `scripts/sync-scaffolding.mjs`. 
+ +## What it blocks + +| Rule | Example | Fix | +|------|---------|-----| +| Literal token in command | `echo vtwn_abc123…` | Rotate the exposed token; read tokens from `.env.local` at spawn time, never inline them | +| `env`/`printenv`/`export -p`/`set` dumping everything | `env \| grep FOO` (unredacted) | `env \| sed 's/=.*/=/'` or filter specific keys | +| `.env*` read without redactor | `cat .env.local` | `sed 's/=.*/=/' .env.local` or `grep -v '^#' .env.local \| cut -d= -f1` | +| `curl -H "Authorization:"` with unfiltered stdout | `curl -H "Authorization: Bearer $TOKEN" api.example.com` | Redirect to file/`/dev/null`, or pipe to `jq`/`grep`/`head`/`wc`/`cut`/`awk` | +| References sensitive env var name writing unredacted to stdout | `echo $API_KEY` | Same as above | + +## What it allows + +- Any write to a file (`>`, `>>`, `tee`) +- Any pipe through `jq`, `grep`, `head`, `tail`, `wc`, `cut`, `awk`, `sed s/=.*/=/`, `python3 -m json.tool` +- Legitimate `git`/`pnpm`/`npm`/`node`/`tsc`/`oxfmt`/`oxlint` invocations that happen to reference env var names but don't echo values +- Any curl call that does not carry an `Authorization:` header + +## Detected token shapes + +Literal value patterns caught in-command: + +- Val Town — `vtwn_` +- Linear — `lin_api_` +- OpenAI / Anthropic — `sk-` (20+ chars) +- Stripe — `sk_live_`, `sk_test_`, `pk_live_`, `rk_live_` +- GitHub — `ghp_`, `gho_`, `ghs_`, `ghu_`, `ghr_`, `github_pat_` +- GitLab — `glpat-` +- AWS — `AKIA…` +- Slack — `xoxb-`, `xoxa-`, `xoxp-`, `xoxr-`, `xoxs-` +- Google — `AIza…` +- JWTs — three-segment `eyJ…` + +## Control flow + +The hook reads the tool-use payload from stdin, type-checks `tool_name === 'Bash'`, and runs `check(command)`. Any rule violation `throw`s a typed `BlockError`; a single top-level `try/catch` in `main()` writes the block message to stderr and sets `process.exitCode = 2`. 
Hook bugs fail **open** — a crash in the hook writes a log line and returns exit 0 so legitimate work isn't blocked on a bad deploy. + +## Testing + +```bash +pnpm --filter hook-token-guard test +``` + +Adding new token-shape detections: update `LITERAL_TOKEN_PATTERNS` in `index.mts`, add a positive and negative test in `test/token-guard.test.mts`. + +## Updating across the fleet + +This file is in `IDENTICAL_FILES` in `scripts/sync-scaffolding.mjs`. After editing, run from `socket-repo-template`: + +```bash +node scripts/sync-scaffolding.mjs --all --fix +``` + +to propagate the change to every fleet repo. diff --git a/.claude/hooks/token-guard/index.mts b/.claude/hooks/token-guard/index.mts new file mode 100644 index 00000000..8ea2dba2 --- /dev/null +++ b/.claude/hooks/token-guard/index.mts @@ -0,0 +1,267 @@ +#!/usr/bin/env node +// Claude Code PreToolUse hook — token-guard firewall. +// +// Blocks Bash commands that would echo token-bearing env vars into +// tool output. This fires BEFORE the command runs; exit code 2 makes +// Claude Code refuse the tool call. The model sees the rejection +// reason on stderr and retries with a redacted formulation. +// +// Blocked patterns: +// - Literal token shapes in the command string (vtwn_, lin_api_, +// sk-, ghp_, AKIA, xox, AIza, JWT, etc.) — hardest block, logs +// a redacted message and urges rotation +// - `env`, `printenv`, `export -p`, `set` with no filter pipeline +// - `cat` / `head` / `tail` / `less` / `more` of .env* files +// without a redaction step +// - `curl -H "Authorization: ..."` with output going to unfiltered +// stdout (not /dev/null, not a file, not piped to jq/grep/etc.) 
+// - Commands referencing a sensitive env var name (*TOKEN*, +// *SECRET*, *PASSWORD*, *API_KEY*, *SIGNING_KEY*, *PRIVATE_KEY*, +// *AUTH*, *CREDENTIAL*) that write to stdout without redaction +// +// Control flow uses a `BlockError` thrown from check helpers so every +// short-circuit path goes through a single `process.exitCode = 2` +// drop at the top-level catch — no scattered `process.exit(2)` that +// can race with buffered stderr. + +import process from 'node:process' + +// Name fragments matched case-insensitively against the command. +const SENSITIVE_ENV_NAMES = [ + 'TOKEN', + 'SECRET', + 'PASSWORD', + 'PASS', + 'API_KEY', + 'APIKEY', + 'SIGNING_KEY', + 'PRIVATE_KEY', + 'AUTH', + 'CREDENTIAL', +] + +// Pipelines that "launder" earlier-stage secrets into safe output. +// The first two patterns match `sed 's/.../redact.../'` and +// `sed 's/.../FOO=*****/'` regardless of which delimiter sed uses +// (`/`, `#`, `|`). `[\s\S]*?` reaches across the delimiter between +// the search and replacement parts (the previous `[^/|#]*` couldn't +// cross `/` and so missed the canonical `sed 's/=.*/=/'` +// — the very command the token-guard error message suggests). +const REDACTION_MARKERS = [ + /\bsed\b[^|]*s[/|#][\s\S]*?\s*\/dev\/null/, + />>\s*[^|]/, + />\s*[^|]/, +] + +// Commands that dump all env vars to stdout with no filter. +const ALWAYS_DANGEROUS = [ + /^\s*env\s*(?:\||&&|;|$)/, + /^\s*env\s*$/, + /^\s*printenv\s*(?:\||&&|;|$)/, + /^\s*printenv\s*$/, + /^\s*export\s+-p\s*(?:\||&&|;|$)/, + /^\s*set\s*(?:\||&&|;|$)/, +] + +// Plain reads of .env files that would dump values to stdout. +const ENV_FILE_READ = /\b(?:cat|head|tail|less|more|bat)\b[^|]*\.env[^/\s|]*/ + +// curl calls that include an Authorization header. 
+const CURL_WITH_AUTH = + /\bcurl\b(?:[^|]|\|(?!\s*(?:sed|grep|head|tail|jq)))*(?:-H|--header)\s*['"]?Authorization:/i + +// Literal token-shape patterns — if any match in the command string, +// a real token has been pasted somewhere it shouldn't have been. +const LITERAL_TOKEN_PATTERNS: Array<[RegExp, string]> = [ + [/\bvtwn_[A-Za-z0-9_-]{8,}/, 'Val Town token (vtwn_)'], + [/\blin_api_[A-Za-z0-9_-]{8,}/, 'Linear API token (lin_api_)'], + [/\bsk-[A-Za-z0-9_-]{20,}/, 'OpenAI/Anthropic-style secret key (sk-)'], + [/\bsk_live_[A-Za-z0-9_-]{16,}/, 'Stripe live secret (sk_live_)'], + [/\bsk_test_[A-Za-z0-9_-]{16,}/, 'Stripe test secret (sk_test_)'], + [/\bpk_live_[A-Za-z0-9_-]{16,}/, 'Stripe live publishable (pk_live_)'], + [/\brk_live_[A-Za-z0-9_-]{16,}/, 'Stripe live restricted (rk_live_)'], + [/\bghp_[A-Za-z0-9]{30,}/, 'GitHub personal access token (ghp_)'], + [/\bgho_[A-Za-z0-9]{30,}/, 'GitHub OAuth token (gho_)'], + [/\bghs_[A-Za-z0-9]{30,}/, 'GitHub app server token (ghs_)'], + [/\bghu_[A-Za-z0-9]{30,}/, 'GitHub user access token (ghu_)'], + [/\bghr_[A-Za-z0-9]{30,}/, 'GitHub refresh token (ghr_)'], + [/\bgithub_pat_[A-Za-z0-9_]{20,}/, 'GitHub fine-grained PAT'], + [/\bglpat-[A-Za-z0-9_-]{16,}/, 'GitLab PAT (glpat-)'], + [/\bAKIA[0-9A-Z]{16}/, 'AWS access key ID (AKIA)'], + [/\bxox[baprs]-[A-Za-z0-9-]{10,}/, 'Slack token (xox_-)'], + [/\bAIza[0-9A-Za-z_-]{35}/, 'Google API key (AIza)'], + [/\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}/, 'JWT'], +] + +class BlockError extends Error { + public readonly rule: string + public readonly suggestion: string + public readonly showCommand: boolean + constructor(rule: string, suggestion: string, showCommand = true) { + super(rule) + this.name = 'BlockError' + this.rule = rule + this.suggestion = suggestion + this.showCommand = showCommand + } +} + +const stdin = (): Promise => + new Promise(resolve => { + let buf = '' + process.stdin.setEncoding('utf8') + process.stdin.on('data', chunk => (buf += chunk)) 
+ process.stdin.on('end', () => resolve(buf)) + }) + +type ToolInput = { + tool_name?: string + tool_input?: { command?: string } +} + +const hasRedaction = (command: string): boolean => + REDACTION_MARKERS.some(re => re.test(command)) + +// Word-boundary match so `PASS` doesn't fire on `PATHS-ALLOWLIST` and +// `AUTH` doesn't fire on `AUTHOR`. Env-var-style boundaries treat `_` +// as a separator (so `ACCESS_TOKEN` matches `TOKEN`) but require a +// non-alphanumeric character on each end (so `PATHS` doesn't match +// `PASS`). The pre-fix substring match created false positives +// whenever a path name happened to contain a sensitive keyword as a +// literal substring. +const sensitiveEnvBoundaryRes = SENSITIVE_ENV_NAMES.map( + frag => new RegExp(String.raw`(?:^|[^A-Z0-9])${frag}(?:[^A-Z0-9]|$)`), +) +const referencesSensitiveEnv = (command: string): boolean => { + const upper = command.toUpperCase() + return sensitiveEnvBoundaryRes.some(re => re.test(upper)) +} + +const matchesAlwaysDangerous = (command: string): RegExp | null => { + for (const re of ALWAYS_DANGEROUS) { + if (re.test(command)) { + return re + } + } + return null +} + +const check = (command: string): void => { + // 0. Literal token-shape in the command string — hardest block. + // A real token value already landed in the command, which itself is + // logged. We refuse to echo it further and urge rotation. + for (const [pattern, label] of LITERAL_TOKEN_PATTERNS) { + if (pattern.test(command)) { + throw new BlockError( + `literal ${label} found in command string`, + 'Rotate the exposed token immediately. Never paste tokens into commands; read them from .env.local or a keychain at subprocess spawn time.', + false, + ) + } + } + + // 1. Always-dangerous patterns. Skip when the command already has a + // redaction pipeline — the suggested fix here is `env | sed ...`, + // which would itself match ALWAYS_DANGEROUS without this guard. 
+ const dangerous = matchesAlwaysDangerous(command) + if (dangerous && !hasRedaction(command)) { + throw new BlockError( + `\`${dangerous.source}\` dumps env to stdout`, + 'Pipe through redaction, e.g. `env | sed "s/=.*/=/"` or filter specific keys.', + ) + } + + // 2. .env file reads without redaction. + if (ENV_FILE_READ.test(command) && !hasRedaction(command)) { + throw new BlockError( + '.env file read without a redaction pipeline', + 'Use `sed "s/=.*/=/" .env.local` or `grep -v "^#" .env.local | cut -d= -f1` for key names only.', + ) + } + + // 3. curl with Authorization header and unsanitized stdout. + const curlHasAuth = CURL_WITH_AUTH.test(command) + const curlOutputSafe = + />\s*\/dev\/null|>\s*[^|&]/.test(command) || + /\|\s*(?:jq|grep|head|tail|wc|cut|awk|python3?\s+-m\s+json\.tool)\b/.test( + command, + ) + if (curlHasAuth && !curlOutputSafe) { + throw new BlockError( + 'curl with Authorization header and unsanitized stdout', + 'Redirect response to /dev/null, pipe to jq/grep/head, or save to a file.', + ) + } + + // 4. References a sensitive env var name and writes to stdout + // without a redaction step. Skip when curl-with-auth passed — that + // rule already evaluated the same pipeline. + if ( + !curlHasAuth && + referencesSensitiveEnv(command) && + !hasRedaction(command) + ) { + const isPureWrite = /^\s*(?:git|pnpm|npm|node|tsc|oxfmt|oxlint)\b/.test( + command, + ) + if (!isPureWrite) { + throw new BlockError( + 'command references sensitive env var name and writes to stdout without redaction', + 'Redirect to a file, pipe through `sed "s/=.*/=/"`, or ensure only key names (not values) are printed.', + ) + } + } +} + +const emitBlock = (command: string, err: BlockError): void => { + const safeCommand = err.showCommand + ? command.slice(0, 200) + (command.length > 200 ? 
'…' : '') + : '' + process.stderr.write( + `\n[token-guard] Blocked: ${err.rule}\n` + + ` Command: ${safeCommand}\n` + + ` Fix: ${err.suggestion}\n\n`, + ) +} + +const main = async (): Promise => { + const raw = await stdin() + if (!raw) { + return + } + let payload: ToolInput + try { + payload = JSON.parse(raw) as ToolInput + } catch { + return + } + if (payload.tool_name !== 'Bash') { + return + } + const command = payload.tool_input?.command ?? '' + if (!command) { + return + } + + try { + check(command) + } catch (e) { + if (e instanceof BlockError) { + emitBlock(command, e) + process.exitCode = 2 + return + } + throw e + } +} + +main().catch(e => { + // Never block a tool call due to a bug in the hook itself. Log it + // so we notice, but fail open. + process.stderr.write(`[token-guard] hook error (allowing): ${e}\n`) + process.exitCode = 0 +}) diff --git a/.claude/hooks/token-guard/package.json b/.claude/hooks/token-guard/package.json new file mode 100644 index 00000000..fc68951d --- /dev/null +++ b/.claude/hooks/token-guard/package.json @@ -0,0 +1,12 @@ +{ + "name": "hook-token-guard", + "private": true, + "type": "module", + "main": "./index.mts", + "exports": { + ".": "./index.mts" + }, + "scripts": { + "test": "node --test test/*.test.mts" + } +} diff --git a/.claude/hooks/token-guard/test/token-guard.test.mts b/.claude/hooks/token-guard/test/token-guard.test.mts new file mode 100644 index 00000000..b2ab6714 --- /dev/null +++ b/.claude/hooks/token-guard/test/token-guard.test.mts @@ -0,0 +1,225 @@ +/** + * @fileoverview Tests for the token-guard hook. + * + * Runs the hook as a subprocess (node --test), piping a tool-use + * payload on stdin and asserting on the exit code + stderr. Exit 2 + * means the hook refused the command; exit 0 means it passed it + * through. 
+ */ + +import { describe, it } from 'node:test' +import assert from 'node:assert/strict' + +import { whichSync } from '@socketsecurity/lib/bin' +import { spawnSync } from '@socketsecurity/lib/spawn' + +const hookScript = new URL('../index.mts', import.meta.url).pathname +const nodeBin = whichSync('node') +if (!nodeBin) { + throw new Error('"node" not found on PATH') +} + +function runHook(command: string, toolName = 'Bash'): { + code: number | null + stdout: string + stderr: string +} { + const input = JSON.stringify({ + tool_name: toolName, + tool_input: { command }, + }) + const result = spawnSync(nodeBin, [hookScript], { + input, + timeout: 5_000, + stdio: ['pipe', 'pipe', 'pipe'], + }) + return { + code: result.status, + stdout: (result.stdout || '').toString(), + stderr: (result.stderr || '').toString(), + } +} + +describe('token-guard hook', () => { + describe('allows safe commands', () => { + it('plain echo', () => { + assert.equal(runHook('echo hello').code, 0) + }) + it('git log', () => { + assert.equal(runHook('git log -1 --oneline').code, 0) + }) + it('pnpm install', () => { + assert.equal(runHook('pnpm install').code, 0) + }) + it('node script', () => { + assert.equal(runHook('node scripts/build.mts').code, 0) + }) + it('sed with redaction on .env', () => { + assert.equal( + runHook("sed 's/=.*/=/' .env.local").code, + 0, + ) + }) + it('grep key-names-only on .env', () => { + assert.equal( + runHook("grep -v '^#' .env.local | cut -d= -f1").code, + 0, + ) + }) + it('curl without Authorization header', () => { + assert.equal(runHook('curl -sS https://api.example.com').code, 0) + }) + it('curl with auth piped to jq', () => { + assert.equal( + runHook( + 'curl -sS -H "Authorization: Bearer $TOKEN" https://api.example.com | jq .name', + ).code, + 0, + ) + }) + it('curl with auth redirected to file', () => { + assert.equal( + runHook( + 'curl -sS -H "Authorization: Bearer $TOKEN" https://api.example.com > out.json', + ).code, + 0, + ) + }) + it('non-Bash 
tool is always allowed', () => { + assert.equal(runHook('env', 'Edit').code, 0) + }) + }) + + describe('blocks literal token shapes', () => { + it('Val Town token', () => { + const r = runHook('echo vtwn_ABCDEFGHIJKL') + assert.equal(r.code, 2) + assert.match(r.stderr, /Val Town token/) + }) + it('Linear API token', () => { + const r = runHook('echo lin_api_ABCDEFGHIJKLMNOP') + assert.equal(r.code, 2) + assert.match(r.stderr, /Linear API token/) + }) + it('GitHub PAT', () => { + const r = runHook( + 'echo ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcd1234', + ) + assert.equal(r.code, 2) + assert.match(r.stderr, /GitHub personal access token/) + }) + it('AWS access key', () => { + const r = runHook('echo AKIAIOSFODNN7EXAMPLE') + assert.equal(r.code, 2) + assert.match(r.stderr, /AWS access key/) + }) + it('Stripe test secret', () => { + const r = runHook('echo sk_test_ABCDEFGHIJKLMNOP') + assert.equal(r.code, 2) + assert.match(r.stderr, /Stripe test secret/) + }) + it('JWT', () => { + const r = runHook( + 'echo eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c', + ) + assert.equal(r.code, 2) + assert.match(r.stderr, /JWT/) + }) + it('redacts the command in stderr so the literal token is not re-logged', () => { + const r = runHook('echo vtwn_SECRETVALUE') + assert.equal(r.code, 2) + assert.doesNotMatch(r.stderr, /SECRETVALUE/) + assert.match(r.stderr, /suppressed/) + }) + }) + + describe('blocks env/printenv dumps', () => { + it('bare env', () => { + assert.equal(runHook('env').code, 2) + }) + it('env piped without redactor', () => { + assert.equal(runHook('env | grep FOO').code, 2) + }) + it('printenv', () => { + assert.equal(runHook('printenv').code, 2) + }) + it('export -p', () => { + assert.equal(runHook('export -p').code, 2) + }) + }) + + describe('blocks .env reads without redaction', () => { + it('cat .env.local', () => { + assert.equal(runHook('cat .env.local').code, 2) + }) + it('head .env', () => { + assert.equal(runHook('head 
.env').code, 2)
+ })
+ it('less .env.production', () => {
+ assert.equal(runHook('less .env.production').code, 2)
+ })
+ })
+
+ describe('blocks curl with auth to unfiltered stdout', () => {
+ it('plain curl -H Authorization', () => {
+ const r = runHook(
+ 'curl -sS -H "Authorization: Bearer $TOKEN" https://api.example.com',
+ )
+ assert.equal(r.code, 2)
+ assert.match(r.stderr, /Authorization header and unsanitized stdout/)
+ })
+ })
+
+ describe('blocks sensitive-env-name references without redaction', () => {
+ it('echoing $API_KEY', () => {
+ assert.equal(runHook('echo $API_KEY').code, 2)
+ })
+ it('ruby -e with $TOKEN', () => {
+ assert.equal(
+ runHook('ruby -e "puts ENV[\'ACCESS_TOKEN\']"').code,
+ 2,
+ )
+ })
+ })
+
+ describe('does not false-positive on substring of sensitive name', () => {
+ // Regression: `PATHS-ALLOWLIST.YML` toUpperCase()d contains `PASS`
+ // as a substring, which the pre-fix unbounded match treated as
+ // a sensitive env reference. Word-boundary fix means `PASS` must
+ // be a standalone token (or at a `_`/`-`/`.`/`/` boundary).
+ it('paths-allowlist.yml does not trip PASS', () => {
+ assert.equal(runHook('cat .github/paths-allowlist.yml').code, 0)
+ })
+ it('AUTHOR_NAME does not trip AUTH', () => {
+ // In `AUTHOR_NAME` the fragment `AUTH` is followed by `O` — an
+ // alphanumeric — so the boundary-after check rejects the match;
+ // `_` would be a boundary (ACCESS_TOKEN → TOKEN), but `O` is not.
+ assert.equal(runHook('echo $AUTHOR_NAME').code, 0) + }) + it('PASSAGE_TIME does not trip PASS', () => { + assert.equal(runHook('echo $PASSAGE_TIME').code, 0) + }) + }) + + describe('fails open on malformed input', () => { + it('empty stdin', () => { + const r = spawnSync(nodeBin, [hookScript], { + input: '', + timeout: 5_000, + stdio: ['pipe', 'pipe', 'pipe'], + }) + assert.equal(r.status, 0) + }) + it('non-JSON stdin', () => { + const r = spawnSync(nodeBin, [hookScript], { + input: 'not json', + timeout: 5_000, + stdio: ['pipe', 'pipe', 'pipe'], + }) + assert.equal(r.status, 0) + }) + it('empty command', () => { + assert.equal(runHook('').code, 0) + }) + }) +}) diff --git a/.claude/hooks/token-guard/tsconfig.json b/.claude/hooks/token-guard/tsconfig.json new file mode 100644 index 00000000..53c5c847 --- /dev/null +++ b/.claude/hooks/token-guard/tsconfig.json @@ -0,0 +1,15 @@ +{ + "compilerOptions": { + "declarationMap": false, + "erasableSyntaxOnly": true, + "module": "nodenext", + "moduleResolution": "nodenext", + "noEmit": true, + "rewriteRelativeImportExtensions": true, + "skipLibCheck": true, + "sourceMap": false, + "strict": true, + "target": "esnext", + "verbatimModuleSyntax": true + } +} diff --git a/.claude/settings.json b/.claude/settings.json index ac130fc1..b290afb3 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -7,6 +7,31 @@ { "type": "command", "command": "node .claude/hooks/check-new-deps/index.mts" + }, + { + "type": "command", + "command": "node .claude/hooks/path-guard/index.mts" + } + ] + }, + { + "matcher": "Bash", + "hooks": [ + { + "type": "command", + "command": "node .claude/hooks/private-name-guard/index.mts" + }, + { + "type": "command", + "command": "node .claude/hooks/public-surface-reminder/index.mts" + }, + { + "type": "command", + "command": "node .claude/hooks/release-workflow-guard/index.mts" + }, + { + "type": "command", + "command": "node .claude/hooks/token-guard/index.mts" } ] } diff --git 
a/.claude/skills/_shared/path-guard-rule.md b/.claude/skills/_shared/path-guard-rule.md new file mode 100644 index 00000000..fa42a32e --- /dev/null +++ b/.claude/skills/_shared/path-guard-rule.md @@ -0,0 +1,39 @@ + + +## 1 path, 1 reference + +**A path is *constructed* exactly once. Everywhere else *references* the constructed value.** + +Referencing a single computed path many times is fine — that's the whole point of computing it once. What's banned is *re-constructing* the same path in multiple places, because that's where drift is born. Three concrete shapes: + +1. **Within a package** — every script, test, and lib file that needs a build path imports it from the package's `scripts/paths.mts` (or `lib/paths.mts`). No `path.join('build', mode, ...)` outside that module. + +2. **Across packages** — when package B consumes package A's output, B imports A's `paths.mts` via the workspace `exports` field. Never `path.join(PKG, '..', '<sibling>', 'build', ...)`. The R28 yoga/ink bug — ink hand-building yoga's wasm path and missing the `wasm/` segment — is the canonical failure mode this rule prevents. + +3. **Workflows, Dockerfiles, shell scripts** — they can't `import` TS, so they construct the string once and reference it everywhere downstream. Workflows: a "Compute paths" step exposes `steps.paths.outputs.final_dir`; later steps read `${{ steps.paths.outputs.final_dir }}`. Dockerfiles/shell: assign once to a variable, reference by name thereafter. Each canonical construction carries a comment naming the source-of-truth `paths.mts` so the YAML can't drift from TS without a flagged change. **Re-building** the same path in a second step is the violation, not referring to the constructed value many times. + +Comments that re-state a full path are forbidden. The import statement IS the comment. 
Docs and READMEs may describe the structure ("output goes under the Final dir") but should not encode a complete `build/<mode>/<platform-arch>/out/Final/binary` string — encoded paths get parsed by tools and silently rot. + +Code execution takes priority over docs: violations in `.mts`/`.cts`, Makefiles, Dockerfiles, workflow YAML, and shell scripts are blocking. README and doc-comment violations are advisory unless they contain a fully-qualified path with no parametric placeholders. + +### Three-level enforcement + +- **Hook** — `.claude/hooks/path-guard/` blocks `Edit`/`Write` calls that would introduce a violation in a `.mts`/`.cts` file. Refusal at edit time stops new duplication from landing. +- **Gate** — `scripts/check-paths.mts` runs in `pnpm check`. Fails the build on any violation that isn't allowlisted. +- **Skill** — `/path-guard` audits the repo and fixes findings; `/path-guard check` reports only; `/path-guard install` drops the gate + hook + rule into a fresh repo. + +The mantra is intentionally short so it sticks: **1 path, 1 reference**. When in doubt, find the canonical owner and import from it. diff --git a/.claude/skills/path-guard/SKILL.md b/.claude/skills/path-guard/SKILL.md new file mode 100644 index 00000000..747ad02b --- /dev/null +++ b/.claude/skills/path-guard/SKILL.md @@ -0,0 +1,250 @@ +--- +name: path-guard +description: Audit and fix path duplication in this Socket repo. Apply the strict "1 path, 1 reference" rule — every build/test/runtime/config path is constructed exactly once; everywhere else references the constructed value. Default mode finds and fixes; `check` mode reports only; `install` mode drops the gate + hook + rule into a fresh repo. +user-invocable: true +allowed-tools: Task, Bash, Read, Edit, Write, Grep, Glob, AskUserQuestion +--- + +# path-guard + +**Mantra: 1 path, 1 reference.** A path is constructed exactly once; everywhere else references the constructed value. 
Re-constructing the same path twice is the violation, not referencing the constructed value many times. + +## Modes + +- `/path-guard` — full audit-and-fix conversion of the current repo (default). +- `/path-guard check` — read-only audit, report violations, no fixes. +- `/path-guard fix ` — fix a single finding from a prior `check` run, by index. +- `/path-guard install` — drop the gate + hook + rule + allowlist into a fresh repo (for new Socket repos). + +## Three-level enforcement + +The strategy lives in three artifacts that ship together: + +1. **CLAUDE.md rule** — the mantra and detection rules in plain language. Every Socket repo's CLAUDE.md carries `## 1 path, 1 reference`. Synced from `.claude/skills/_shared/path-guard-rule.md`. +2. **Hook** — `.claude/hooks/path-guard/index.mts` runs `PreToolUse` on `Edit`/`Write` of `.mts`/`.cts` files. Blocks new violations at edit time. Mandatory across the fleet. +3. **Gate** — `scripts/check-paths.mts` runs in `pnpm check` (and CI). Whole-repo scan. Fails the build on any unsanctioned violation. + +The hook and gate share their stage / build-root / mode / sibling-package vocabulary via `.claude/hooks/path-guard/segments.mts` — a single canonical source. Adding a new stage segment or fleet package means editing one file; the two consumers can never drift on what counts as a build-output path. + +This skill is the *audit-and-fix workflow* that makes a repo conform initially and validates conformance over time. + +## Detection rules + +The gate enforces six rules. The hook enforces a subset (A and B) since it sees only one diff at a time. + +| Rule | What it catches | Where checked | +|---|---|---| +| **A** | Multi-stage `path.join(...)` constructed inline. Two or more "stage" segments (Final, Release, Stripped, Compressed, Optimized, Synced, wasm, downloaded), or one stage + build-root + mode. | `.mts`/`.cts` files outside a `paths.mts`. Hook + gate. 
| +| **B** | Cross-package traversal: `path.join(*, '..', '', 'build', ...)` reaching into a sibling's output instead of importing via `exports`. | `.mts`/`.cts` files. Hook + gate. | +| **C** | Workflow YAML constructs the same path string in 2+ steps outside a "Compute paths" step. | `.github/workflows/*.yml`. Gate. | +| **D** | Comment encodes a fully-qualified multi-stage path string (e.g. `# build/dev/darwin-arm64/out/Final/binary`). | `.github/workflows/*.yml`. Gate. | +| **F** | Same path shape constructed in 2+ different files. | All scanned files. Gate. | +| **G** | Hand-built multi-stage path constructed 2+ times in the same Makefile/Dockerfile/shell stage. | `Makefile`, `*.mk`, `*.Dockerfile`, `Dockerfile.*`, `*.sh`. Gate. | + +Comments may describe path *structure* with placeholders (`/` or `${BUILD_MODE}/${PLATFORM_ARCH}`) but should not encode a complete literal path string. Code execution takes priority over docs: violations in `.mts`, Makefiles, Dockerfiles, workflow YAML, shell scripts are blocking. + +## Mode: audit-and-fix (default) + +When invoked as `/path-guard` with no arg: + +1. **Setup** — spawn a worktree off `main` per `CLAUDE.md` parallel-sessions rule: + ```bash + git worktree add -b paths-audit ../-paths-audit main + cd ../-paths-audit + ``` + +2. **Audit** — run the gate to enumerate findings: + ```bash + pnpm run check:paths --json > /tmp/paths-findings.json + pnpm run check:paths --explain # human-readable + ``` + +3. **Fix loop** — for each finding, apply the matching pattern below. After each fix, re-run the gate. Stop iterating when `pnpm run check:paths` exits 0. + +4. **Verify** — run the full check suite + zizmor on any modified workflow: + ```bash + pnpm check + for w in .github/workflows/*.yml; do zizmor "$w"; done + ``` + +5. **Commit and push** — group fixes by logical category (workflows, code, Dockerfiles). 
Push directly to `main` for repos that allow direct push, or open a PR for repos that require it (socket-cli, socket-sdk-js, socket-registry per their CLAUDE.md / memory entries). + +## Fix patterns + +### Rule A — Multi-stage path constructed inline (in `.mts`/`.cts`) + +**Bad**: +```ts +const finalBinary = path.join(PACKAGE_ROOT, 'build', BUILD_MODE, PLATFORM_ARCH, 'out', 'Final', 'binary') +``` + +**Fix**: move the construction into the package's `scripts/paths.mts` (or `lib/paths.mts`), or use a build-infra helper: +```ts +// In packages/foo/scripts/paths.mts: +export function getBuildPaths(mode, platformArch) { + // ... constructs once ... + return { outputFinalBinary: path.join(PACKAGE_ROOT, 'build', mode, platformArch, 'out', 'Final', binaryName) } +} + +// In the consumer: +import { getBuildPaths } from './paths.mts' +const { outputFinalBinary } = getBuildPaths(mode, platformArch) +``` + +For binsuite tools (binpress/binflate/binject) the canonical helper is `getFinalBinaryPath(packageRoot, mode, platformArch, binaryName)` from `build-infra/lib/paths`. For download caches use `getDownloadedDir(packageRoot)`. + +### Rule B — Cross-package traversal + +**Bad**: +```ts +const liefDir = path.join(PACKAGE_ROOT, '..', 'lief-builder', 'build', mode, platformArch, 'out', 'Final', 'lief') +``` + +**Fix**: declare the workspace dep, expose `paths.mts` via the producer's `exports`, import the helper: + +1. In producer's `package.json`: + ```json + "exports": { + "./scripts/paths": "./scripts/paths.mts" + } + ``` +2. In consumer's `package.json` `dependencies`: + ```json + "lief-builder": "workspace:*" + ``` +3. 
In consumer: + ```ts + import { getBuildPaths as getLiefBuildPaths } from 'lief-builder/scripts/paths' + const { outputFinalDir } = getLiefBuildPaths(mode, platformArch) + ``` + +### Rule C — Workflow path repetition + +**Bad** (3 steps each rebuilding the same path): +```yaml +- name: Step A + run: cd packages/foo/build/${BUILD_MODE}/${PLATFORM_ARCH}/out/Final && do-thing-1 +- name: Step B + run: cd packages/foo/build/${BUILD_MODE}/${PLATFORM_ARCH}/out/Final && do-thing-2 +- name: Step C + run: cd packages/foo/build/${BUILD_MODE}/${PLATFORM_ARCH}/out/Final && do-thing-3 +``` + +**Fix**: add a "Compute paths" step early in the job that constructs the path once, expose via `$GITHUB_OUTPUT`, reference downstream: + +```yaml +- name: Compute foo paths + id: paths + env: + BUILD_MODE: ${{ steps.build-mode.outputs.mode }} + PLATFORM_ARCH: ${{ steps.platform-arch.outputs.platform_arch }} + run: | + PACKAGE_DIR="packages/foo" + PLATFORM_BUILD_DIR="${PACKAGE_DIR}/build/${BUILD_MODE}/${PLATFORM_ARCH}" + FINAL_DIR="${PLATFORM_BUILD_DIR}/out/Final" + { + echo "package_dir=${PACKAGE_DIR}" + echo "platform_build_dir=${PLATFORM_BUILD_DIR}" + echo "final_dir=${FINAL_DIR}" + } >> "$GITHUB_OUTPUT" + +- name: Step A + env: + FINAL_DIR: ${{ steps.paths.outputs.final_dir }} + run: cd "$FINAL_DIR" && do-thing-1 +# ... etc +``` + +For paths used inside `working-directory: packages/foo` steps, expose a `_rel` companion (e.g. `final_dir_rel=build/${BUILD_MODE}/${PLATFORM_ARCH}/out/Final`) and reference that. + +### Rule D — Comment-encoded paths + +**Bad**: +```yaml +# Path: packages/foo/build/dev/darwin-arm64/out/Final/binary +COPY --from=builder /build/.../out/Final/binary /out/Final/binary +``` + +**Fix**: cite the canonical `paths.mts` instead of duplicating the string: +```yaml +# Layout owned by packages/foo/scripts/paths.mts:getBuildPaths(). 
+COPY --from=builder /build/packages/foo/build/${BUILD_MODE}/${PLATFORM_ARCH}/out/Final/binary /out/Final/binary +``` + +The comment may describe structure (`/`) but should not be a parsable literal path. + +### Rule G — Dockerfile/Makefile/shell duplicate construction + +**Bad** (Dockerfile reconstructs the path 3 times in the same stage): +```dockerfile +RUN mkdir -p build/${BUILD_MODE}/${PLATFORM_ARCH}/out/Final && \ + cp src build/${BUILD_MODE}/${PLATFORM_ARCH}/out/Final/output && \ + ls build/${BUILD_MODE}/${PLATFORM_ARCH}/out/Final/ +``` + +**Fix**: declare an `ENV` once, reference everywhere: +```dockerfile +# Layout owned by packages/foo/scripts/paths.mts. +ENV FINAL_DIR=build/${BUILD_MODE}/${PLATFORM_ARCH}/out/Final +RUN mkdir -p "$FINAL_DIR" && cp src "$FINAL_DIR/output" && ls "$FINAL_DIR/" +``` + +Each Dockerfile `FROM` stage is its own scope — ENV from the build stage doesn't reach a subsequent `FROM scratch AS export` stage. The gate accounts for this. + +## Mode: check (read-only) + +When invoked as `/path-guard check`: + +```bash +pnpm run check:paths --explain +``` + +Print the gate's findings without making any edits. Exit 0 if clean, 1 if findings present. Useful for CI / pre-merge inspection. + +## Allowlisting a finding + +When a genuine exemption is needed (rare — most "false positives" should be reported as gate bugs), add an entry to `.github/paths-allowlist.yml`. Two ways to pin the entry to a specific site: + +- **`line:`** — exact line number. Strict; a single-line edit above shifts the entry off-target and the finding re-surfaces. +- **`snippet_hash:`** — 12-char SHA-256 prefix of the offending snippet (whitespace-normalized). Drift-resistant: survives reformatting, but any content-changing edit invalidates it. Get the hash: + ```bash + pnpm run check:paths --show-hashes + ``` + +Both may be set — either matching is sufficient. 
Prefer `snippet_hash` over raw `line:` when the exemption is expected to outlive routine reformatting; prefer `line:` when you specifically *want* the entry to fall off after any nearby edit. + +## Mode: install (new repo) + +When invoked as `/path-guard install` on a Socket repo that doesn't yet have the gate: + +1. Copy the gate file from this skill's reference dir: + ```bash + cp .claude/skills/path-guard/reference/check-paths.mts.tmpl scripts/check-paths.mts + ``` +2. Copy the empty allowlist: + ```bash + cp .claude/skills/path-guard/reference/paths-allowlist.yml.tmpl .github/paths-allowlist.yml + ``` +3. Add `"check:paths": "node scripts/check-paths.mts"` to `package.json`. +4. Wire `runPathHygieneCheck()` into `scripts/check.mts` (after the existing checks). +5. Append the rule snippet from `.claude/skills/_shared/path-guard-rule.md` to the repo's `CLAUDE.md` if a `1 path, 1 reference` section is missing. +6. Add the hook entry to `.claude/settings.json` `PreToolUse` matcher `Edit|Write`: + ```json + { "type": "command", "command": "node .claude/hooks/path-guard/index.mts" } + ``` +7. Run the gate against the repo. Triage findings as you would in audit-and-fix mode. + +## Tie-in with quality-scan + +The `/quality-scan` skill should call `pnpm run check:paths --json` as one of its sub-scans and surface findings as part of its A-F graded report. Failures roll into the overall quality grade. The full audit-and-fix workflow lives here; quality-scan just *detects* during periodic scans. + +## Reference patterns + +When converting a repo to the strategy, the patterns I keep reusing: + +- **TS-first packages**: each package owns a `scripts/paths.mts` with `PACKAGE_ROOT`, `BUILD_ROOT`, `getBuildPaths(mode, platformArch)` returning at minimum `outputFinalDir` and `outputFinalBinary`/`outputFinalFile`. +- **Cross-package consumers**: `package.json` `exports` whitelists `./scripts/paths`. Consumer adds `": workspace:*"` and imports. 
+- **Workflows**: each job has a "Compute paths" step (`id: paths`) early in the job. Step outputs include `package_dir`, `platform_build_dir`, `final_dir`, named files. `_rel` companions when `working-directory:` is used. +- **Docker stages**: each `FROM` stage declares `ENV PLATFORM_BUILD_DIR=...` and `ENV FINAL_DIR=...` once. Subsequent RUN steps reference the variables. + +The first repo (socket-btm) is the worked example. Read its `scripts/paths.mts` files and `.github/workflows/*.yml` for canonical patterns when applying the strategy elsewhere. diff --git a/.claude/skills/path-guard/reference/check-paths.mts.tmpl b/.claude/skills/path-guard/reference/check-paths.mts.tmpl new file mode 100644 index 00000000..cbecc71e --- /dev/null +++ b/.claude/skills/path-guard/reference/check-paths.mts.tmpl @@ -0,0 +1,947 @@ +#!/usr/bin/env node +/** + * @fileoverview Path-hygiene gate. + * + * Mantra: 1 path, 1 reference. A path is constructed exactly once; + * everywhere else references the constructed value. + * + * Whole-repo scan complementing the per-edit `.claude/hooks/path-guard` + * hook. The hook stops new violations from landing; this gate finds + * the existing ones and blocks merges that introduce more. + * + * Rules enforced: + * + * A — Multi-stage path constructed inline. A `path.join(...)` call + * (or template literal) in a `.mts`/`.cts` file outside a + * `paths.mts` that stitches together two or more "stage" + * segments (Final, Release, Stripped, Compressed, Optimized, + * Synced, wasm, downloaded), or one stage plus a build-root + * (`build`/`out`) plus a mode (`dev`/`prod`/`shared`). The + * construction belongs in the package's `paths.mts` (or a + * build-infra helper); every consumer imports the computed + * value. + * + * B — Cross-package path traversal. A `path.join(*, '..', '', 'build', ...)` reaches into a sibling's build + * output without going through its `exports`. 
The sibling owns + * its layout; consumers declare a workspace dep and import the + * sibling's `paths.mts`. + * + * C — Hand-built workflow path. A `.github/workflows/*.yml` step + * constructs `build/${...}/out//...` inline outside a + * canonical "Compute paths" step. Workflows can carry path + * strings, but the strings are constructed once and exposed via + * step outputs / job env that downstream steps reference. + * + * D — Comment-encoded paths. Comments (in code or YAML) that re-state + * a fully-qualified multi-stage path. Comments may describe the + * structure ("Final dir" or "build//...") but should not + * encode a complete path string that a tool would parse — the + * canonical construction IS the documentation. + * + * F — Same path constructed in multiple places. The same shape of + * multi-stage `path.join(...)` (or workflow `build/${...}/...` + * string template) appearing in two or more files. Construct + * once and import; references of the constructed value are + * unlimited. + * + * G — Hand-built paths in Makefiles, Dockerfiles, and shell scripts. + * Same shape as A, applied to executable artifacts that don't + * run TypeScript. Each canonical construction must carry a + * comment naming the source-of-truth `paths.mts` so the script + * can't drift from TS without a flagged change. + * + * Allowlist: `.github/paths-allowlist.yml`. Each entry needs a + * `reason` so the list stays audit-able. Patterns are deliberately + * narrow — entries should be specific, not blanket. 
+ * + * Usage: + * node scripts/check-paths.mts # default: report + fail + * node scripts/check-paths.mts --explain # long-form explanation + * node scripts/check-paths.mts --json # machine-readable + * node scripts/check-paths.mts --quiet # silent on clean + * + * Exit codes: + * 0 — clean (no findings, or every finding is allowlisted) + * 1 — findings present + * 2 — gate itself crashed + */ + +import { createHash } from 'node:crypto' +import { existsSync, readFileSync, readdirSync } from 'node:fs' +import path from 'node:path' +import process from 'node:process' + +import { fileURLToPath } from 'node:url' + +import { parseArgs } from 'node:util' + +import { + BUILD_ROOT_SEGMENTS, + KNOWN_SIBLING_PACKAGES, + MODE_SEGMENTS, + STAGE_SEGMENTS, +} from '../.claude/hooks/path-guard/segments.mts' + +// Plain stderr/stdout output — no @socketsecurity/lib dependency so +// the gate is self-contained and works in socket-lib itself (which +// would otherwise import itself). +const logger = { + log: (msg: string) => process.stdout.write(msg + '\n'), + error: (msg: string) => process.stderr.write(msg + '\n'), + step: (msg: string) => process.stdout.write(`→ ${msg}\n`), + success: (msg: string) => process.stdout.write(`✔ ${msg}\n`), + substep: (msg: string) => process.stdout.write(` ${msg}\n`), +} + +const __filename = fileURLToPath(import.meta.url) +const __dirname = path.dirname(__filename) +const REPO_ROOT = path.resolve(__dirname, '..') + +// Stage / build-root / mode / sibling-package vocabularies are imported +// from `.claude/hooks/path-guard/segments.mts` (the canonical source). +// Both this gate and the path-guard hook share that single definition +// — Mantra: 1 path, 1 reference. + +// File-path patterns that legitimately enumerate path segments. +const EXEMPT_FILE_PATTERNS: RegExp[] = [ + // Any paths.mts is the canonical constructor. + /(^|\/)paths\.(mts|cts|js)$/, + // Build-infra owns shared helpers that enumerate stages. 
+ /packages\/build-infra\/lib\/paths\.mts$/, + /packages\/build-infra\/lib\/constants\.mts$/, + // Path-scanning gates that intentionally enumerate. + /scripts\/check-paths\.mts$/, + /scripts\/check-consistency\.mts$/, + /\.claude\/hooks\/path-guard\//, + // Allowlist + config files. + /\.github\/paths-allowlist\.yml$/, +] + +type Finding = { + rule: 'A' | 'B' | 'C' | 'D' | 'F' | 'G' + file: string + line: number + snippet: string + message: string + fix: string +} + +const findings: Finding[] = [] + +const args = parseArgs({ + options: { + explain: { type: 'boolean', default: false }, + json: { type: 'boolean', default: false }, + quiet: { type: 'boolean', default: false }, + 'show-hashes': { type: 'boolean', default: false }, + }, + strict: false, +}) + +const isExempt = (filePath: string): boolean => + EXEMPT_FILE_PATTERNS.some(re => re.test(filePath)) + +// ────────────────────────────────────────────────────────────────── +// Allowlist loading +// ────────────────────────────────────────────────────────────────── + +type AllowlistEntry = { + file?: string + pattern?: string + rule?: string + line?: number + snippet_hash?: string + reason: string +} + +const loadAllowlist = (): AllowlistEntry[] => { + const allowlistPath = path.join(REPO_ROOT, '.github', 'paths-allowlist.yml') + if (!existsSync(allowlistPath)) { + return [] + } + const text = readFileSync(allowlistPath, 'utf8') + // Tiny YAML parser — only the shape we need: list of entries with + // `file`, `pattern`, `rule`, `line`, `reason` scalar fields, plus + // YAML 1.2 block-scalar indicators `|` (literal) and `>` (folded) + // for multi-line reasons. Avoids a yaml dep for a gate that has to + // be self-contained. + const entries: AllowlistEntry[] = [] + let current: Partial | null = null + // When set, subsequent more-indented lines fold into this key as a + // block scalar (literal '|' keeps newlines, folded '>' joins with + // spaces). 
+ let blockKey: string | null = null + let blockKind: '|' | '>' | null = null + let blockIndent = 0 + let blockLines: string[] = [] + const flushBlock = () => { + if (current && blockKey) { + const value = + blockKind === '>' + ? blockLines.join(' ').replace(/\s+/g, ' ').trim() + : blockLines.join('\n').replace(/\n+$/, '') + ;(current as any)[blockKey] = value + } + blockKey = null + blockKind = null + blockLines = [] + } + const indentOf = (line: string): number => { + let i = 0 + while (i < line.length && line[i] === ' ') { + i += 1 + } + return i + } + const lines = text.split('\n') + for (let i = 0; i < lines.length; i++) { + const raw = lines[i]! + const line = raw.replace(/\r$/, '') + // Block-scalar accumulation takes precedence over normal parsing. + if (blockKey !== null) { + if (line.trim() === '') { + // Preserve blank lines inside a literal block; folded blocks + // turn them into paragraph breaks (kept as separate joins). + blockLines.push('') + continue + } + const indent = indentOf(line) + if (indent >= blockIndent) { + blockLines.push(line.slice(blockIndent)) + continue + } + flushBlock() + // Fall through and re-process the dedented line as normal. + } + if (!line.trim() || line.trim().startsWith('#')) { + continue + } + const tryAssign = (key: string, value: string) => { + const trimmed = value.trim() + if (current === null) { + return + } + if (trimmed === '|' || trimmed === '>') { + blockKey = key + blockKind = trimmed as '|' | '>' + blockIndent = indentOf(lines[i + 1] ?? '') || indentOf(line) + 2 + blockLines = [] + return + } + ;(current as any)[key] = + key === 'line' ? Number(unquote(trimmed)) : unquote(trimmed) + } + if (line.startsWith('- ')) { + if (current && current.reason) { + entries.push(current as AllowlistEntry) + } + current = {} + const rest = line.slice(2).trim() + if (rest) { + const m = rest.match(/^([\w-]+):\s*(.*)$/) + if (m) { + tryAssign(m[1]!, m[2]!) 
+ } + } + } else if (current) { + const m = line.match(/^\s+([\w-]+):\s*(.*)$/) + if (m) { + tryAssign(m[1]!, m[2]!) + } + } + } + if (blockKey !== null) { + flushBlock() + } + if (current && current.reason) { + entries.push(current as AllowlistEntry) + } + return entries +} + +const unquote = (s: string): string => { + const t = s.trim() + if ( + (t.startsWith('"') && t.endsWith('"')) || + (t.startsWith("'") && t.endsWith("'")) + ) { + return t.slice(1, -1) + } + return t +} + +const ALLOWLIST = loadAllowlist() + +/** + * Stable, normalized snippet hash. Whitespace-insensitive so trivial + * reformatting (indent change, trailing comma, line wrap) doesn't + * invalidate an allowlist entry, but content-changing edits do. The + * hash exposes only the first 12 hex chars (~48 bits) which is plenty + * for collision-resistance within a single repo's finding set and + * keeps the YAML readable. + */ +const snippetHash = (snippet: string): string => { + const normalized = snippet.replace(/\s+/g, ' ').trim() + return createHash('sha256').update(normalized).digest('hex').slice(0, 12) +} + +/** + * Allowlist matching trades off two failure modes: + * + * - Drift via reformatting (a line shift breaks an entry, the + * finding re-surfaces, devs paper over with a new entry). + * - Stealth allowlisting (an entry pinned to "anywhere in this file" + * silently exempts unrelated future violations). + * + * Strategy: exact line match OR `snippet_hash` match (whitespace- + * normalized SHA-256, first 12 hex). Either is sufficient. Lines stay + * exact (was ±2; the slack let reformatting silently slide), and + * `snippet_hash` provides reformatting-tolerant matching that's still + * tied to the literal text — paste-and-edit cheating would change the + * hash. If neither `line` nor `snippet_hash` is provided, the entry + * matches purely by `rule` + `file` + `pattern` (file-level exempt; + * use sparingly and always pair with a precise `pattern`). 
+ */ +const isAllowlisted = (finding: Finding): boolean => + ALLOWLIST.some(entry => { + if (entry.rule && entry.rule !== finding.rule) { + return false + } + if (entry.file && !finding.file.includes(entry.file)) { + return false + } + if (entry.pattern && !finding.snippet.includes(entry.pattern)) { + return false + } + const lineProvided = entry.line !== undefined + const hashProvided = + typeof entry.snippet_hash === 'string' && entry.snippet_hash.length > 0 + if (lineProvided || hashProvided) { + const lineMatches = lineProvided && entry.line === finding.line + const hashMatches = + hashProvided && entry.snippet_hash === snippetHash(finding.snippet) + if (!(lineMatches || hashMatches)) { + return false + } + } + return true + }) + +// ────────────────────────────────────────────────────────────────── +// File walking +// ────────────────────────────────────────────────────────────────── + +const SKIP_DIRS = new Set([ + '.git', + 'node_modules', + 'build', + 'dist', + 'out', + 'target', + '.cache', + 'upstream', +]) + +const walk = function* ( + dir: string, + filter: (relPath: string) => boolean, +): Generator { + let entries + try { + entries = readdirSync(dir, { withFileTypes: true }) + } catch { + return + } + for (const e of entries) { + if (SKIP_DIRS.has(e.name)) { + continue + } + const full = path.join(dir, e.name) + const rel = path.relative(REPO_ROOT, full) + if (e.isDirectory()) { + yield* walk(full, filter) + } else if (e.isFile() && filter(rel)) { + yield rel + } + } +} + +// ────────────────────────────────────────────────────────────────── +// Rule A + B: code scan (.mts / .cts) +// ────────────────────────────────────────────────────────────────── + +// Locate `path.join(` or `path.resolve(` call sites; argument-list +// extraction uses a paren-balancing scanner below to handle arbitrary +// nesting depth (the previous regex-only approach silently missed any +// argument containing 2+ levels of nested function calls). 
+const PATH_CALL_RE = /\bpath\.(?:join|resolve)\s*\(/g +const STRING_LITERAL_RE = /(['"])((?:\\.|(?!\1)[^\\])*)\1/g + +// Template literal scanner. Captures backtick-delimited strings +// (including those with `${...}` placeholders) so Rule A also catches +// path construction via template literals like +// `${buildDir}/out/Final/${binary}` or `build/${mode}/out/Final`. +const TEMPLATE_LITERAL_RE = + /`((?:\\.|(?:\$\{(?:[^{}]|\{[^{}]*\})*\})|(?!`)[^\\])*)`/g + +/** + * Convert a template-literal body into a synthetic forward-slash path + * by replacing `${...}` placeholders with a sentinel and normalizing + * separators. Returns the sequence of path segments split on `/`. The + * sentinel doesn't match any STAGE/BUILD_ROOT/MODE token, so a + * placeholder-only segment (`${binaryName}`) won't match those sets. + */ +const templateLiteralSegments = (body: string): string[] => { + // Strip placeholders so they don't introduce noise in segments. + // Empty result for a placeholder is fine; downstream filters by set + // membership and skips empties. + const stripped = body.replace(/\$\{(?:[^{}]|\{[^{}]*\})*\}/g, '\x00') + return stripped.split('/').filter(seg => seg.length > 0 && seg !== '\x00') +} + +/** + * Extract every `path.join(...)` and `path.resolve(...)` call from the + * source text, returning each call's literal start offset and argument + * substring. Uses paren-balancing so deeply-nested arguments like + * `path.join(getDir(child(x)), 'build', 'Final')` are captured fully. 
+ */ +const extractPathCalls = ( + source: string, +): Array<{ offset: number; args: string }> => { + const calls: Array<{ offset: number; args: string }> = [] + PATH_CALL_RE.lastIndex = 0 + let match: RegExpExecArray | null + while ((match = PATH_CALL_RE.exec(source)) !== null) { + const callStart = match.index + const argsStart = PATH_CALL_RE.lastIndex + let depth = 1 + let i = argsStart + let inString: '"' | "'" | '`' | null = null + while (i < source.length && depth > 0) { + const ch = source[i]! + if (inString) { + if (ch === '\\') { + i += 2 + continue + } + if (ch === inString) { + inString = null + } + } else { + if (ch === '"' || ch === "'" || ch === '`') { + inString = ch + } else if (ch === '(') { + depth += 1 + } else if (ch === ')') { + depth -= 1 + if (depth === 0) { + break + } + } + } + i += 1 + } + if (depth === 0) { + calls.push({ offset: callStart, args: source.slice(argsStart, i) }) + PATH_CALL_RE.lastIndex = i + 1 + } + } + return calls +} + +const extractStringLiterals = (args: string): string[] => { + const literals: string[] = [] + let match: RegExpExecArray | null + STRING_LITERAL_RE.lastIndex = 0 + while ((match = STRING_LITERAL_RE.exec(args)) !== null) { + if (match[2] !== undefined) { + literals.push(match[2]) + } + } + return literals +} + +const scanCodeFile = (relPath: string): void => { + const full = path.join(REPO_ROOT, relPath) + let content: string + try { + content = readFileSync(full, 'utf8') + } catch { + return + } + const lines = content.split('\n') + // Build a line-offset map so we can map regex offsets back to line + // numbers cheaply. + const lineOffsets: number[] = [0] + for (let i = 0; i < content.length; i++) { + if (content[i] === '\n') { + lineOffsets.push(i + 1) + } + } + const offsetToLine = (offset: number): number => { + let lo = 0 + let hi = lineOffsets.length - 1 + while (lo < hi) { + const mid = (lo + hi + 1) >>> 1 + if (lineOffsets[mid]! 
<= offset) { + lo = mid + } else { + hi = mid - 1 + } + } + return lo + 1 + } + + for (const call of extractPathCalls(content)) { + const literals = extractStringLiterals(call.args) + const stages = literals.filter(l => STAGE_SEGMENTS.has(l)) + const buildRoots = literals.filter(l => BUILD_ROOT_SEGMENTS.has(l)) + const modes = literals.filter(l => MODE_SEGMENTS.has(l)) + + // Rule A: 2+ stages OR (1 stage + 1 build-root + 1 mode). + const triggersA = + stages.length >= 2 || + (stages.length >= 1 && buildRoots.length >= 1 && modes.length >= 1) + if (triggersA) { + const line = offsetToLine(call.offset) + const snippet = (lines[line - 1] ?? '').trim() + findings.push({ + rule: 'A', + file: relPath, + line, + snippet, + message: 'Multi-stage path constructed inline (outside paths.mts).', + fix: 'Construct in the owning paths.mts (or use getFinalBinaryPath / getDownloadedDir from build-infra/lib/paths). Import the computed value here.', + }) + } + + // Rule B: each '..' opens a window; the window stays open only + // until the next non-'..' literal. A sibling-package literal + // *immediately after* a '..' (no path segment between them) + // triggers, AND there must be build context elsewhere in the + // call. Resetting per-segment prevents false positives where '..' + // appears earlier and sibling-name appears much later in an + // unrelated position. + const hasBuildContext = literals.some( + l => BUILD_ROOT_SEGMENTS.has(l) || STAGE_SEGMENTS.has(l), + ) + if (hasBuildContext) { + for (let i = 0; i < literals.length - 1; i++) { + if ( + literals[i] === '..' && + KNOWN_SIBLING_PACKAGES.has(literals[i + 1]!) + ) { + const sibling = literals[i + 1]! + const line = offsetToLine(call.offset) + const snippet = (lines[line - 1] ?? '').trim() + findings.push({ + rule: 'B', + file: relPath, + line, + snippet, + message: `Cross-package traversal into '${sibling}' build output.`, + fix: `Add '${sibling}: workspace:*' as a dep, declare an exports entry on '${sibling}' (e.g. 
'./scripts/paths' → './scripts/paths.mts'), and import the path from there.`, + }) + break + } + } + } + } + + // Rule A (template literal variant). Backtick strings like + // `${buildDir}/out/Final/${binary}` or `build/${mode}/${arch}/out/Final` + // construct paths the same way `path.join(...)` does — flag the + // same shapes. Skip raw imports / template tag positions by + // filtering out leading `import.meta.url`-style / tag positions + // implicitly: TEMPLATE_LITERAL_RE matches any backtick string and + // we rely on segment composition to decide if it's a path. + TEMPLATE_LITERAL_RE.lastIndex = 0 + let tmpl: RegExpExecArray | null + while ((tmpl = TEMPLATE_LITERAL_RE.exec(content)) !== null) { + const body = tmpl[1] ?? '' + if (!body.includes('/')) { + continue + } + const segments = templateLiteralSegments(body) + const stages = segments.filter(s => STAGE_SEGMENTS.has(s)) + const buildRoots = segments.filter(s => BUILD_ROOT_SEGMENTS.has(s)) + const modes = segments.filter(s => MODE_SEGMENTS.has(s)) + // Template literal trigger is tighter than path.join() because + // backtick strings often appear in patch fixtures, error messages, + // and other multi-line content that incidentally contains stage + // tokens like `wasm`. Require the canonical build-output shape: + // - 'build' + 'out' + stage (canonical multi-stage layout), OR + // - 2+ stage segments AND 'out' (e.g. `wasm/out/Final`), OR + // - 'build' + stage + literal mode (back-compat with path.join). + const hasBuildAndOut = + buildRoots.includes('build') && buildRoots.includes('out') + const hasOut = buildRoots.includes('out') + const hasBuild = buildRoots.includes('build') + const triggersA = + (hasBuildAndOut && stages.length >= 1) || + (stages.length >= 2 && hasOut) || + (hasBuild && stages.length >= 1 && modes.length >= 1) + if (triggersA) { + const line = offsetToLine(tmpl.index) + const snippet = (lines[line - 1] ?? 
'').trim() + findings.push({ + rule: 'A', + file: relPath, + line, + snippet, + message: + 'Multi-stage path constructed inline via template literal (outside paths.mts).', + fix: 'Construct in the owning paths.mts (or use getFinalBinaryPath / getDownloadedDir from build-infra/lib/paths). Import the computed value here.', + }) + } + } +} + +// ────────────────────────────────────────────────────────────────── +// Rule C + D: workflow YAML scan +// ────────────────────────────────────────────────────────────────── + +const WORKFLOW_PATH_RE = + /build\/\$\{[^}]+\}\/[^"'`\s]*\/out\/(?:Final|Release|Stripped|Compressed|Optimized|Synced)/g +const WORKFLOW_GH_EXPR_PATH_RE = + /build\/\$\{\{\s*[^}]+\}\}\/[^"'`\s]*\/out\/(?:Final|Release|Stripped|Compressed|Optimized|Synced)/g + +const isInsideComputePathsBlock = ( + lines: string[], + lineIdx: number, +): boolean => { + // Walk backwards up to 60 lines looking for the start of the + // current step. If that step is a "Compute paths" step, the line + // is exempt. + for (let i = lineIdx; i >= Math.max(0, lineIdx - 60); i--) { + const l = lines[i] ?? '' + if (/^\s*-\s*name:/i.test(l)) { + // Step boundary — check if THIS step is a Compute paths step. + // The step body may include `id: paths` even if the name is + // something else (e.g. `id: stub-paths`), so look at the next + // ~20 lines for either marker. + for (let j = i; j < Math.min(lines.length, i + 20); j++) { + const m = lines[j] ?? '' + if ( + /^\s*-\s*name:\s*Compute\s+[\w-]+\s+paths/i.test(m) || + /^\s*id:\s*[\w-]*paths\s*$/i.test(m) + ) { + return true + } + if (j > i && /^\s*-\s*name:/i.test(m)) { + // Hit the next step — current step is NOT Compute paths. 
+ return false + } + } + return false + } + } + return false +} + +const scanWorkflowFile = (relPath: string): void => { + const full = path.join(REPO_ROOT, relPath) + let content: string + try { + content = readFileSync(full, 'utf8') + } catch { + return + } + const lines = content.split('\n') + + // First pass: collect every hand-built path occurrence outside a + // "Compute paths" step. Per the mantra, a single reference is fine + // — what's banned is reconstructing the same path 2+ times. + type PathHit = { + line: number + snippet: string + pathStr: string + } + const occurrences = new Map() + + for (let i = 0; i < lines.length; i++) { + const line = lines[i]! + if (/^\s*#/.test(line)) { + // Skip comment lines from C scan; they're under D below. + continue + } + if (isInsideComputePathsBlock(lines, i)) { + // Inside the canonical construction step — exempt. + continue + } + WORKFLOW_PATH_RE.lastIndex = 0 + WORKFLOW_GH_EXPR_PATH_RE.lastIndex = 0 + const matches: string[] = [] + let m: RegExpExecArray | null + while ((m = WORKFLOW_PATH_RE.exec(line)) !== null) { + matches.push(m[0]) + } + while ((m = WORKFLOW_GH_EXPR_PATH_RE.exec(line)) !== null) { + matches.push(m[0]) + } + for (const pathStr of matches) { + const list = occurrences.get(pathStr) ?? [] + list.push({ line: i + 1, snippet: line.trim(), pathStr }) + occurrences.set(pathStr, list) + } + } + + // Flag every occurrence of a shape that appears 2+ times. + for (const [pathStr, hits] of occurrences) { + if (hits.length < 2) { + continue + } + for (const hit of hits) { + findings.push({ + rule: 'C', + file: relPath, + line: hit.line, + snippet: hit.snippet, + message: `Workflow constructs the same path ${hits.length} times: ${pathStr}`, + fix: 'Add a "Compute paths" step (id: paths) early in the job that computes this path ONCE and exposes it via $GITHUB_OUTPUT. Reference as ${{ steps.paths.outputs. }} in subsequent steps. 
References of the constructed value are unlimited; reconstructing is the violation.', + }) + } + } + + // Rule D: comments encoding a fully-qualified multi-stage path + // (separate scan since it has different semantics). + for (let i = 0; i < lines.length; i++) { + const line = lines[i]! + if (!/^\s*#/.test(line)) { + continue + } + const literalShape = + /build\/(?:dev|prod|shared)\/[a-z0-9-]+\/(?:wasm\/)?out\/(?:Final|Release|Stripped|Compressed|Optimized|Synced)/i + if (literalShape.test(line)) { + findings.push({ + rule: 'D', + file: relPath, + line: i + 1, + snippet: line.trim(), + message: 'Comment encodes a fully-qualified path string.', + fix: 'Cite the canonical paths.mts (e.g. "see packages//scripts/paths.mts:getBuildPaths()") instead of duplicating the path string. Comments may describe structure with placeholders ("/") but should not be a parsable path.', + }) + } + } +} + +// ────────────────────────────────────────────────────────────────── +// Rule G: Makefile / Dockerfile / shell scan +// ────────────────────────────────────────────────────────────────── + +const SCRIPT_HAND_BUILT_RE = + /build\/\$?\{?(?:BUILD_MODE|MODE|prod|dev)\}?\/[\w${}.-]*\/out\/(?:Final|Release|Stripped|Compressed|Optimized|Synced)/g + +const scanScriptFile = (relPath: string): void => { + const full = path.join(REPO_ROOT, relPath) + let content: string + try { + content = readFileSync(full, 'utf8') + } catch { + return + } + const lines = content.split('\n') + const isDockerfile = + /Dockerfile/i.test(relPath) || /\.glibc$|\.musl$/.test(relPath) + + // First pass: collect every multi-stage path occurrence in this file, + // scoped per Dockerfile stage (each `FROM ... AS ...` starts a new + // scope where ENV/ARG don't propagate). + type Hit = { line: number; text: string; pathStr: string; stage: number } + const hits: Hit[] = [] + let stage = 0 + for (let i = 0; i < lines.length; i++) { + const line = lines[i]! 
+ if (/^\s*#/.test(line)) { + // Skip comments — documentation, not construction. + continue + } + if (isDockerfile && /^FROM\s+/i.test(line)) { + stage += 1 + continue + } + SCRIPT_HAND_BUILT_RE.lastIndex = 0 + let m: RegExpExecArray | null + while ((m = SCRIPT_HAND_BUILT_RE.exec(line)) !== null) { + hits.push({ + line: i + 1, + text: line.trim(), + pathStr: m[0], + stage, + }) + } + } + + // Group by (stage, pathStr) — only flag when a path is built 2+ + // times within the SAME Dockerfile stage (or anywhere in non- + // Dockerfile scripts, where stages don't apply). + const grouped = new Map() + for (const h of hits) { + const key = `${h.stage}::${h.pathStr}` + const list = grouped.get(key) ?? [] + list.push(h) + grouped.set(key, list) + } + for (const [, list] of grouped) { + if (list.length < 2) { + continue + } + for (const hit of list) { + findings.push({ + rule: 'G', + file: relPath, + line: hit.line, + snippet: hit.text, + message: `Hand-built multi-stage path constructed ${list.length} times in this file: ${hit.pathStr}`, + fix: 'Assign to a variable / ENV once near the top of the script / Dockerfile stage, with a comment naming the canonical paths.mts. Reference the variable everywhere downstream. References of a single construction are unlimited; reconstructing the same path is the violation.', + }) + } + } +} + +// ────────────────────────────────────────────────────────────────── +// Rule F: cross-file path repetition +// ────────────────────────────────────────────────────────────────── + +const checkRuleF = (): void => { + // A path is "constructed" each time we see a new path.join with a + // matching shape. Group findings of Rule A by their snippet shape; + // when the same shape appears in 2+ files, demote them to Rule F so + // the message is more accurate. 
+ const byShape = new Map() + for (const f of findings) { + if (f.rule !== 'A') { + continue + } + // Normalize: strip whitespace, identifiers, surrounding context; + // keep just the literal path-segment shape. + const literalsRe = /'[^']*'|"[^"]*"/g + const literals = (f.snippet.match(literalsRe) ?? []).join(',') + if (!literals) { + continue + } + const list = byShape.get(literals) ?? [] + list.push(f) + byShape.set(literals, list) + } + for (const [shape, list] of byShape) { + if (list.length < 2) { + continue + } + // Promote each Rule-A finding in this group to Rule F so the + // message tells the reader the issue is cross-file repetition, + // not just a single hand-build. + for (const f of list) { + f.rule = 'F' + f.message = `Same path shape constructed in ${list.length} places: ${shape.slice(0, 100)}` + f.fix = + 'Construct this path ONCE in a paths.mts (or build-infra helper) and import the computed value. References of the computed variable are unlimited; re-constructing the same shape twice is the violation.' + } + } +} + +// ────────────────────────────────────────────────────────────────── +// Main +// ────────────────────────────────────────────────────────────────── + +const main = (): number => { + // Scan code files (Rule A + B). + for (const rel of walk( + REPO_ROOT, + p => p.endsWith('.mts') || p.endsWith('.cts'), + )) { + if (isExempt(rel)) { + continue + } + scanCodeFile(rel) + } + // Scan workflows (Rule C + D). + const workflowDir = path.join(REPO_ROOT, '.github', 'workflows') + if (existsSync(workflowDir)) { + for (const rel of walk(workflowDir, p => p.endsWith('.yml'))) { + if (isExempt(rel)) { + continue + } + scanWorkflowFile(rel) + } + } + // Scan scripts/Makefiles/Dockerfiles (Rule G). 
+ for (const rel of walk(REPO_ROOT, p => { + const base = path.basename(p) + return ( + base === 'Makefile' || + base.endsWith('.mk') || + base.endsWith('.Dockerfile') || + base === 'Dockerfile' || + base.endsWith('.glibc') || + base.endsWith('.musl') || + (base.endsWith('.sh') && !p.includes('test/')) + ) + })) { + if (isExempt(rel)) { + continue + } + scanScriptFile(rel) + } + // Promote cross-file Rule-A repeats to Rule F. + checkRuleF() + + // Filter against allowlist. + const blocking = findings.filter(f => !isAllowlisted(f)) + + if (args.values.json) { + process.stdout.write( + JSON.stringify( + { findings: blocking, allowlisted: findings.length - blocking.length }, + null, + 2, + ) + '\n', + ) + return blocking.length === 0 ? 0 : 1 + } + + if (blocking.length === 0) { + if (!args.values.quiet) { + logger.success('Path-hygiene check passed (1 path, 1 reference)') + if (findings.length > 0) { + logger.substep(`${findings.length} finding(s) allowlisted`) + } + } + return 0 + } + + logger.error(`Path-hygiene check FAILED — ${blocking.length} finding(s)`) + logger.log('') + logger.log('Mantra: 1 path, 1 reference') + logger.log('') + for (const f of blocking) { + logger.log(` [${f.rule}] ${f.file}:${f.line}`) + logger.log(` ${f.snippet}`) + logger.log(` → ${f.message}`) + if (args.values['show-hashes']) { + logger.log(` snippet_hash: ${snippetHash(f.snippet)}`) + } + if (args.values.explain) { + logger.log(` Fix: ${f.fix}`) + } + logger.log('') + } + if (!args.values.explain) { + logger.log('Run with --explain to see fix suggestions per finding.') + logger.log( + 'Add intentional exceptions to .github/paths-allowlist.yml with a `reason` field.', + ) + logger.log( + 'Run with --show-hashes to print the snippet_hash for each finding (drift-resistant allowlisting).', + ) + } + return 1 +} + +try { + process.exitCode = main() +} catch (e) { + logger.error(`Path-hygiene gate crashed: ${e}`) + process.exitCode = 2 +} diff --git 
a/.claude/skills/path-guard/reference/claude-md-rule.md b/.claude/skills/path-guard/reference/claude-md-rule.md new file mode 100644 index 00000000..3e32b1ba --- /dev/null +++ b/.claude/skills/path-guard/reference/claude-md-rule.md @@ -0,0 +1,29 @@ + + +## 1 path, 1 reference + +**A path is *constructed* exactly once. Everywhere else *references* the constructed value.** + +Referencing a single computed path many times is fine — that's the whole point of computing it once. What's banned is *re-constructing* the same path in multiple places, because that's where drift is born. + +Three concrete shapes: + +1. **Within a package** — every script, test, and lib file that needs a build path imports it from the package's `scripts/paths.mts` (or `lib/paths.mts`). No `path.join('build', mode, ...)` outside that module. + +2. **Across packages** — when package B consumes package A's output, B imports A's `paths.mts` via the workspace `exports` field. Never `path.join(PKG, '..', '', 'build', ...)`. The R28 yoga/ink bug — ink hand-building yoga's wasm path and missing the `wasm/` segment — is the canonical failure mode this rule prevents. + +3. **Workflows, Dockerfiles, shell scripts** — they can't `import` TS, so they construct the string once and reference it everywhere downstream. Workflows: a "Compute paths" step exposes `steps.paths.outputs.final_dir`; later steps read `${{ steps.paths.outputs.final_dir }}`. Dockerfiles/shell: assign once to a variable / `ENV`, reference by name thereafter. Each canonical construction carries a comment naming the source-of-truth `paths.mts`. **Re-building** the same path in a second step is the violation, not referring to the constructed value many times. + +Comments may describe path *structure* with placeholders ("`/`" or "`${BUILD_MODE}/${PLATFORM_ARCH}`") but should not encode a complete literal path string. 
Code execution takes priority over docs: violations in `.mts`/`.cts`, Makefiles, Dockerfiles, workflow YAML, and shell scripts are blocking. README and doc-comment violations are advisory unless they contain a fully-qualified path with no parametric placeholders.
+
+### Three-level enforcement
+
+- **Hook** — `.claude/hooks/path-guard/` blocks `Edit`/`Write` calls that would introduce a violation in a `.mts`/`.cts` file. Refusal at edit time stops new duplication from landing.
+- **Gate** — `scripts/check-paths.mts` runs in `pnpm check`. Fails the build on any violation that isn't allowlisted in `.github/paths-allowlist.yml`.
+- **Skill** — `/path-guard` audits the repo and fixes findings; `/path-guard check` reports only; `/path-guard install` drops the gate + hook + rule into a fresh repo.
+
+The mantra is intentionally short so it sticks: **1 path, 1 reference**. When in doubt, find the canonical owner and import from it.
diff --git a/.claude/skills/path-guard/reference/paths-allowlist.yml.tmpl b/.claude/skills/path-guard/reference/paths-allowlist.yml.tmpl
new file mode 100644
index 00000000..e2746660
--- /dev/null
+++ b/.claude/skills/path-guard/reference/paths-allowlist.yml.tmpl
@@ -0,0 +1,29 @@
+# Path-hygiene gate allowlist.
+# Mantra: 1 path, 1 reference.
+#
+# Each entry exempts a specific finding from `scripts/check-paths.mts`.
+# Entries MUST carry a `reason` so the list stays auditable and
+# entries can be removed when the underlying code changes.
+#
+# Schema (all top-level keys optional except `reason`):
+#
+# - rule: Rule letter (A, B, C, D, F, G). Omit to match any rule.
+#   file: Substring match against the relative file path.
+#   pattern: Substring match against the offending snippet.
+#   line: Exact line number of the finding (no ±2 slack).
+#   snippet_hash: 12-hex normalized-snippet hash (see --show-hashes).
+#   reason: Why this site is genuinely exempt. Required.
+#
+# Prefer narrow entries (rule + file + line + pattern) over blanket
+# `file:` entries that exempt the whole file. 
Genuine exemptions are +# rare — most "false positives" should be reported as gate bugs. +# +# Example: +# +# - rule: A +# file: packages/foo/scripts/legacy-build.mts +# line: 42 +# pattern: "path.join(testDir, 'out', 'Final')" +# reason: | +# legacy-build.mts is scheduled for removal in v2.0; refactoring +# its path construction now would conflict with the rewrite. diff --git a/.claude/skills/security-scan/SKILL.md b/.claude/skills/security-scan/SKILL.md index 7f2fd77e..0c2cf12e 100644 --- a/.claude/skills/security-scan/SKILL.md +++ b/.claude/skills/security-scan/SKILL.md @@ -2,6 +2,7 @@ name: security-scan description: Runs a multi-tool security scan — AgentShield for Claude config, zizmor for GitHub Actions, and optionally Socket CLI for dependency scanning. Produces an A-F graded security report. Use after modifying `.claude/` config, hooks, agents, or GitHub Actions workflows, and before releases. user-invocable: true +allowed-tools: Task, Bash, Read, Grep, Glob --- # Security Scan diff --git a/.config/tsconfig.check.json b/.config/tsconfig.check.json index 92fbca1e..02e43cd6 100644 --- a/.config/tsconfig.check.json +++ b/.config/tsconfig.check.json @@ -11,5 +11,5 @@ "verbatimModuleSyntax": false }, "include": ["../**/*.ts", "../**/*.mts"], - "exclude": ["../**/node_modules/**/*"] + "exclude": ["../**/.cache/**", "../**/node_modules/**/*"] } diff --git a/.git-hooks/_helpers.mts b/.git-hooks/_helpers.mts new file mode 100644 index 00000000..b8a29978 --- /dev/null +++ b/.git-hooks/_helpers.mts @@ -0,0 +1,276 @@ +// Shared helpers for git hooks — API-key allowlist + ANSI colors + +// content scanners. Imported by .git-hooks/{commit-msg,pre-commit, +// pre-push}.mts. No third-party deps; uses only Node built-ins. +// +// Requires Node 25+ for stable .mts type-stripping (no flag needed). +// Earlier Node versions either lacked --experimental-strip-types or +// shipped it under a flag, both unacceptable for hook ergonomics. 
+ +import { spawnSync } from 'node:child_process' +import { existsSync, readFileSync, statSync } from 'node:fs' + +// Hard-fail if Node is below 25. This runs at module load — every +// hook invocation imports _helpers.mts before doing anything, so the +// version check is the first thing that happens. +const NODE_MIN_MAJOR = 25 +const nodeMajor = Number.parseInt( + process.versions.node.split('.')[0] || '0', + 10, +) +if (nodeMajor < NODE_MIN_MAJOR) { + process.stderr.write( + `\x1b[0;31m✗ Hook requires Node >= ${NODE_MIN_MAJOR}.0.0 (have v${process.versions.node})\x1b[0m\n`, + ) + process.stderr.write( + 'Install Node 25+ — these hooks rely on stable .mts type stripping.\n', + ) + process.exit(1) +} + +// ── Allowlist constants ──────────────────────────────────────────── +// These exempt known-safe matches from the API-key scanner. Each +// allowlist entry is a substring; if the matched line contains it, +// the line is dropped from the findings. + +// Real public API key shipped in socket-lib test fixtures. Safe to +// appear anywhere in the fleet. +export const ALLOWED_PUBLIC_KEY = + 'sktsec_t_--RAN5U4ivauy4w37-6aoKyYPDt5ZbaT5JBVMqiwKo_api' + +// Substring marker used in test fixtures (see +// socket-lib/test/unit/utils/fake-tokens.ts). Lines containing this +// are treated as test fixtures. +export const FAKE_TOKEN_MARKER = 'socket-test-fake-token' + +// Legacy lib-scoped marker — accepted during the rename from +// `socket-lib-test-fake-token` to `socket-test-fake-token`. Drop when +// lib's rename PR lands. +export const FAKE_TOKEN_LEGACY = 'socket-lib-test-fake-token' + +// Name of the env var used in shell examples; not a token value. 
+export const SOCKET_SECURITY_ENV = 'SOCKET_SECURITY_API_KEY=' + +// ── ANSI colors ──────────────────────────────────────────────────── + +export const RED = '\x1b[0;31m' +export const GREEN = '\x1b[0;32m' +export const YELLOW = '\x1b[1;33m' +export const NC = '\x1b[0m' + +// ── Output helpers ───────────────────────────────────────────────── + +export const out = (msg: string): void => { + process.stdout.write(msg + '\n') +} + +export const err = (msg: string): void => { + process.stderr.write(msg + '\n') +} + +export const red = (msg: string): string => `${RED}${msg}${NC}` +export const green = (msg: string): string => `${GREEN}${msg}${NC}` +export const yellow = (msg: string): string => `${YELLOW}${msg}${NC}` + +// ── API-key allowlist filter ─────────────────────────────────────── + +// Drops any line that matches an allowlist entry. +export const filterAllowedApiKeys = (lines: readonly string[]): string[] => { + return lines.filter( + line => + !line.includes(ALLOWED_PUBLIC_KEY) && + !line.includes(FAKE_TOKEN_MARKER) && + !line.includes(FAKE_TOKEN_LEGACY) && + !line.includes(SOCKET_SECURITY_ENV) && + !line.includes('.example'), + ) +} + +// ── Personal-path scanner ────────────────────────────────────────── + +// Real personal paths to flag: /Users/foo/, /home/foo/, C:\Users\foo\. +const PERSONAL_PATH_RE = + /(\/Users\/[^/\s]+\/|\/home\/[^/\s]+\/|C:\\Users\\[^\\]+\\)/ + +// Placeholders we ALLOW (documentation, not real leaks): any path +// component wrapped in <...> or starting with $VAR / ${VAR}. +const PERSONAL_PATH_PLACEHOLDER_RE = + /(\/Users\/<[^>]*>\/|\/home\/<[^>]*>\/|C:\\Users\\<[^>]*>\\|\/Users\/\$\{?[A-Z_]+\}?\/|\/home\/\$\{?[A-Z_]+\}?\/)/ + +export type LineHit = { lineNumber: number; line: string } + +// Returns lines that contain a real personal path (excludes lines +// that are pure placeholders). Caller decides what to do with hits. 
+export const scanPersonalPaths = (text: string): LineHit[] => { + const hits: LineHit[] = [] + const lines = text.split('\n') + for (let i = 0; i < lines.length; i++) { + const line = lines[i]! + if (!PERSONAL_PATH_RE.test(line)) { + continue + } + if (PERSONAL_PATH_PLACEHOLDER_RE.test(line)) { + // Has placeholder — but might also have a real path on the + // same line. Strip placeholder forms and re-test. + const stripped = line.replace( + new RegExp(PERSONAL_PATH_PLACEHOLDER_RE, 'g'), + '', + ) + if (!PERSONAL_PATH_RE.test(stripped)) { + continue + } + } + hits.push({ lineNumber: i + 1, line }) + } + return hits +} + +// ── Secret scanners ──────────────────────────────────────────────── + +const SOCKET_API_KEY_RE = /sktsec_[a-zA-Z0-9_-]+/ +const AWS_KEY_RE = /(aws_access_key|aws_secret|\bAKIA[0-9A-Z]{16}\b)/i +const GITHUB_TOKEN_RE = /gh[ps]_[a-zA-Z0-9]{36}/ +const PRIVATE_KEY_RE = /-----BEGIN (RSA |EC |DSA )?PRIVATE KEY-----/ + +export const scanSocketApiKeys = (text: string): LineHit[] => { + const hits: LineHit[] = [] + const lines = text.split('\n') + for (let i = 0; i < lines.length; i++) { + const line = lines[i]! + if (SOCKET_API_KEY_RE.test(line)) { + hits.push({ lineNumber: i + 1, line }) + } + } + return filterAllowedApiKeys(hits.map(h => h.line)).map(line => ({ + lineNumber: hits.find(h => h.line === line)!.lineNumber, + line, + })) +} + +export const scanAwsKeys = (text: string): LineHit[] => { + const hits: LineHit[] = [] + const lines = text.split('\n') + for (let i = 0; i < lines.length; i++) { + const line = lines[i]! + if (AWS_KEY_RE.test(line)) { + hits.push({ lineNumber: i + 1, line }) + } + } + return hits +} + +export const scanGitHubTokens = (text: string): LineHit[] => { + const hits: LineHit[] = [] + const lines = text.split('\n') + for (let i = 0; i < lines.length; i++) { + const line = lines[i]! 
+ if (GITHUB_TOKEN_RE.test(line)) { + hits.push({ lineNumber: i + 1, line }) + } + } + return hits +} + +export const scanPrivateKeys = (text: string): LineHit[] => { + const hits: LineHit[] = [] + const lines = text.split('\n') + for (let i = 0; i < lines.length; i++) { + const line = lines[i]! + if (PRIVATE_KEY_RE.test(line)) { + hits.push({ lineNumber: i + 1, line }) + } + } + return hits +} + +// ── npx/dlx scanner ──────────────────────────────────────────────── + +const NPX_DLX_RE = /\b(npx|pnpm dlx|yarn dlx)\b/ + +export const scanNpxDlx = (text: string): LineHit[] => { + const hits: LineHit[] = [] + const lines = text.split('\n') + for (let i = 0; i < lines.length; i++) { + const line = lines[i]! + if (NPX_DLX_RE.test(line) && !line.includes('# zizmor:')) { + hits.push({ lineNumber: i + 1, line }) + } + } + return hits +} + +// ── AI attribution scanner ───────────────────────────────────────── + +const AI_ATTRIBUTION_RE = + /(Generated with.*(Claude|AI)|Co-Authored-By: Claude|Co-Authored-By: AI|🤖 Generated|AI generated|@anthropic\.com|Assistant:|Generated by Claude|Machine generated|Claude Code)/i + +export const containsAiAttribution = (text: string): boolean => + AI_ATTRIBUTION_RE.test(text) + +export const stripAiAttribution = ( + text: string, +): { cleaned: string; removed: number } => { + const lines = text.split('\n') + const kept: string[] = [] + let removed = 0 + for (const line of lines) { + if (AI_ATTRIBUTION_RE.test(line)) { + removed++ + } else { + kept.push(line) + } + } + return { cleaned: kept.join('\n'), removed } +} + +// ── File classification ──────────────────────────────────────────── + +// Files we never scan: hooks themselves, husky shims, test fixtures. 
+const SKIP_FILE_RE = + /\.(test|spec)\.(m?[jt]s|tsx?|cts|mts)$|\.example$|\/test\/|\/tests\/|fixtures\/|\.git-hooks\/|\.husky\/|node_modules\/|pnpm-lock\.yaml/ + +export const shouldSkipFile = (filePath: string): boolean => + SKIP_FILE_RE.test(filePath) + +// Returns file content as a string. For binaries, runs `strings` to +// extract printable byte sequences (catches paths embedded in WASM +// or other compiled artifacts). +export const readFileForScan = (filePath: string): string => { + if (!existsSync(filePath)) { + return '' + } + try { + if (statSync(filePath).isDirectory()) { + return '' + } + } catch { + return '' + } + // Detect binary via grep -I (matches text-only); if grep says + // binary, fall back to `strings`. + const grepResult = spawnSync('grep', ['-qI', '', filePath]) + if (grepResult.status === 0) { + // Text file. + try { + return readFileSync(filePath, 'utf8') + } catch { + return '' + } + } + // Binary — extract strings. + const stringsResult = spawnSync('strings', [filePath], { + encoding: 'utf8', + }) + return stringsResult.stdout || '' +} + +// ── Git wrappers ─────────────────────────────────────────────────── + +export const git = (...args: string[]): string => { + const result = spawnSync('git', args, { encoding: 'utf8' }) + return result.stdout.trim() +} + +export const gitLines = (...args: string[]): string[] => { + const out = git(...args) + return out ? out.split('\n') : [] +} diff --git a/.git-hooks/_helpers.sh b/.git-hooks/_helpers.sh deleted file mode 100644 index 378333f1..00000000 --- a/.git-hooks/_helpers.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash -# Shared helpers for git hooks. -# Sourced by .git-hooks/commit-msg, pre-commit, pre-push. -# -# Constants -# --------- -# ALLOWED_PUBLIC_KEY Real public API key shipped in socket-lib test -# fixtures. Safe to appear in commits anywhere. -# FAKE_TOKEN_MARKER Substring marker used in fleet test fixtures. 
-# FAKE_TOKEN_LEGACY Legacy lib-scoped marker — accepted during the -# rename from `socket-lib-test-fake-token` to -# `socket-test-fake-token`. Drop when socket-lib's -# fixture rename PR lands. -# SOCKET_SECURITY_ENV Env var name used in shell examples; not a token. -# -# Functions -# --------- -# filter_allowed_api_keys Reads stdin, drops allowlist matches. -# -# Colors: RED, GREEN, YELLOW, NC - -# shellcheck disable=SC2034 # constants sourced by other hooks -ALLOWED_PUBLIC_KEY="sktsec_t_--RAN5U4ivauy4w37-6aoKyYPDt5ZbaT5JBVMqiwKo_api" -FAKE_TOKEN_MARKER="socket-test-fake-token" -FAKE_TOKEN_LEGACY="socket-lib-test-fake-token" -SOCKET_SECURITY_ENV="SOCKET_SECURITY_API_KEY=" - -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -NC='\033[0m' - -filter_allowed_api_keys() { - grep -v "$ALLOWED_PUBLIC_KEY" \ - | grep -v "$FAKE_TOKEN_MARKER" \ - | grep -v "$FAKE_TOKEN_LEGACY" \ - | grep -v "$SOCKET_SECURITY_ENV" \ - | grep -v '\.example' -} diff --git a/.git-hooks/commit-msg b/.git-hooks/commit-msg deleted file mode 100755 index 7acf4c56..00000000 --- a/.git-hooks/commit-msg +++ /dev/null @@ -1,90 +0,0 @@ -#!/bin/bash -# Socket Security Commit-msg Hook -# Additional security layer - validates commit even if pre-commit was bypassed. - -set -e - -# shellcheck source=./_helpers.sh -. "$(dirname "$0")/_helpers.sh" - -ERRORS=0 - -# Get files in this commit (for security checks). -COMMITTED_FILES=$(git diff --cached --name-only --diff-filter=ACM 2>/dev/null || printf "\n") - -# Quick checks for critical issues in committed files. -if [ -n "$COMMITTED_FILES" ]; then - for file in $COMMITTED_FILES; do - if [ -f "$file" ]; then - # Check for Socket API keys (except allowed). - if grep -E 'sktsec_[a-zA-Z0-9_-]+' "$file" 2>/dev/null | filter_allowed_api_keys | grep -q .; then - printf "${RED}✗ SECURITY: Potential API key detected in commit!${NC}\n" - printf "File: %s\n" "$file" - ERRORS=$((ERRORS + 1)) - fi - - # Check for .env files. 
- if echo "$file" | grep -qE '^\.env(\.[^/]+)?$' && ! echo "$file" | grep -qE '^\.env\.(example|test)$'; then - printf "${RED}✗ SECURITY: .env file in commit!${NC}\n" - ERRORS=$((ERRORS + 1)) - fi - fi - done -fi - -# Block Linear issue references in the commit message. -# Linear tracking lives in Linear; keep commit history tool-agnostic. -# Team keys enumerated from the Socket workspace. PATCH listed before PAT so -# the engine matches the longer prefix first on strings like "PATCH-123". -COMMIT_MSG_FILE="$1" -LINEAR_TEAM_KEYS='ASK|AUTO|BOT|CE|CORE|DAT|DES|DEV|ENG|INFRA|LAB|MAR|MET|OPS|PAR|PATCH|PAT|PLAT|REA|SALES|SBOM|SEC|SMO|SUP|TES|TI|WEB' -if [ -f "$COMMIT_MSG_FILE" ]; then - LINEAR_HITS=$(grep -vE '^#' "$COMMIT_MSG_FILE" 2>/dev/null \ - | grep -oE "(^|[^A-Za-z0-9_])($LINEAR_TEAM_KEYS)-[0-9]+($|[^A-Za-z0-9_])|linear\.app/[A-Za-z0-9/_-]+" \ - | head -5 || true) - if [ -n "$LINEAR_HITS" ]; then - printf "${RED}✗ Commit message references Linear issue(s):${NC}\n" - printf '%s\n' "$LINEAR_HITS" | sed 's/^/ /' - printf "${RED}Linear tracking lives in Linear. Remove the reference from the commit message.${NC}\n" - ERRORS=$((ERRORS + 1)) - fi -fi - -# Auto-strip AI attribution from commit message. -if [ -f "$COMMIT_MSG_FILE" ]; then - # Create a temporary file to store the cleaned message. - TEMP_FILE=$(mktemp) || { - printf "${RED}✗ Failed to create temporary file${NC}\n" >&2 - exit 1 - } - # Ensure cleanup on exit - trap 'rm -f "$TEMP_FILE"' EXIT - REMOVED_LINES=0 - - # Read the commit message line by line and filter out AI attribution. - while IFS= read -r line || [ -n "$line" ]; do - # Check if this line contains AI attribution patterns. - if echo "$line" | grep -qiE "(Generated with|Co-Authored-By: Claude|Co-Authored-By: AI|🤖 Generated|AI generated|Claude Code|@anthropic|Assistant:|Generated by Claude|Machine generated)"; then - REMOVED_LINES=$((REMOVED_LINES + 1)) - else - # Line doesn't contain AI attribution, keep it. 
- printf '%s\n' "$line" >> "$TEMP_FILE" - fi - done < "$COMMIT_MSG_FILE" - - # Replace the original commit message with the cleaned version. - if [ $REMOVED_LINES -gt 0 ]; then - mv "$TEMP_FILE" "$COMMIT_MSG_FILE" - printf "${GREEN}✓ Auto-stripped${NC} $REMOVED_LINES AI attribution line(s) from commit message\n" - else - # No lines were removed, just clean up the temp file. - rm -f "$TEMP_FILE" - fi -fi - -if [ $ERRORS -gt 0 ]; then - printf "${RED}✗ Commit blocked by security validation${NC}\n" - exit 1 -fi - -exit 0 diff --git a/.git-hooks/commit-msg.mts b/.git-hooks/commit-msg.mts new file mode 100644 index 00000000..ae9186e1 --- /dev/null +++ b/.git-hooks/commit-msg.mts @@ -0,0 +1,90 @@ +#!/usr/bin/env node +// Socket Security Commit-msg Hook +// +// Two responsibilities: +// 1. Block commits that introduce API keys / .env files (security +// layer that runs even when pre-commit is bypassed via +// `--no-verify`). +// 2. Auto-strip AI attribution lines from the commit message before +// git records the commit. +// +// Wired via .husky/commit-msg, which invokes this with the path to the +// commit message file as argv[2] (after the script path itself). + +import { existsSync, readFileSync, writeFileSync } from 'node:fs' + +import { basename } from 'node:path' +import process from 'node:process' + +import { + err, + gitLines, + green, + out, + red, + readFileForScan, + scanSocketApiKeys, + shouldSkipFile, + stripAiAttribution, +} from './_helpers.mts' + +const main = (): number => { + let errors = 0 + const committedFiles = gitLines( + 'diff', + '--cached', + '--name-only', + '--diff-filter=ACM', + ) + + for (const file of committedFiles) { + if (!file || shouldSkipFile(file)) { + continue + } + const text = readFileForScan(file) + if (!text) { + continue + } + + // Socket API keys (allowlist-aware). 
+ const apiHits = scanSocketApiKeys(text) + if (apiHits.length > 0) { + out(red('✗ SECURITY: Potential API key detected in commit!')) + out(`File: ${file}`) + errors++ + } + + // .env files at any depth — allow only .env.example, .env.test, + // .env.precommit (templates / tracked placeholders). + const base = basename(file) + if ( + /^\.env(\.[^/]+)?$/.test(base) && + !/^\.env\.(example|test|precommit)$/.test(base) + ) { + out(red('✗ SECURITY: .env file in commit!')) + out(`File: ${file}`) + errors++ + } + } + + // Auto-strip AI attribution lines from the commit message. + const commitMsgFile = process.argv[2] + if (commitMsgFile && existsSync(commitMsgFile)) { + const original = readFileSync(commitMsgFile, 'utf8') + const { cleaned, removed } = stripAiAttribution(original) + if (removed > 0) { + writeFileSync(commitMsgFile, cleaned) + out( + `${green('✓ Auto-stripped')} ${removed} AI attribution line(s) from commit message`, + ) + } + } + + if (errors > 0) { + err(red('✗ Commit blocked by security validation')) + return 1 + } + return 0 +} + +process.exit(main()) diff --git a/.git-hooks/pre-commit b/.git-hooks/pre-commit deleted file mode 100755 index 46f0c7da..00000000 --- a/.git-hooks/pre-commit +++ /dev/null @@ -1,117 +0,0 @@ -#!/bin/bash -# Socket Security Checks -# Prevents committing sensitive data and common mistakes. - -set -e - -# shellcheck source=./_helpers.sh -. "$(dirname "$0")/_helpers.sh" - -echo "${GREEN}Running Socket Security checks...${NC}" - -# Get list of staged files. -STAGED_FILES=$(git diff --cached --name-only --diff-filter=ACM) - -if [ -z "$STAGED_FILES" ]; then - echo "${GREEN}✓ No files to check${NC}" - exit 0 -fi - -ERRORS=0 - -# Check for .DS_Store files. -echo "Checking for .DS_Store files..." -if echo "$STAGED_FILES" | grep -q '\.DS_Store'; then - echo "${RED}✗ ERROR: .DS_Store file detected!${NC}" - echo "$STAGED_FILES" | grep '\.DS_Store' - ERRORS=$((ERRORS + 1)) -fi - -# Check for log files. -echo "Checking for log files..." 
-if echo "$STAGED_FILES" | grep -E '\.log$' | grep -v 'test.*\.log'; then - echo "${RED}✗ ERROR: Log file detected!${NC}" - echo "$STAGED_FILES" | grep -E '\.log$' | grep -v 'test.*\.log' - ERRORS=$((ERRORS + 1)) -fi - -# Check for .env files. -echo "Checking for .env files..." -if echo "$STAGED_FILES" | grep -E '^\.env(\.local)?$'; then - echo "${RED}✗ ERROR: .env or .env.local file detected!${NC}" - echo "$STAGED_FILES" | grep -E '^\.env(\.local)?$' - echo "These files should never be committed. Use .env.example instead." - ERRORS=$((ERRORS + 1)) -fi - -# Check for hardcoded user paths (generic detection). -echo "Checking for hardcoded personal paths..." -for file in $STAGED_FILES; do - if [ -f "$file" ]; then - # Skip test files and hook scripts. - if echo "$file" | grep -qE '\.(test|spec)\.|/test/|/tests/|fixtures/|\.git-hooks/|\.husky/'; then - continue - fi - - # Check for common user path patterns. - if grep -E '(/Users/[^/\s]+/|/home/[^/\s]+/|C:\\Users\\[^\\]+\\)' "$file" 2>/dev/null | grep -q .; then - echo "${RED}✗ ERROR: Hardcoded personal path found in: $file${NC}" - grep -n -E '(/Users/[^/\s]+/|/home/[^/\s]+/|C:\\Users\\[^\\]+\\)' "$file" | head -3 - echo "Replace with relative paths or environment variables." - ERRORS=$((ERRORS + 1)) - fi - fi -done - -# Check for Socket API keys. -echo "Checking for API keys..." -for file in $STAGED_FILES; do - if [ -f "$file" ]; then - if grep -E 'sktsec_[a-zA-Z0-9_-]+' "$file" 2>/dev/null | filter_allowed_api_keys | grep -q .; then - echo "${YELLOW}⚠ WARNING: Potential API key found in: $file${NC}" - grep -n 'sktsec_' "$file" | filter_allowed_api_keys | head -3 - echo "If this is a real API key, DO NOT COMMIT IT." - fi - fi -done - -# Check for common secret patterns. -echo "Checking for potential secrets..." -for file in $STAGED_FILES; do - if [ -f "$file" ]; then - # Skip test files, example files, and hook scripts. 
- if echo "$file" | grep -qE '\.(test|spec)\.(m?[jt]s|tsx?)$|\.example$|/test/|/tests/|fixtures/|\.git-hooks/|\.husky/'; then - continue - fi - - # Check for AWS keys. - if grep -iE '(aws_access_key|aws_secret|AKIA[0-9A-Z]{16})' "$file" 2>/dev/null | grep -q .; then - echo "${RED}✗ ERROR: Potential AWS credentials found in: $file${NC}" - grep -n -iE '(aws_access_key|aws_secret|AKIA[0-9A-Z]{16})' "$file" | head -3 - ERRORS=$((ERRORS + 1)) - fi - - # Check for GitHub tokens. - if grep -E 'gh[ps]_[a-zA-Z0-9]{36}' "$file" 2>/dev/null | grep -q .; then - echo "${RED}✗ ERROR: Potential GitHub token found in: $file${NC}" - grep -n -E 'gh[ps]_[a-zA-Z0-9]{36}' "$file" | head -3 - ERRORS=$((ERRORS + 1)) - fi - - # Check for private keys. - if grep -E '-----BEGIN (RSA |EC |DSA )?PRIVATE KEY-----' "$file" 2>/dev/null | grep -q .; then - echo "${RED}✗ ERROR: Private key found in: $file${NC}" - ERRORS=$((ERRORS + 1)) - fi - fi -done - -if [ $ERRORS -gt 0 ]; then - echo "" - echo "${RED}✗ Security check failed with $ERRORS error(s).${NC}" - echo "Fix the issues above and try again." - exit 1 -fi - -echo "${GREEN}✓ All security checks passed!${NC}" -exit 0 diff --git a/.git-hooks/pre-commit.mts b/.git-hooks/pre-commit.mts new file mode 100644 index 00000000..61df1057 --- /dev/null +++ b/.git-hooks/pre-commit.mts @@ -0,0 +1,190 @@ +#!/usr/bin/env node +// Socket Security Pre-commit Hook +// +// Local-defense layer: scans staged files for sensitive content +// before git records the commit. Mandatory enforcement re-runs in +// pre-push for the final gate. +// +// Bypassable: --no-verify skips this hook entirely. Use sparingly +// (hotfixes, history operations, pre-build states). 
+ +import { basename } from 'node:path' +import process from 'node:process' + +import { + err, + gitLines, + green, + out, + red, + readFileForScan, + scanAwsKeys, + scanGitHubTokens, + scanNpxDlx, + scanPersonalPaths, + scanPrivateKeys, + scanSocketApiKeys, + shouldSkipFile, + yellow, +} from './_helpers.mts' + +const main = (): number => { + out(green('Running Socket Security checks...')) + const stagedFiles = gitLines( + 'diff', + '--cached', + '--name-only', + '--diff-filter=ACM', + ) + if (stagedFiles.length === 0) { + out(green('✓ No files to check')) + return 0 + } + + let errors = 0 + + // .DS_Store files. + out('Checking for .DS_Store files...') + const dsStores = stagedFiles.filter(f => f.includes('.DS_Store')) + if (dsStores.length > 0) { + out(red('✗ ERROR: .DS_Store file detected!')) + dsStores.forEach(f => out(f)) + errors++ + } + + // Log files (ignore test logs). + out('Checking for log files...') + const logs = stagedFiles.filter( + f => f.endsWith('.log') && !/test.*\.log$/.test(f), + ) + if (logs.length > 0) { + out(red('✗ ERROR: Log file detected!')) + logs.forEach(f => out(f)) + errors++ + } + + // .env files at any depth — allow only .env.example, .env.test, + // .env.precommit (templates / tracked placeholders). Match the + // commit-msg.mts behavior: a nested .env.local is just as much a + // leak as a root-level one. basename() catches both. + out('Checking for .env files...') + const envFiles = stagedFiles.filter(f => { + const base = basename(f) + return ( + /^\.env(\.[^/]+)?$/.test(base) && + !/^\.env\.(example|test|precommit)$/.test(base) + ) + }) + if (envFiles.length > 0) { + out(red('✗ ERROR: .env file detected!')) + envFiles.forEach(f => out(f)) + out( + 'These files should never be committed. Use .env.example for templates.', + ) + errors++ + } + + // Hardcoded personal paths. 
+ out('Checking for hardcoded personal paths...') + for (const file of stagedFiles) { + if (shouldSkipFile(file)) { + continue + } + const text = readFileForScan(file) + if (!text) { + continue + } + const hits = scanPersonalPaths(text) + if (hits.length > 0) { + out(red(`✗ ERROR: Hardcoded personal path found in: ${file}`)) + hits.slice(0, 3).forEach(h => out(`${h.lineNumber}:${h.line.trim()}`)) + out('Replace with relative paths or environment variables.') + errors++ + } + } + + // Socket API keys (warning, not blocking). + out('Checking for API keys...') + for (const file of stagedFiles) { + if (shouldSkipFile(file)) { + continue + } + const text = readFileForScan(file) + if (!text) { + continue + } + const hits = scanSocketApiKeys(text) + if (hits.length > 0) { + out(yellow(`⚠ WARNING: Potential API key found in: ${file}`)) + hits.slice(0, 3).forEach(h => out(`${h.lineNumber}:${h.line.trim()}`)) + out('If this is a real API key, DO NOT COMMIT IT.') + } + } + + // Other secret patterns (AWS, GitHub, private keys). + out('Checking for potential secrets...') + for (const file of stagedFiles) { + if (shouldSkipFile(file)) { + continue + } + const text = readFileForScan(file) + if (!text) { + continue + } + + const aws = scanAwsKeys(text) + if (aws.length > 0) { + out(red(`✗ ERROR: Potential AWS credentials found in: ${file}`)) + aws.slice(0, 3).forEach(h => out(`${h.lineNumber}:${h.line.trim()}`)) + errors++ + } + + const gh = scanGitHubTokens(text) + if (gh.length > 0) { + out(red(`✗ ERROR: Potential GitHub token found in: ${file}`)) + gh.slice(0, 3).forEach(h => out(`${h.lineNumber}:${h.line.trim()}`)) + errors++ + } + + const pk = scanPrivateKeys(text) + if (pk.length > 0) { + out(red(`✗ ERROR: Private key found in: ${file}`)) + errors++ + } + } + + // npx/dlx usage. 
+ out('Checking for npx/dlx usage...') + for (const file of stagedFiles) { + if ( + file.includes('node_modules/') || + file.endsWith('pnpm-lock.yaml') || + file.includes('.git-hooks/') + ) { + continue + } + const text = readFileForScan(file) + if (!text) { + continue + } + const hits = scanNpxDlx(text) + if (hits.length > 0) { + out(red(`✗ ERROR: npx/dlx usage found in: ${file}`)) + hits.slice(0, 3).forEach(h => out(`${h.lineNumber}:${h.line.trim()}`)) + out("Use 'pnpm exec ' or 'pnpm run