fix(review): address 4 issues from independent code review

majiayu000 · majiayu000 · commit 2099c177f298 · 2026-04-13T21:43:28.000+08:00
Issue 1 (post-edit-guard suppressor bypass): vg_filter_suppressed now
tracks, across lines, whether it is inside a backtick template literal
or a triple-double-quote string (an open/closed flag toggled by
delimiter parity). A disable comment inside a multiline string/template
literal is ignored, closing the suppressor-bypass vector.

Issue 2 (learn-evaluator tail-1000 truncation): removed the tail -1000
cap on the vg-helper path. The 30-minute cutoff is already enforced
inside vg-helper session-metrics; the cap caused under-counting on busy
sessions that produce >1000 log events per 30-minute window.

Issue 3 (session_metrics.rs missing fields): Rust implementation now
writes top_edited_files, avg_duration_ms, and slow_ops to match the
Python reference (hooks/_lib/session_metrics.py:196-198). Downstream
consumers (gc-scheduled.sh:369) no longer silently drop hot-file data.

Issue 4 (CI over-broad token): removed workflow-level contents:write /
pull-requests:write. Shell-heavy validate-and-test job now runs with
contents:read only. Benchmark reporting is isolated to a new
benchmark-report job that declares the write permissions it needs and
receives bench-output.json via upload-artifact/download-artifact rather
than inheriting a write token from the broad workflow scope.

Constraint: GitHub Actions does not support per-step permission scopes,
so job-level isolation via a separate job is the minimum granularity.
Tested: cargo check clean; 88/88 hook regression tests pass.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -7,8 +7,7 @@ on:
       - main
 
 permissions:
-  contents: write
-  pull-requests: write
+  contents: read
 
 defaults:
   run:
@@ -158,8 +157,38 @@ jobs:
         shell: bash
         run: bash tests/bench_hook_latency.sh --sla=500 --runs=3
 
-      - name: Store benchmark results
+      - name: Upload benchmark results
         if: runner.os == 'Linux'
+        uses: actions/upload-artifact@v4
+        with:
+          name: bench-output
+          path: bench-output.json
+          if-no-files-found: ignore
+
+      - name: VibeGuard Benchmark (fast)
+        if: runner.os != 'Windows'
+        shell: bash
+        run: bash scripts/benchmark.sh --mode=fast
+
+  benchmark-report:
+    name: Benchmark Report
+    runs-on: ubuntu-latest
+    needs: validate-and-test
+    # Isolated job so write tokens are not exposed to the shell-heavy CI steps above.
+    permissions:
+      contents: write
+      pull-requests: write
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Download benchmark results
+        uses: actions/download-artifact@v4
+        with:
+          name: bench-output
+          path: .
+
+      - name: Store benchmark results
         uses: benchmark-action/github-action-benchmark@v1
         with:
           name: Hook Latency (P95)
@@ -171,8 +200,3 @@ jobs:
           comment-on-alert: true
           fail-on-alert: false
           comment-always: ${{ github.event_name == 'pull_request' }}
-
-      - name: VibeGuard Benchmark (fast)
-        if: runner.os != 'Windows'
-        shell: bash
-        run: bash scripts/benchmark.sh --mode=fast
diff --git a/hooks/learn-evaluator.sh b/hooks/learn-evaluator.sh
@@ -28,8 +28,10 @@ fi
 
 # Collect session metrics for the last 30 minutes of the current project + correct signal detection
 if [[ -n "$_VG_HELPER" ]]; then
-  LEARN_SUGGESTION=$(tail -1000 "$VIBEGUARD_LOG_FILE" 2>/dev/null \
-    | "$_VG_HELPER" session-metrics "$VIBEGUARD_SESSION_ID" "$VIBEGUARD_PROJECT_LOG_DIR" 2>/dev/null || true)
+  # Pass the full log file — the 30-minute cutoff is enforced inside vg-helper,
+  # so tail-limiting here would under-count events on busy sessions (>1000 events/30 min).
+  LEARN_SUGGESTION=$("$_VG_HELPER" session-metrics "$VIBEGUARD_SESSION_ID" "$VIBEGUARD_PROJECT_LOG_DIR" \
+    < "$VIBEGUARD_LOG_FILE" 2>/dev/null || true)
 else
   _SESSION_METRICS_SCRIPT="$(dirname "$0")/_lib/session_metrics.py"
   LEARN_SUGGESTION=$(VIBEGUARD_LOG_FILE="$VIBEGUARD_LOG_FILE" \
diff --git a/hooks/post-edit-guard.sh b/hooks/post-edit-guard.sh
@@ -41,10 +41,23 @@ WARNINGS=""
 vg_filter_suppressed() {
   local rule="$1"
   awk -v rule="$rule" '
-    BEGIN { suppress = 0 }
+    BEGIN { suppress = 0; in_template = 0; in_triple_dq = 0 }
     {
+      # Record multiline-string state at the START of this line so a
+      # disable comment that is itself inside a string is not honoured.
+      start_in_ml = (in_template || in_triple_dq)
+
+      # Track JS/TS template-literal depth via backtick parity.
+      tmp = $0; n = gsub(/`/, "", tmp)
+      if (n % 2 == 1) in_template = 1 - in_template
+
+      # Track triple-double-quote multi-line strings (Python, Rust raw).
+      tmp = $0; n = gsub(/"""/, "", tmp)
+      if (n % 2 == 1) in_triple_dq = 1 - in_triple_dq
+
       if (suppress) { suppress = 0; next }
-      if ($0 ~ "^[[:space:]]*(//|#)[[:space:]]*vibeguard-disable-next-line[[:space:]]+" rule "([[:space:]]|--|$)") {
+      if (!start_in_ml &&
+          $0 ~ "^[[:space:]]*(//|#)[[:space:]]*vibeguard-disable-next-line[[:space:]]+" rule "([[:space:]]|--|$)") {
         suppress = 1
       }
       print
diff --git a/vg-helper/src/session_metrics.rs b/vg-helper/src/session_metrics.rs
@@ -100,6 +100,7 @@ pub fn run(args: &[String]) -> Result {
     let mut hooks: HashMap<String, u64> = HashMap::new();
     let mut tools: HashMap<String, u64> = HashMap::new();
     let mut edited_files: HashMap<String, u64> = HashMap::new();
+    let mut durations_ms: Vec<u64> = Vec::new();
 
     for e in &events {
         let d = e.get("decision").and_then(Value::as_str).unwrap_or("unknown");
@@ -116,8 +117,19 @@ pub fn run(args: &[String]) -> Result {
                 }
             }
         }
+
+        if let Some(d_ms) = e.get("duration_ms").and_then(Value::as_u64) {
+            durations_ms.push(d_ms);
+        }
     }
 
+    let avg_duration_ms: u64 = if durations_ms.is_empty() {
+        0
+    } else {
+        durations_ms.iter().sum::<u64>() / durations_ms.len() as u64
+    };
+    let slow_ops = durations_ms.iter().filter(|&&d| d > 5000).count();
+
     let total = events.len() as f64;
     let negative = *decisions.get("warn").unwrap_or(&0)
         + *decisions.get("block").unwrap_or(&0)
@@ -248,6 +260,15 @@ pub fn run(args: &[String]) -> Result {
         }
     }
 
+    // top_edited_files: top 5 by edit count (mirrors Python edited_files.most_common(5))
+    let mut top_files: Vec<_> = edited_files.iter().collect();
+    top_files.sort_by(|a, b| b.1.cmp(a.1));
+    let top_edited_files: serde_json::Map<String, Value> = top_files
+        .iter()
+        .take(5)
+        .map(|(k, v)| ((*k).clone(), json!(**v)))
+        .collect();
+
     // Write metrics
     let metrics = json!({
         "ts": chrono_now(),
@@ -256,6 +277,9 @@ pub fn run(args: &[String]) -> Result {
         "decisions": decisions,
         "hooks": hooks,
         "tools": tools,
+        "top_edited_files": top_edited_files,
+        "avg_duration_ms": avg_duration_ms,
+        "slow_ops": slow_ops,
         "correction_signals": signals,
         "warn_ratio": (warn_ratio * 100.0).round() / 100.0,
     });