From 93e79c3815c9d315ef584ae418e2723c937b3ac9 Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Sat, 25 Apr 2026 18:15:05 -0400 Subject: [PATCH 01/26] add benchmarks website v3 design overview and plan Signed-off-by: Connor Tsui --- benchmarks-website/planning/00-overview.md | 103 ++++++++ benchmarks-website/planning/01-schema.md | 222 ++++++++++++++++++ benchmarks-website/planning/02-contracts.md | 195 +++++++++++++++ benchmarks-website/planning/AGENTS.md | 86 +++++++ benchmarks-website/planning/README.md | 69 ++++++ .../planning/benchmark-mapping.md | 147 ++++++++++++ .../planning/components/emitter.md | 86 +++++++ .../planning/components/server.md | 70 ++++++ .../planning/components/web-ui.md | 62 +++++ benchmarks-website/planning/decisions.md | 90 +++++++ benchmarks-website/planning/deferred.md | 118 ++++++++++ 11 files changed, 1248 insertions(+) create mode 100644 benchmarks-website/planning/00-overview.md create mode 100644 benchmarks-website/planning/01-schema.md create mode 100644 benchmarks-website/planning/02-contracts.md create mode 100644 benchmarks-website/planning/AGENTS.md create mode 100644 benchmarks-website/planning/README.md create mode 100644 benchmarks-website/planning/benchmark-mapping.md create mode 100644 benchmarks-website/planning/components/emitter.md create mode 100644 benchmarks-website/planning/components/server.md create mode 100644 benchmarks-website/planning/components/web-ui.md create mode 100644 benchmarks-website/planning/decisions.md create mode 100644 benchmarks-website/planning/deferred.md diff --git a/benchmarks-website/planning/00-overview.md b/benchmarks-website/planning/00-overview.md new file mode 100644 index 00000000000..0fcb9557aab --- /dev/null +++ b/benchmarks-website/planning/00-overview.md @@ -0,0 +1,103 @@ + + +# 00 - Overview + +## What we're building + +A replacement for the current `bench.vortex.dev` site. The new +stack is a **single Rust binary** that owns a **DuckDB database** +on local disk and serves the website plus an `/api/ingest` route. +CI eventually POSTs new benchmark results there. There is no +separate ingester service, no S3 coordination layer for writes, no +client-side WASM. + +HTTP framework, templating engine, and module layout are the +server agent's call. + +## Phasing + +We build this in two phases. **Plan only the first.** + +### Alpha (this plan) + +The smallest end-to-end loop that proves the design: + +1. **Schema** locked enough to ingest one benchmark result. +2. **Server**: open DuckDB, accept a bearer-token-authenticated POST, + serve a couple of read routes. +3. **Emitter**: `vortex-bench --gh-json-v3` + a tiny POST script. +4. **Web UI**: one landing page + one chart page rendered against a + fixture DB. + +That's it. No production deploy, no historical data import, no CI +workflow integration, no admin tooling, no schema migration +framework, no auth beyond the shared bearer token. All of those +live in [`deferred.md`](./deferred.md). + +The alpha runs on a developer machine. v2 keeps running in +production unchanged. There is no cutover in alpha. + +### Phase 2 and beyond + +Once the alpha loop is green, we layer in production deploy, +historical migration, CI dual-write, and the rest of the v2-parity +work. Stubs are in [`deferred.md`](./deferred.md). + +## Architecture (alpha) + +One process, one DB file. The server is the API and the website. +The emitter writes JSONL of bare records; a small POST script +wraps and uploads them. CI isn't wired up yet; ingest happens +manually during alpha. 
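+
+Concretely, one manual alpha loop might look like the sketch below
+(the suite subcommand, script path, and port are illustrative -
+each is the owning component's call):
+
+```sh
+# Run one suite, emitting bare v3 records as JSONL.
+vortex-bench tpch --gh-json-v3 /tmp/results.jsonl
+
+# Wrap the records in the run_meta + commit envelope and POST them.
+export INGEST_BEARER_TOKEN=dev-token
+python scripts/post_ingest.py /tmp/results.jsonl \
+    "$(git rev-parse HEAD)" http://localhost:3001/api/ingest
+```
+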
+ +## Components + +Three components for alpha. Each is one workstream, one branch, one +PR. + +| Component | Plan | Owns | +|---|---|---| +| Server | [components/server.md](./components/server.md) | DuckDB open + schema, bearer-auth ingest, read routes, HTML routes mounted from web-ui | +| Emitter | [components/emitter.md](./components/emitter.md) | `vortex-bench --gh-json-v3` + the post-ingest script | +| Web UI | [components/web-ui.md](./components/web-ui.md) | Landing page + chart page, against a fixture DuckDB | + +### Dependencies + +The schema feeds all three components. The contracts feed the +server and the emitter. With both stable, **all three components +can be worked on in parallel**. + +## Goals + +In priority order: + +1. **End-to-end alpha loop works.** Emit → POST → store → render. +2. **Schema is the right shape.** Five fact tables (one per + measurement family) plus a `commits` dim. See + [`01-schema.md`](./01-schema.md). +3. **Each component is small enough that one agent can finish it + in one PR.** No mega-PRs. + +Cutover, parity, and "faster than v2" are explicit non-goals at +alpha; they come back in phase 2. + +## Shared docs + +- [`00-overview.md`](./00-overview.md) (this file) +- [`01-schema.md`](./01-schema.md) - the five fact tables + `commits` +- [`02-contracts.md`](./02-contracts.md) - wire shapes + HTTP error + matrix + auth header +- [`benchmark-mapping.md`](./benchmark-mapping.md) - existing + benchmarks → fact tables +- [`decisions.md`](./decisions.md) - resolved decisions +- [`deferred.md`](./deferred.md) - phase-2 stubs + +## Status of v2 during alpha + +v2 stays in production untouched. Do not edit +`benchmarks-website/server.js` or `benchmarks-website/src/`. v3 +lives alongside under `benchmarks-website/` in a new Cargo crate +(path is the server agent's call). diff --git a/benchmarks-website/planning/01-schema.md b/benchmarks-website/planning/01-schema.md new file mode 100644 index 00000000000..8ddf71e2739 --- /dev/null +++ b/benchmarks-website/planning/01-schema.md @@ -0,0 +1,222 @@ + + +# 01 - DuckDB schema (alpha) + +The persistent data model. **One `commits` dim table plus five fact +tables, one per measurement family.** No lookup tables, no views, no +migration framework; those are deferred (see +[`deferred.md`](./deferred.md)). + +## Design principles + +1. **One fact table per (dim shape, value shape).** A row in any + fact table has every value column populated; NULLs only appear + in genuinely optional dimensions. +2. **No discriminator columns spanning families.** No `metric_kind` + enum forcing five shapes into one row. +3. **No JSON escape hatch.** New benchmark parameters become real + columns. Adding a nullable column is cheap; the readability win + is worth it. +4. **Hashed primary key per table.** Each fact table has a + `measurement_id` that is a deterministic 64-bit hash of that + table's dimensional tuple. Server-internal; not on the wire. +5. **`commits` is the only dim table.** Engine, format, dataset, + etc. stay as inline strings; DuckDB's dictionary encoding makes + a lookup table pointless. +6. **Ratios are not stored.** Computed at query time from + `compression_sizes`. 
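+
+As an illustration of principle 6, a read-time ratio is one
+self-join away. A sketch, assuming `parquet` as the baseline format
+(exact SQL is the server agent's call):
+
+```sql
+-- Vortex size relative to parquet, per commit and dataset.
+SELECT v.commit_sha,
+       v.dataset,
+       v.value_bytes::DOUBLE / b.value_bytes AS ratio
+FROM compression_sizes AS v
+JOIN compression_sizes AS b
+  ON b.commit_sha = v.commit_sha
+ AND b.dataset = v.dataset
+ AND b.dataset_variant IS NOT DISTINCT FROM v.dataset_variant
+WHERE v.format = 'vortex-file-compressed'
+  AND b.format = 'parquet';
+```
+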
+ +## Why five fact tables, not one + +The five families have genuinely different shapes: + +| Table | Shape sketch | +|---|---| +| `query_measurements` | dataset + query_idx + engine + format + storage → timing **and** memory | +| `compression_times` | dataset + format + op∈{encode,decode} → timing | +| `compression_sizes` | dataset + format → bytes | +| `random_access_times` | dataset + format → timing (different dataset namespace) | +| `vector_search_runs` | dataset + layout + flavor + threshold → timing + counters | + +Forcing them into one table either bloats every row with columns +that are NULL for ~99% of rows (`layout`, `flavor`, `threshold`, +`matches`, `rows_scanned`, `bytes_scanned`) or splits scan results +across multiple rows that have to be re-joined to render one chart. + +## Group / chart / series fit + +The render-time view used by `/api/groups` and `/api/chart/:slug` +is mechanically derivable per table: + +| Table | Group key | Chart key | Series key | +|---|---|---|---| +| `query_measurements` | `(dataset, dataset_variant, scale_factor, storage)` | `(dataset, query_idx)` | `(engine, format)` | +| `compression_times` | constant `"Compression"` | `(dataset, dataset_variant)` | `(format, op)` | +| `compression_sizes` | constant `"Compression Size"` | `(dataset, dataset_variant)` | `format` | +| `random_access_times` | constant `"Random Access"` | `dataset` | `format` | +| `vector_search_runs` | `(dataset, layout)` | `(dataset, layout, threshold)` | `flavor` | + +The classifier logic in v2's `v2-classifier.js` mostly disappears - +each table already knows what suite it represents. + +## Tables + +DDL is the server's call. Below is the column contract: name, type +family, and whether it's NOT NULL. The server agent picks exact +DuckDB types, indexes, and constraint syntax. + +### `commits` (dim) + +| Column | Type | Required? | Notes | +|---|---|---|---| +| `commit_sha` | string | yes (PK) | 40-hex lowercase | +| `timestamp` | timestamptz | yes | | +| `message` | string | yes | first line only | +| `author_name` | string | yes | | +| `author_email` | string | yes | | +| `committer_name` | string | yes | | +| `committer_email` | string | yes | | +| `tree_sha` | string | yes | | +| `url` | string | yes | | + +Populated from the envelope on every `/api/ingest` call. + +### `query_measurements` + +SQL query suites: TPC-H, TPC-DS, ClickBench, StatPopGen, +PolarSignals, Fineweb, GhArchive, Public-BI. Memory columns are +populated when the run was instrumented for memory; NULL otherwise. +Timing and memory share the row because they're produced together +for the same query execution. + +| Column | Type | Required? | Notes | +|---|---|---|---| +| `measurement_id` | int64 | yes (PK) | hash of dim tuple | +| `commit_sha` | string | yes | FK to `commits` | +| `dataset` | string | yes | `tpch`, `tpcds`, `clickbench`, ... | +| `dataset_variant` | string | optional | ClickBench flavor, Public-BI name | +| `scale_factor` | string | optional | TPC SF; n_rows for StatPopGen / PolarSignals | +| `query_idx` | int32 | yes | 1-based | +| `storage` | string | yes | `nvme` or `s3` | +| `engine` | string | yes | `datafusion`, `duckdb`, `vortex`, `arrow` | +| `format` | string | yes | `vortex-file-compressed`, `parquet`, `lance`, ... 
| +| `value_ns` | int64 | yes | median timing, ns | +| `all_runtimes_ns` | list<int64> | yes | per-iteration timings | +| `peak_physical` | int64 | optional | bytes | +| `peak_virtual` | int64 | optional | bytes | +| `physical_delta` | int64 | optional | bytes | +| `virtual_delta` | int64 | optional | bytes | +| `env_triple` | string | optional | e.g. `x86_64-linux-gnu` | + +### `compression_times` + +Encode/decode timings from `compress-bench`. + +| Column | Type | Required? | Notes | +|---|---|---|---| +| `measurement_id` | int64 | yes (PK) | | +| `commit_sha` | string | yes | FK | +| `dataset` | string | yes | | +| `dataset_variant` | string | optional | | +| `format` | string | yes | | +| `op` | string | yes | `encode` or `decode` | +| `value_ns` | int64 | yes | | +| `all_runtimes_ns` | list<int64> | yes | | +| `env_triple` | string | optional | | + +### `compression_sizes` + +On-disk sizes from `compress-bench`. One-shot, no per-iteration data. +Compression ratios in v2 (`vortex:parquet-zstd ratio/...`) are a +SELECT over this table joined to itself; they're not stored. + +| Column | Type | Required? | Notes | +|---|---|---|---| +| `measurement_id` | int64 | yes (PK) | | +| `commit_sha` | string | yes | FK | +| `dataset` | string | yes | | +| `dataset_variant` | string | optional | | +| `format` | string | yes | | +| `value_bytes` | int64 | yes | | + +### `random_access_times` + +Take-time timings from `random-access-bench`. Different dataset +namespace from `compression_times` - kept in its own table so +dataset filters never have to disambiguate which suite a row +belongs to. + +| Column | Type | Required? | Notes | +|---|---|---|---| +| `measurement_id` | int64 | yes (PK) | | +| `commit_sha` | string | yes | FK | +| `dataset` | string | yes | | +| `format` | string | yes | | +| `value_ns` | int64 | yes | | +| `all_runtimes_ns` | list<int64> | yes | | +| `env_triple` | string | optional | | + +### `vector_search_runs` + +Cosine-similarity scans from `vector-search-bench`. The only family +that emits a timing **plus side counters** for the same scan; +keeping them in one row avoids a 1:N split that has to be re-joined +on read. + +| Column | Type | Required? | Notes | +|---|---|---|---| +| `measurement_id` | int64 | yes (PK) | | +| `commit_sha` | string | yes | FK | +| `dataset` | string | yes | e.g. `cohere-large-10m` | +| `layout` | string | yes | `TrainLayout`, e.g. `partitioned` | +| `flavor` | string | yes | `VectorFlavor`, e.g. `vortex-turboquant` | +| `threshold` | double | yes | cosine threshold | +| `value_ns` | int64 | yes | per-scan wall time | +| `all_runtimes_ns` | list<int64> | yes | | +| `matches` | int64 | yes | | +| `rows_scanned` | int64 | yes | | +| `bytes_scanned` | int64 | yes | | +| `iterations` | int32 | yes | not part of the dim hash | +| `env_triple` | string | optional | | + +## `measurement_id` hash + +Per-table xxhash64 over each table's dimensional tuple. The hash is +**server-internal** - the wire never carries it. The server's INSERT +path computes it before each `INSERT ... ON CONFLICT DO UPDATE`, +which gives idempotent upsert on re-emission of the same dim tuple. +Encoding details (input order, NULL handling, byte layout) are the +server's call, since the value never crosses a process boundary. + +When the historical migrator lands (deferred), it reuses the +server's hash function via a shared crate. + +## Storage values + +`storage` is `'nvme'` or `'s3'`. Legacy `gcs` is dropped. 
Only
+`query_measurements` carries `storage` - the other families don't
+fan out by storage backend.
+
+## Schema changes during alpha
+
+There is no migration framework. If you change the schema:
+
+1. Update this doc.
+2. Update the server's DDL.
+3. Delete any local `bench.duckdb` and re-run.
+
+A real forward-only migration framework lands post-alpha. See
+[`deferred.md`](./deferred.md).
+
+## What's intentionally NOT here (deferred)
+
+- `schema_meta` and migration framework.
+- `known_engines` / `known_formats` / `known_datasets` lookup
+  tables and seed SQL.
+- Views (`v_compression_ratios`, `v_latest_per_group`, etc.).
+- Pre-downsampled aliases.
+- A `microbench_runs` table - reserved as the next family to add
+  when microbench results start landing.
diff --git a/benchmarks-website/planning/02-contracts.md b/benchmarks-website/planning/02-contracts.md
new file mode 100644
index 00000000000..9aba31fb73b
--- /dev/null
+++ b/benchmarks-website/planning/02-contracts.md
@@ -0,0 +1,195 @@
+
+
+# 02 - Wire contracts (alpha)
+
+The cross-component glue between the emitter, the POST script, and
+the server. Wire-format only - implementations are local to each
+component.
+
+If two components disagree about a shape, **this file is right**
+and both update.
+
+## Records are discriminated by `kind`
+
+Each record on the wire carries a `kind` field that picks one of
+the [five fact tables](./01-schema.md#tables). The emitter never
+decides "what column" - it decides "what kind", and the rest of the
+row is that kind's flat field set.
+
+| `kind` | Destination table |
+|---|---|
+| `query_measurement` | `query_measurements` |
+| `compression_time` | `compression_times` |
+| `compression_size` | `compression_sizes` |
+| `random_access_time` | `random_access_times` |
+| `vector_search_run` | `vector_search_runs` |
+
+**Unknown `kind` values cause a 400.** Unknown fields within a known
+`kind` also cause a 400. Version skew should fail loudly.
+
+## Per-kind record shapes
+
+All shared metadata first; per-kind fields after.
+
+### `query_measurement`
+
+| Field | Type | Required? | Notes |
+|---|---|---|---|
+| `kind` | `"query_measurement"` | yes | discriminator |
+| `commit_sha` | string | yes | 40-hex lowercase |
+| `dataset` | string | yes | `tpch`, `tpcds`, `clickbench`, ... |
+| `dataset_variant` | string | optional | ClickBench flavor, Public-BI name |
+| `scale_factor` | string | optional | TPC SF; n_rows for StatPopGen / PolarSignals |
+| `query_idx` | integer | yes | 1-based |
+| `storage` | enum string | yes | `nvme` or `s3` |
+| `engine` | string | yes | `datafusion`, `duckdb`, `vortex`, `arrow` |
+| `format` | string | yes | `vortex-file-compressed`, `parquet`, `lance`, ... |
+| `value_ns` | integer | yes | median timing, ns |
+| `all_runtimes_ns` | array<integer> | yes | per-iteration timings (may be empty) |
+| `peak_physical` | integer | optional | bytes |
+| `peak_virtual` | integer | optional | bytes |
+| `physical_delta` | integer | optional | bytes |
+| `virtual_delta` | integer | optional | bytes |
+| `env_triple` | string | optional | e.g. `x86_64-linux-gnu` |
+
+The four memory fields are populated together (all four or none).
+
+### `compression_time`
+
+| Field | Type | Required? | Notes |
+|---|---|---|---|
+| `kind` | `"compression_time"` | yes | |
+| `commit_sha` | string | yes | |
+| `dataset` | string | yes | |
+| `dataset_variant` | string | optional | |
+| `format` | string | yes | |
+| `op` | enum string | yes | `encode` or `decode` |
+| `value_ns` | integer | yes | |
+| `all_runtimes_ns` | array<integer> | yes | |
+| `env_triple` | string | optional | |
+
+### `compression_size`
+
+| Field | Type | Required? | Notes |
+|---|---|---|---|
+| `kind` | `"compression_size"` | yes | |
+| `commit_sha` | string | yes | |
+| `dataset` | string | yes | |
+| `dataset_variant` | string | optional | |
+| `format` | string | yes | |
+| `value_bytes` | integer | yes | |
+
+### `random_access_time`
+
+| Field | Type | Required? | Notes |
+|---|---|---|---|
+| `kind` | `"random_access_time"` | yes | |
+| `commit_sha` | string | yes | |
+| `dataset` | string | yes | random-access dataset name (e.g. `chimp`, `taxi`) |
+| `format` | string | yes | |
+| `value_ns` | integer | yes | |
+| `all_runtimes_ns` | array<integer> | yes | |
+| `env_triple` | string | optional | |
+
+### `vector_search_run`
+
+| Field | Type | Required? | Notes |
+|---|---|---|---|
+| `kind` | `"vector_search_run"` | yes | |
+| `commit_sha` | string | yes | |
+| `dataset` | string | yes | e.g. `cohere-large-10m` |
+| `layout` | string | yes | `TrainLayout`, e.g. `partitioned` |
+| `flavor` | string | yes | `VectorFlavor`, e.g. `vortex-turboquant` |
+| `threshold` | number | yes | cosine threshold |
+| `value_ns` | integer | yes | per-scan wall time (median of iterations) |
+| `all_runtimes_ns` | array<integer> | yes | |
+| `matches` | integer | yes | |
+| `rows_scanned` | integer | yes | |
+| `bytes_scanned` | integer | yes | |
+| `iterations` | integer | yes | |
+| `env_triple` | string | optional | |
+
+## Ingest envelope
+
+`/api/ingest` accepts one envelope per POST. The envelope wraps a
+heterogeneous batch of records (any mix of `kind`s). Required
+top-level fields:
+
+- `run_meta`: object with `benchmark_id` (string), `schema_version`
+  (integer; `1` at alpha), `started_at` (RFC 3339 timestamp).
+- `commit`: object with the columns of the [`commits`
+  table](./01-schema.md#commits-dim), keyed by their column names
+  with `commit_sha` renamed to `sha`. The server upserts this row
+  before applying records.
+- `records`: array of per-`kind` records as defined above.
+
+`vortex-bench --gh-json-v3 <path>` writes JSONL of bare records
+only. The envelope (`run_meta` + `commit`) is added by the
+post-ingest script before POSTing - this keeps the Rust emitter
+dependency-light.
+
+The post-ingest script is responsible for filling the `commit`
+fields. CI has the SHA from `${{ github.sha }}`; the rest comes
+from `git show` or equivalent. See
+[`components/emitter.md`](./components/emitter.md).
+
+## HTTP matrix for `POST /api/ingest`
+
+| Condition | Status |
+|---|---|
+| Happy path | 200 with `{ "inserted": N, "updated": M }` |
+| Malformed JSON | 400 |
+| Unknown `kind`, unknown field, or per-record validation failure | 400 with the offending record index |
+| Missing/invalid bearer token | 401 |
+| Schema version newer than server expects | 409 |
+| Other server error | 500 |
+
+All-or-nothing per POST: a single failed record fails the whole
+batch. The reported `inserted` and `updated` counts are aggregated
+across all five tables.
+
+## Authentication header
+
+```text
+Authorization: Bearer <token>
+```
+
+Compared using constant-time equality on the server. Token comes
+from the `INGEST_BEARER_TOKEN` env var.
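+
+Putting the route, the envelope, and the header together, a manual
+ingest might look like this (local port and file name are
+illustrative):
+
+```sh
+# envelope.json holds one { run_meta, commit, records } object.
+curl -sS -X POST http://localhost:3001/api/ingest \
+  -H "Authorization: Bearer $INGEST_BEARER_TOKEN" \
+  -H "Content-Type: application/json" \
+  --data-binary @envelope.json
+```
+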
+ +## Slug grammar (server ↔ web-ui) + +The web-ui receives slugs from `/api/groups` and feeds them back +into `/api/chart/:slug`. Slugs are **opaque strings** as far as the +web-ui is concerned: it never parses or constructs them itself, +only echoes what the API returned. The server is free to choose any +slug format, change it without breaking the web-ui, or make it +debuggable (e.g. `qm-tpch-q01-nvme-sf1`) - the only contract is +"`/api/chart/:slug` accepts any slug `/api/groups` returned." + +## Read API (alpha) + +Two routes - just enough to render one chart page. Field shapes are +not binding; refine during implementation. + +### `GET /api/groups` + +A flat list of distinct group keys derivable from the data, with +just enough metadata to link to a chart. The server walks each fact +table to produce the group keys defined in +[`01-schema.md`](./01-schema.md#group--chart--series-fit). Every +chart entry includes a `slug` that round-trips through +`/api/chart/:slug`. + +### `GET /api/chart/:slug` + +Returns the data for one chart: a `display_name`, a `unit`, an +ordered `commits` list (sha + timestamp + first-line message + url), +and a `series` map keyed by series name where each value is an +array aligned to `commits` (with `null` for missing data points). + +Per-commit page, zoom/pan, range queries, and the rest of the read +API are deferred. See [`deferred.md`](./deferred.md). diff --git a/benchmarks-website/planning/AGENTS.md b/benchmarks-website/planning/AGENTS.md new file mode 100644 index 00000000000..fe5cb69894e --- /dev/null +++ b/benchmarks-website/planning/AGENTS.md @@ -0,0 +1,86 @@ + + +# AGENTS.md - benchmarks-website v3 (alpha) + +Brief for coding agents working on this rewrite. Keep it short; +detail belongs in component plans. + +## What you're working on + +The **alpha** of v3 of `bench.vortex.dev`. Target: a single Rust +binary with **DuckDB on local disk**. The smallest end-to-end loop +that proves the design. + +The v2 site at `benchmarks-website/` is in production and stays +running unchanged. v3 lives alongside in a new crate under +`benchmarks-website/` (path is the server agent's call). + +Anything not listed in [`README.md`](./README.md) under +"Components" is **deferred**. See [`deferred.md`](./deferred.md). +Don't expand scope past your component plan. + +## Where to start + +1. [`README.md`](./README.md) - reading order. +2. [`00-overview.md`](./00-overview.md) - phases, components, + dependency map. +3. [`01-schema.md`](./01-schema.md) - the DuckDB schema (column + contracts; SQL is the server agent's call). +4. [`02-contracts.md`](./02-contracts.md) - wire shapes + HTTP + matrix + auth header. +5. [`benchmark-mapping.md`](./benchmark-mapping.md) - existing + benchmarks → fact tables (read this if you're working on the + emitter or eventual migration). +6. Your component plan in [`components/`](./components/). + +You **don't** need to read other components' plans. + +## Repository conventions + +See the root [`CLAUDE.md`](/CLAUDE.md) for Rust style, test layout, +and CI norms. Project-specific: + +- New crates go under `benchmarks-website/`. Add to root + `Cargo.toml` workspace members. +- All commits need a `Signed-off-by:` trailer. +- Run `cargo +nightly fmt --all` and narrow clippy on what you + changed. +- Public-API changes need `./scripts/public-api.sh`. +- Every new public item needs a doc comment. +- Tests return `VortexResult<()>` and use `?`. No `unwrap`. 
## Things to avoid

+
+- **Don't widen scope past your component plan.** If a feature
+  feels missing, check [`deferred.md`](./deferred.md) first - it
+  is almost certainly already deferred there.
+- **Don't write a server-side classifier.** The emitter is
+  responsible for v3-shape records.
+- **Don't drift from contracts.** Wire-shape changes are a
+  coordinated PR across the affected components.
+- **Don't touch the v2 React/Node app.** It stays in production
+  unchanged through alpha and through phase 2 until cutover.
+- **Don't reach for WASM.**
+
+## Working branches
+
+| Branch | Purpose |
+|---|---|
+| `develop` | Live v2 site. Don't break. |
+| `claude/review-benchmarks-redesign-BO3la` | This planning branch. |
+| `claude/benchmarks-v3-<component>` | Per-workstream feature branches. |
+
+Component branches start from `develop`.
+
+## How to update this file
+
+Keep it short. If you've learned something a future agent will need:
+
+- Cross-component contract → [`02-contracts.md`](./02-contracts.md)
+- Local detail → your component plan
+- Decided → [`decisions.md`](./decisions.md)
+- Not designing yet → [`deferred.md`](./deferred.md)
+- Cross-cutting agent norm → here
diff --git a/benchmarks-website/planning/README.md b/benchmarks-website/planning/README.md
new file mode 100644
index 00000000000..34a379b8bd3
--- /dev/null
+++ b/benchmarks-website/planning/README.md
@@ -0,0 +1,69 @@
+
+
+# Benchmarks website v3 - Planning
+
+Planning docs for rebuilding `bench.vortex.dev` as a single Rust
+binary with DuckDB on local disk.
+
+This plan is **alpha-only**. Everything beyond the smallest
+end-to-end loop is deliberately punted to
+[`deferred.md`](./deferred.md).
+
+## Reading order
+
+| File | Read when |
+|---|---|
+| [`00-overview.md`](./00-overview.md) | Always. The pitch, phases, and dependency map. |
+| [`01-schema.md`](./01-schema.md) | Always. The five DuckDB fact tables + `commits` dim. |
+| [`02-contracts.md`](./02-contracts.md) | Always. Wire shapes (one `kind` per fact table), HTTP error matrix, auth header. |
+| [`benchmark-mapping.md`](./benchmark-mapping.md) | Always when working on the emitter or the historical migrator. Maps every existing benchmark to its target table. |
+| [`decisions.md`](./decisions.md) | Skim once. What's pinned for alpha. |
+| [`deferred.md`](./deferred.md) | Skim once. What we're not designing yet. |
+| `components/<your-component>.md` | The plan for your specific workstream. |
+| `components/<other-component>.md` | Avoid. If you're tempted, `02-contracts.md` probably needs an update. |
+
+## Components
+
+Three components for alpha. Each is one workstream, one branch, one
+PR. After the schema and contracts are stable, **all three can be
+worked on in parallel**.
+
+| Component | Plan | Branch |
+|---|---|---|
+| Server | [components/server.md](./components/server.md) | `claude/benchmarks-v3-server` |
+| Emitter | [components/emitter.md](./components/emitter.md) | `claude/benchmarks-v3-emitter` |
+| Web UI | [components/web-ui.md](./components/web-ui.md) | `claude/benchmarks-v3-web-ui` |
+
+## Working branches
+
+- `develop` - the v2 site, in production. **Do not touch.**
+- `claude/review-benchmarks-redesign-BO3la` - this planning branch.
+- Component branches above - one per workstream, branched from
+  `develop`.
+
+## What this plan is not
+
+- Not implementation instructions. Component plans are deliberately
+  high-level.
+- Not a phase-2 plan. Phase-2 work is one paragraph each in
+  [`deferred.md`](./deferred.md). The path will be clearer once the
+  alpha loop is running.
- Not a parity-with-v2 plan. v2 keeps running unchanged through
+  alpha.
+
+## Updating these docs
+
+If you find a gap, prefer to:
+
+1. Update [`02-contracts.md`](./02-contracts.md) when the gap is at
+   a component boundary.
+2. Update the relevant component plan when the gap is local.
+3. Update [`decisions.md`](./decisions.md) when the gap is "we just
+   haven't decided yet, but we need to."
+4. Update [`deferred.md`](./deferred.md) when the gap is "this is
+   real work but not for alpha."
+
+Don't add a new top-level numbered doc.
diff --git a/benchmarks-website/planning/benchmark-mapping.md b/benchmarks-website/planning/benchmark-mapping.md
new file mode 100644
index 00000000000..9216a45ebc4
--- /dev/null
+++ b/benchmarks-website/planning/benchmark-mapping.md
@@ -0,0 +1,147 @@
+
+
+# Existing benchmarks → fact-table mapping
+
+A cross-reference from today's benchmark code to the v3 fact tables
+in [`01-schema.md`](./01-schema.md). Use this when implementing the
+emitter's `to_v3_json` (component plan in
+[`components/emitter.md`](./components/emitter.md)) or when
+sanity-checking that the schema is expressive enough.
+
+If a benchmark in this repo is not listed here, it is either
+deferred to phase 2 or out of scope for the bench website.
+
+## Source measurement type → target table
+
+The canonical mapping. The Rust types live in
+`vortex-bench/src/measurements.rs` (and per-benchmark crates).
+
+| Source type | Wire `kind` | Target table | Notes |
+|---|---|---|---|
+| `QueryMeasurement` (paired with `MemoryMeasurement`) | `query_measurement` | `query_measurements` | The two structs collapse into **one** v3 record. Memory fields are omitted if `--track-memory` was off. |
+| `TimingMeasurement` (only the random-access variant uses this today) | `random_access_time` | `random_access_times` | |
+| `CompressionTimingMeasurement` | `compression_time` (with `op ∈ {encode, decode}`) | `compression_times` | The `op` is decided by which side of `compress-bench`'s timing loop produced it. |
+| `CustomUnitMeasurement` with byte unit (sizes) | `compression_size` | `compression_sizes` | A new `CompressionSizeMeasurement` extraction lives in `vortex-bench/src/compress/mod.rs`; the emitter no longer rides on `CustomUnitMeasurement`. |
+| `CustomUnitMeasurement` with `ratio` unit | **dropped** | none | Computed at read time from `compression_sizes`. |
+| `ScanTiming` (vector-search) | `vector_search_run` | `vector_search_runs` | Carries timing **plus** the three counters in the same row. |
+
+## Per-binary inventory
+
+Every benchmark binary in this repo, the measurement structs it
+produces today, and the v3 tables those measurements land in.
+
+### `benchmarks/datafusion-bench`
+
+Runs the SQL query suites with `engine = datafusion`, parameterized
+over a `Format` (parquet, vortex-file-compressed, vortex-compact,
+arrow, lance via the lance-bench wrapper).
+
+- Produces `QueryMeasurement` (+ `MemoryMeasurement` when
+  `--track-memory`) → **`query_measurements`**.
+- One row per `(commit, dataset, dataset_variant, scale_factor,
+  query_idx, storage, engine = "datafusion", format)`.
+
+### `benchmarks/duckdb-bench`
+
+Same as `datafusion-bench` but with `engine = duckdb`.
+
+- Produces `QueryMeasurement` (+ `MemoryMeasurement` when tracking)
+  → **`query_measurements`**, with `engine = "duckdb"`.
+
+### `benchmarks/lance-bench`
+
+Three things in one crate:
+
+1. **Query runner** (`src/main.rs`): `engine = datafusion`,
+   `format = lance` only.
Produces `QueryMeasurement` (+ + `MemoryMeasurement`) → **`query_measurements`**. +2. **Compression runner** (`src/compress.rs`): produces + `CompressionTimingMeasurement` + size `CustomUnitMeasurement` → + **`compression_times`** (with `op ∈ {encode, decode}`, + `format = lance`) and **`compression_sizes`** + (`format = lance`). +3. **Random-access runner** (`src/random_access.rs`): produces + `TimingMeasurement` → **`random_access_times`** with + `format = lance`. + +### `benchmarks/compress-bench` + +The compression suite. Per dataset, runs encode + decode against +each enabled `Format` and records the resulting on-disk size. + +- `CompressionTimingMeasurement` for encode → **`compression_times`** + with `op = "encode"`. +- `CompressionTimingMeasurement` for decode → **`compression_times`** + with `op = "decode"`. +- Byte-unit `CustomUnitMeasurement` (the size entries) → + **`compression_sizes`**. +- Ratio-unit `CustomUnitMeasurement` (the `vortex:parquet-zstd + ratio/...` entries) → **dropped**. The reader recomputes ratios + from `compression_sizes`. + +### `benchmarks/random-access-bench` + +The random-access "take" timing suite. Datasets here (chimp, taxi, +etc.) are a different namespace from the SQL query suites. + +- `TimingMeasurement` → **`random_access_times`**. +- `format` is one of `vortex-file-compressed`, `vortex-compact`, + `parquet`, `lance`. + +### `benchmarks/vector-search-bench` + +Cosine-similarity scan over a vector dataset. Each dataset/layout/ +flavor combination produces a single `ScanTiming` per scan +configuration. + +- `ScanTiming` → **`vector_search_runs`**. +- `dataset` from `VectorDataset` (e.g. `cohere-large-10m`). +- `layout` from `TrainLayout`. +- `flavor` from `VectorFlavor` (compression flavor; the vector- + search analogue of `format`). +- `threshold`, `iterations` are real columns. +- `query_seed` is **not** stored - it's a deterministic seed for + the query sampler and not a measurement dimension. + +## Per-suite dim values + +For SQL query suites (everything that flows through +`query_measurements`), the dim columns are populated as follows: + +| `BenchmarkArg` | `dataset` | `dataset_variant` | `scale_factor` | Notes | +|---|---|---|---|---| +| `TpcH` | `tpch` | NULL | TPC SF as string (`"1"`, `"10"`, `"100"`, `"1000"`) | | +| `TpcDS` | `tpcds` | NULL | TPC SF as string | | +| `ClickBench` | `clickbench` | flavor as string (`partitioned` / `single`) | NULL | The flavor lives in `dataset_variant`, not `dataset`. | +| `StatPopGen` | `statpopgen` | NULL | n_rows as string | `scale_factor` here is the row count; the per-dataset interpretation of SF is documented in [`01-schema.md`](./01-schema.md). | +| `PolarSignals` | `polarsignals` | NULL | n_rows as string | Same SF interpretation as StatPopGen. | +| `Fineweb` | `fineweb` | NULL | NULL | | +| `GhArchive` | `gharchive` | NULL | NULL | | +| `PublicBi` | `public-bi` | dataset name (e.g. `cms-provider`) | NULL | The Public-BI sub-dataset name lives in `dataset_variant`. | + +For non-query suites: + +- `compress-bench`: `dataset` is the compression dataset name; if + the suite later grows variants, `dataset_variant` is available. +- `random-access-bench`: `dataset` is the random-access dataset + name. No variant column on this table. +- `vector-search-bench`: see the [vector_search_runs + table](./01-schema.md#vector_search_runs). + +## What this implies for the emitter + +The mapping above is the contract `vortex-bench --gh-json-v3` +implements. 
Any v3 record an emitter writes today must land in
+exactly one of the five tables; if a future measurement type
+doesn't fit, that's the signal to add a sixth table (and a sixth
+`kind`) rather than overload one of these.
+
+The **historical migrator** will use the same mapping when it lands
+(it's deferred - see [`deferred.md`](./deferred.md#historical-data-migration)).
+The v2 classifier on `develop` at `benchmarks-website/server.js`
+becomes useful then, because the v2 S3 dump pre-dates the
+discriminator and we'll have to recover `kind` from name strings.
+For new ingest at alpha, no classifier is needed.
diff --git a/benchmarks-website/planning/components/emitter.md b/benchmarks-website/planning/components/emitter.md
new file mode 100644
index 00000000000..e462a9804c8
--- /dev/null
+++ b/benchmarks-website/planning/components/emitter.md
@@ -0,0 +1,86 @@
+
+
+# Component: Emitter (alpha)
+
+## Required reading
+
+- [`../00-overview.md`](../00-overview.md)
+- [`../02-contracts.md`](../02-contracts.md)
+- [`../benchmark-mapping.md`](../benchmark-mapping.md) - the
+  source-type → target-table mapping.
+
+## Goal
+
+Extend `vortex-bench` so it emits v3-shape JSON. Plus a small POST
+script that wraps the JSONL in an envelope and sends it to a
+running alpha server.
+
+This is **purely additive** to v2's emission path. Nothing in v2 is
+touched. CI workflow integration, dual-write, the orchestrator
+update, and the outbox safety net all wait until after the alpha
+loop works end-to-end (see [`../deferred.md`](../deferred.md)).
+
+## In scope
+
+### Rust emitter
+
+- Add a `--gh-json-v3 <path>` CLI flag that writes JSONL of bare
+  v3 records (no envelope). The legacy `-d gh-json -o ...` form is
+  untouched - both work at alpha.
+- Emit a record with the appropriate `kind` for every measurement
+  type produced today. The mapping from existing measurement
+  structs to wire `kind`s is the table in
+  [`../benchmark-mapping.md`](../benchmark-mapping.md).
+- Two non-obvious points (everything else is mechanical):
+  - `QueryMeasurement` and the paired `MemoryMeasurement` collapse
+    into **one** `query_measurement` record with both `value_ns`
+    and the four memory fields. If memory wasn't tracked, omit the
+    memory fields.
+  - Vector-search's `ScanTiming` doesn't carry its own dataset /
+    layout / threshold (those live in the binary's `Args`). The
+    emitter has to plumb them through to the record.
+- `CustomUnitMeasurement` cross-format ratios are **not emitted** -
+  ratios are computed in the read path.
+- Snapshot tests per `kind` (any framework), scrubbing `commit_sha`
+  and `env_triple`.
+
+### Post-ingest script
+
+A small Python script (path of the agent's choosing, e.g. under
+`scripts/`) that:
+
+- Reads JSONL of records.
+- Fills the `commit` envelope fields by shelling out to `git show`
+  (or equivalent) for the SHA passed as an argument.
+- Wraps the records in the envelope from
+  [`../02-contracts.md`](../02-contracts.md).
+- POSTs to `/api/ingest` with the bearer token.
+- Exits non-zero on 4xx / 5xx. **No retries, no spool, no S3
+  outbox at alpha** - those land when CI starts using this.
+
+A sketch of what this script might look like appears after the
+out-of-scope list below.
+
+## Out of scope (deferred)
+
+- Replacing the v2 `-d`/`-o` CLI form. Both forms coexist at alpha.
+- Removing the v2 `gh-json` emission path.
+- Updating `bench-orchestrator` or any GitHub Actions workflows.
+  Alpha runs are manual.
+- Retry / spool / outbox-drain on POST failures.
+
+See [`../deferred.md`](../deferred.md) for the post-alpha plan.
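+
+A hedged sketch of that script (argument order, `benchmark_id`, and
+the repo URL are illustrative; only the envelope shape, the auth
+header, and the non-zero exit on failure are contractual):
+
+```python
+#!/usr/bin/env python3
+import json, os, subprocess, sys
+import urllib.request
+from datetime import datetime, timezone
+
+
+def commit_fields(sha: str) -> dict:
+    fmt = "%H%n%aI%n%s%n%an%n%ae%n%cn%n%ce%n%T"
+    out = subprocess.check_output(
+        ["git", "show", "-s", f"--format={fmt}", sha], text=True
+    ).splitlines()
+    return {
+        "sha": out[0],
+        "timestamp": out[1],
+        "message": out[2],  # first line only, per the commits table
+        "author_name": out[3],
+        "author_email": out[4],
+        "committer_name": out[5],
+        "committer_email": out[6],
+        "tree_sha": out[7],
+        # Illustrative URL shape; CI fills in the real one.
+        "url": f"https://github.com/example/vortex/commit/{out[0]}",
+    }
+
+
+def main() -> None:
+    jsonl_path, sha, url = sys.argv[1:4]
+    with open(jsonl_path) as f:
+        records = [json.loads(line) for line in f if line.strip()]
+    envelope = {
+        "run_meta": {
+            "benchmark_id": "manual-alpha",
+            "schema_version": 1,
+            "started_at": datetime.now(timezone.utc).isoformat(),
+        },
+        "commit": commit_fields(sha),
+        "records": records,
+    }
+    req = urllib.request.Request(
+        url,
+        data=json.dumps(envelope).encode(),
+        headers={
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {os.environ['INGEST_BEARER_TOKEN']}",
+        },
+    )
+    # urlopen raises on 4xx/5xx, so the script exits non-zero.
+    with urllib.request.urlopen(req) as resp:
+        print(resp.read().decode())
+
+
+if __name__ == "__main__":
+    main()
+```
+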
## Acceptance criteria
+
+- `cargo test -p vortex-bench` passes; one snapshot per `kind`.
+- Running a benchmark with `--gh-json-v3 <path>` writes valid JSONL
+  matching the wire shape from
+  [`../02-contracts.md`](../02-contracts.md).
+- The post-ingest script round-trips a fixture file through a
+  running alpha server (200 with non-zero `inserted` on first run,
+  200 with non-zero `updated` on second run).
+
+## Branch
+
+`claude/benchmarks-v3-emitter`
diff --git a/benchmarks-website/planning/components/server.md b/benchmarks-website/planning/components/server.md
new file mode 100644
index 00000000000..301bb7e4bf9
--- /dev/null
+++ b/benchmarks-website/planning/components/server.md
@@ -0,0 +1,70 @@
+
+
+# Component: Server (alpha)
+
+## Required reading
+
+- [`../00-overview.md`](../00-overview.md)
+- [`../01-schema.md`](../01-schema.md)
+- [`../02-contracts.md`](../02-contracts.md)
+
+## Goal
+
+A single Rust binary: an HTTP server that owns a DuckDB file on
+local disk, accepts authenticated `/api/ingest` POSTs, and serves
+enough of a read API to render one chart page.
+
+This is the **alpha** version. It runs locally or on a dev box; no
+production deploy. Production deploy, backups, admin tooling, and
+historical data import are deferred (see
+[`../deferred.md`](../deferred.md)).
+
+The server crate lives at a path of the agent's choosing under
+`benchmarks-website/`, registered as a workspace member.
+
+## In scope
+
+- Open the DuckDB file and apply the schema DDL on boot (a DDL
+  sketch follows the acceptance criteria below). No migration
+  framework yet - if the schema changes during alpha, delete the
+  file and re-run.
+- Bearer-token middleware on `/api/ingest`. Token from
+  `INGEST_BEARER_TOKEN` env var, constant-time compared.
+- `POST /api/ingest`: parse the envelope from
+  [`../02-contracts.md`](../02-contracts.md), upsert the commit,
+  dispatch each record to its destination fact table by `kind`,
+  enforce all-or-nothing per POST. Compute each row's
+  `measurement_id` server-side as part of the INSERT. Return
+  `{ inserted, updated }` aggregated across tables.
+- `GET /api/groups` and `GET /api/chart/:slug`: enough to render
+  one chart page. Slugs round-trip; the agent picks the format.
+- `GET /health`: enough to confirm the DB is open and ingest is
+  working (path, latest commit timestamp, per-table row counts -
+  exact shape is the agent's call).
+- Mount whatever HTML routes the web-ui component contributes.
+
+Framework, templating engine (`maud` or `askama`), DuckDB driver
+version, module layout, and DB-access concurrency model are the
+agent's call. Pin the DuckDB crate version in `Cargo.toml`.
+
+## Out of scope (deferred)
+
+Schema migrations, lookup tables, pre-built views, multi-page read
+API, admin endpoints, containerization, EBS mount, backups. See
+[`../deferred.md`](../deferred.md).
+
+## Acceptance criteria
+
+- `cargo build` succeeds for the server crate.
+- Integration test: POST a fixture envelope with a valid bearer →
+  200; POST again → 200 with `updated > 0, inserted = 0`; POST
+  with no/wrong bearer → 401; POST with an unknown `kind` → 400.
+- `GET /health` returns a coherent shape after an ingest.
+- `cargo run` for the server, pointed at a fresh DuckDB file,
+  serves both read routes locally.
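+
+For orientation, boot-time DDL plus the idempotent upsert for the
+smallest fact table might look like the sketch below - exact types,
+constraint syntax, and indexes are the agent's call per
+[`../01-schema.md`](../01-schema.md):
+
+```sql
+CREATE TABLE IF NOT EXISTS commits (
+    commit_sha      VARCHAR PRIMARY KEY, -- 40-hex lowercase
+    "timestamp"     TIMESTAMPTZ NOT NULL,
+    message         VARCHAR NOT NULL,    -- first line only
+    author_name     VARCHAR NOT NULL,
+    author_email    VARCHAR NOT NULL,
+    committer_name  VARCHAR NOT NULL,
+    committer_email VARCHAR NOT NULL,
+    tree_sha        VARCHAR NOT NULL,
+    url             VARCHAR NOT NULL
+);
+
+CREATE TABLE IF NOT EXISTS compression_sizes (
+    measurement_id  BIGINT PRIMARY KEY, -- xxhash64 of the dim tuple
+    commit_sha      VARCHAR NOT NULL REFERENCES commits (commit_sha),
+    dataset         VARCHAR NOT NULL,
+    dataset_variant VARCHAR,
+    format          VARCHAR NOT NULL,
+    value_bytes     BIGINT NOT NULL
+);
+
+-- Re-ingesting the same dim tuple updates the row in place.
+INSERT INTO compression_sizes
+VALUES (?, ?, ?, ?, ?, ?)
+ON CONFLICT (measurement_id) DO UPDATE
+SET value_bytes = excluded.value_bytes;
+```
+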
## Branch
+
+`claude/benchmarks-v3-server`
diff --git a/benchmarks-website/planning/components/web-ui.md b/benchmarks-website/planning/components/web-ui.md
new file mode 100644
index 00000000000..abf9e9de4a0
--- /dev/null
+++ b/benchmarks-website/planning/components/web-ui.md
@@ -0,0 +1,62 @@
+
+
+# Component: Web UI (alpha)
+
+## Required reading
+
+- [`../00-overview.md`](../00-overview.md)
+- [`../01-schema.md`](../01-schema.md)
+- [`../02-contracts.md`](../02-contracts.md) - the JSON shapes you
+  render against.
+
+## Goal
+
+Get something on screen. **One landing page** that lists groups and
+**one chart page** that renders a single chart. SSR HTML + a thin
+Chart.js hydration. That's it for alpha.
+
+This component develops in parallel against a fixture-populated
+DuckDB - no dependency on the live ingest path.
+
+## In scope
+
+- A fixture: a small DuckDB file (or a builder that produces one
+  from a JSONL fixture) covering all five fact tables with a
+  handful of records each. Used for dev and tests.
+- Landing page (`GET /`): list of groups with links into chart
+  pages, derived from `/api/groups`.
+- Chart page (`GET /chart/:slug`): one Chart.js line chart, data
+  embedded inline as a JSON `<script>` tag. A rendered chart page
+  for TPC-H Q1 might be titled `tpch sf=1 Q1 [nvme] — bench.vortex.dev`
+  with the subtitle `unit: ns · 2 series · 3 commits`.
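+
+A minimal sketch of the inline-JSON + hydration pattern (asset
+path, element ids, and the embedded payload are illustrative; the
+real payload comes from `/api/chart/:slug`):
+
+```html
+<canvas id="chart"></canvas>
+<script type="application/json" id="chart-data">
+  {"unit": "ns", "commits": ["93e79c3", "d1b44f8", "f4a9bf5"],
+   "series": {"duckdb / parquet": [812345678, 790123456, null]}}
+</script>
+<script src="/static/chart.umd.js"></script>
+<script>
+  // Hydrate: parse the embedded JSON and hand it to Chart.js.
+  const data = JSON.parse(
+    document.getElementById("chart-data").textContent,
+  );
+  new Chart(document.getElementById("chart"), {
+    type: "line",
+    data: {
+      labels: data.commits,
+      datasets: Object.entries(data.series).map(([name, values]) => ({
+        label: name,
+        data: values, // null = missing data point, per the read API
+        spanGaps: false,
+      })),
+    },
+  });
+</script>
+```
+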


From d1b44f8fceaaae5c488982282c31c7b95d85377e Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Sun, 26 Apr 2026 14:54:24 -0400 Subject: [PATCH 08/26] add duckdb to gitignore Signed-off-by: Connor Tsui --- .gitignore | 3 +++ REUSE.toml | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 7fa79fb2162..6a996cf96cc 100644 --- a/.gitignore +++ b/.gitignore @@ -242,3 +242,6 @@ trace*.pb # pytest-benchmark output vortex-python/.benchmarks/ +# For local benchmarks website server and things like the WAL +**.duckdb* + diff --git a/REUSE.toml b/REUSE.toml index 161f6e3086a..8e406c95c90 100644 --- a/REUSE.toml +++ b/REUSE.toml @@ -36,7 +36,7 @@ SPDX-FileCopyrightText = "Copyright the Vortex contributors" SPDX-License-Identifier = "CC-BY-4.0" [[annotations]] -path = ["**/.gitignore", ".gitmodules", ".python-version", "**/*.lock", "**/*.lockfile", "**/*.toml", "**/*.json", ".idea/**", ".github/**", "codecov.yml", "java/gradle/wrapper/gradle-wrapper.properties"] +path = ["**/.gitignore", ".gitmodules", ".python-version", "**/*.lock", "**/*.lockfile", "**/*.toml", "**/*.json", ".idea/**", ".github/**", "codecov.yml", "java/gradle/wrapper/gradle-wrapper.properties", "**.duckdb*"] precedence = "override" SPDX-FileCopyrightText = "Copyright the Vortex contributors" SPDX-License-Identifier = "Apache-2.0" From f4a9bf5a772930f12cb710f2fdf9ccf56bd223ae Mon Sep 17 00:00:00 2001 From: Connor Tsui <87130162+connortsui20@users.noreply.github.com> Date: Sun, 26 Apr 2026 16:12:29 -0400 Subject: [PATCH 09/26] [claude] Add vortex-bench-server v3 deployment infrastructure (#7644) This PR introduces the deployment infrastructure for vortex-bench-server v3, a new benchmarking server that runs alongside the existing v2 instance. The v3 server provides an ingest endpoint for benchmark results with bearer token authentication and uses DuckDB for data storage. 1. **GitHub Actions workflow** (`publish-bench-server.yml`): New CI pipeline that builds and publishes the vortex-bench-server Docker image to GHCR on changes to the server code, vortex-bench crate, or Cargo.lock. 2. **Dockerfile** (`benchmarks-website/server/Dockerfile`): Multi-stage Docker build that: - Compiles vortex-bench-server in a Rust 1.91 environment - Packages it with DuckDB CLI tools in a minimal Debian image - Targets ARM64 architecture for EC2 deployment 3. **Backup script** (`benchmarks-website/server/scripts/backup.sh`): Daily backup utility that: - Exports the DuckDB database from the running container - Uploads backups to S3 (`vortex-ci-benchmark-results/v3-backups/`) - Manages local disk space by retaining only the latest backup 4. **Docker Compose configuration**: Added vortex-bench-server service that: - Runs on port 3001 (v2 remains on port 80) - Mounts EBS-backed data directory for DuckDB persistence - Loads bearer token from `/etc/vortex-bench/secrets.env` - Integrates with existing watchtower for automatic image updates 5. **EC2 initialization guide** (`ec2-init.txt`): Comprehensive setup documentation covering: - Bearer token secret management - EBS volume preparation - Service startup and health checks - Cron-based backup scheduling - Token rotation procedures The v3 server is designed to run additively alongside v2, allowing for gradual DNS migration and dual-write support from CI. The Docker image build is validated by the GitHub Actions workflow on each push to develop. The backup script can be tested manually on the EC2 host before cron scheduling. 
Smoke tests are documented in the setup guide (curl against `/health` endpoint on port 3001). https://claude.ai/code/session_019mBcBdF4LhKDXyKwuKRAPV --------- Signed-off-by: Claude Co-authored-by: Claude Signed-off-by: Connor Tsui --- .github/workflows/publish-bench-server.yml | 46 ++ Cargo.lock | 504 +++++++------------- benchmarks-website/docker-compose.yml | 14 + benchmarks-website/ec2-init.txt | 55 ++- benchmarks-website/server/Cargo.toml | 3 +- benchmarks-website/server/Dockerfile | 46 ++ benchmarks-website/server/scripts/backup.sh | 46 ++ 7 files changed, 379 insertions(+), 335 deletions(-) create mode 100644 .github/workflows/publish-bench-server.yml create mode 100644 benchmarks-website/server/Dockerfile create mode 100755 benchmarks-website/server/scripts/backup.sh diff --git a/.github/workflows/publish-bench-server.yml b/.github/workflows/publish-bench-server.yml new file mode 100644 index 00000000000..0bfcb6d3293 --- /dev/null +++ b/.github/workflows/publish-bench-server.yml @@ -0,0 +1,46 @@ +name: Publish Bench Server + +on: + push: + branches: [develop] + paths: + - "benchmarks-website/server/**" + - "vortex-bench/**" + - "Cargo.lock" + - ".github/workflows/publish-bench-server.yml" + workflow_dispatch: + +jobs: + publish: + runs-on: ubuntu-latest + timeout-minutes: 30 + permissions: + contents: read + packages: write + id-token: write + steps: + - uses: actions/checkout@v6 + + - name: Log in to GHCR + uses: docker/login-action@v4 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up QEMU + uses: docker/setup-qemu-action@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v4 + + - name: Build and push + uses: docker/build-push-action@v7 + with: + context: . 
+ file: ./benchmarks-website/server/Dockerfile + platforms: linux/arm64 + push: true + tags: | + ghcr.io/${{ github.repository }}/vortex-bench-server:latest + ghcr.io/${{ github.repository }}/vortex-bench-server:${{ github.sha }} diff --git a/Cargo.lock b/Cargo.lock index 304c91a26b0..28c9677dbec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -197,6 +197,9 @@ name = "arbitrary" version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" +dependencies = [ + "derive_arbitrary", +] [[package]] name = "arc-swap" @@ -225,24 +228,6 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" -[[package]] -name = "arrow" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e833808ff2d94ed40d9379848a950d995043c7fb3e81a30b383f4c6033821cc" -dependencies = [ - "arrow-arith 56.2.0", - "arrow-array 56.2.0", - "arrow-buffer 56.2.0", - "arrow-cast 56.2.0", - "arrow-data 56.2.0", - "arrow-ord 56.2.0", - "arrow-row 56.2.0", - "arrow-schema 56.2.0", - "arrow-select 56.2.0", - "arrow-string 56.2.0", -] - [[package]] name = "arrow" version = "57.3.0" @@ -285,20 +270,6 @@ dependencies = [ "arrow-string 58.1.0", ] -[[package]] -name = "arrow-arith" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad08897b81588f60ba983e3ca39bda2b179bdd84dced378e7df81a5313802ef8" -dependencies = [ - "arrow-array 56.2.0", - "arrow-buffer 56.2.0", - "arrow-data 56.2.0", - "arrow-schema 56.2.0", - "chrono", - "num", -] - [[package]] name = "arrow-arith" version = "57.3.0" @@ -327,22 +298,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "arrow-array" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8548ca7c070d8db9ce7aa43f37393e4bfcf3f2d3681df278490772fd1673d08d" -dependencies = [ - "ahash 0.8.12", - "arrow-buffer 56.2.0", - "arrow-data 56.2.0", - "arrow-schema 56.2.0", - "chrono", - "half", - "hashbrown 0.16.1", - "num", -] - [[package]] name = "arrow-array" version = "57.3.0" @@ -381,17 +336,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "arrow-buffer" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e003216336f70446457e280807a73899dd822feaf02087d31febca1363e2fccc" -dependencies = [ - "bytes", - "half", - "num", -] - [[package]] name = "arrow-buffer" version = "57.3.0" @@ -416,27 +360,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "arrow-cast" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "919418a0681298d3a77d1a315f625916cb5678ad0d74b9c60108eb15fd083023" -dependencies = [ - "arrow-array 56.2.0", - "arrow-buffer 56.2.0", - "arrow-data 56.2.0", - "arrow-schema 56.2.0", - "arrow-select 56.2.0", - "atoi", - "base64", - "chrono", - "comfy-table", - "half", - "lexical-core", - "num", - "ryu", -] - [[package]] name = "arrow-cast" version = "57.3.0" @@ -511,18 +434,6 @@ dependencies = [ "regex", ] -[[package]] -name = "arrow-data" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5c64fff1d142f833d78897a772f2e5b55b36cb3e6320376f0961ab0db7bd6d0" -dependencies = [ - "arrow-buffer 56.2.0", - "arrow-schema 56.2.0", - "half", - "num", -] - [[package]] name = "arrow-data" version = "57.3.0" @@ -629,19 +540,6 @@ 
dependencies = [ "simdutf8", ] -[[package]] -name = "arrow-ord" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c8f82583eb4f8d84d4ee55fd1cb306720cddead7596edce95b50ee418edf66f" -dependencies = [ - "arrow-array 56.2.0", - "arrow-buffer 56.2.0", - "arrow-data 56.2.0", - "arrow-schema 56.2.0", - "arrow-select 56.2.0", -] - [[package]] name = "arrow-ord" version = "57.3.0" @@ -668,19 +566,6 @@ dependencies = [ "arrow-select 58.1.0", ] -[[package]] -name = "arrow-row" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d07ba24522229d9085031df6b94605e0f4b26e099fb7cdeec37abd941a73753" -dependencies = [ - "arrow-array 56.2.0", - "arrow-buffer 56.2.0", - "arrow-data 56.2.0", - "arrow-schema 56.2.0", - "half", -] - [[package]] name = "arrow-row" version = "57.3.0" @@ -707,15 +592,6 @@ dependencies = [ "half", ] -[[package]] -name = "arrow-schema" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3aa9e59c611ebc291c28582077ef25c97f1975383f1479b12f3b9ffee2ffabe" -dependencies = [ - "bitflags", -] - [[package]] name = "arrow-schema" version = "57.3.0" @@ -738,20 +614,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "arrow-select" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c41dbbd1e97bfcaee4fcb30e29105fb2c75e4d82ae4de70b792a5d3f66b2e7a" -dependencies = [ - "ahash 0.8.12", - "arrow-array 56.2.0", - "arrow-buffer 56.2.0", - "arrow-data 56.2.0", - "arrow-schema 56.2.0", - "num", -] - [[package]] name = "arrow-select" version = "57.3.0" @@ -780,23 +642,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "arrow-string" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53f5183c150fbc619eede22b861ea7c0eebed8eaac0333eaa7f6da5205fd504d" -dependencies = [ - "arrow-array 56.2.0", - "arrow-buffer 56.2.0", - "arrow-data 56.2.0", - "arrow-schema 56.2.0", - "arrow-select 56.2.0", - "memchr", - "num", - "regex", - "regex-syntax", -] - [[package]] name = "arrow-string" version = "57.3.0" @@ -845,9 +690,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.41" +version = "0.4.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0f9ee0f6e02ffd7ad5816e9464499fba7b3effd01123b515c41d1697c43dad1" +checksum = "e79b3f8a79cccc2898f31920fc69f304859b3bd567490f75ebf51ae1c792a9ac" dependencies = [ "compression-codecs", "compression-core", @@ -1235,9 +1080,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.8.4" +version = "1.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d2d5991425dfd0785aed03aedcf0b321d61975c9b5b3689c774a2610ae0b51e" +checksum = "0aa83c34e62843d924f905e0f5c866eb1dd6545fc4d719e803d9ba6030371fce" dependencies = [ "arrayref", "arrayvec", @@ -1524,12 +1369,6 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0" -[[package]] -name = "cesu8" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" - [[package]] name = "cexpr" version = "0.6.0" @@ -1816,12 +1655,12 @@ dependencies = [ [[package]] name = "comfy-table" -version = "7.1.2" +version = "7.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e0d05af1e006a2407bedef5af410552494ce5be9090444dbbcb57258c1af3d56" +checksum = "4a65ebfec4fb190b6f90e944a817d60499ee0744e582530e2c9900a22e591d9a" dependencies = [ - "strum 0.26.3", - "strum_macros 0.26.4", + "crossterm 0.28.1", + "unicode-segmentation", "unicode-width 0.2.2", ] @@ -1863,9 +1702,9 @@ dependencies = [ [[package]] name = "compression-codecs" -version = "0.4.37" +version = "0.4.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb7b51a7d9c967fc26773061ba86150f19c50c0d65c887cb1fbe295fd16619b7" +checksum = "ce2548391e9c1929c21bf6aa2680af86fe4c1b33e6cea9ac1cfeec0bd11218cf" dependencies = [ "bzip2", "compression-core", @@ -1878,9 +1717,9 @@ dependencies = [ [[package]] name = "compression-core" -version = "0.4.31" +version = "0.4.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" +checksum = "cc14f565cf027a105f7a44ccf9e5b424348421a1d8952a8fc9d499d313107789" [[package]] name = "concurrent-queue" @@ -2121,6 +1960,19 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crossterm" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6" +dependencies = [ + "bitflags", + "crossterm_winapi", + "parking_lot", + "rustix 0.38.44", + "winapi", +] + [[package]] name = "crossterm" version = "0.29.0" @@ -3761,6 +3613,17 @@ dependencies = [ "serde_core", ] +[[package]] +name = "derive_arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "derive_more" version = "2.1.1" @@ -3825,7 +3688,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -3873,12 +3736,13 @@ checksum = "ab23e69df104e2fd85ee63a533a22d2132ef5975dc6b36f9f3e5a7305e4a8ed7" [[package]] name = "duckdb" -version = "1.4.1" +version = "1.10502.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a093eed1c714143b257b95fa323e38527fabf05fbf02bb0d5d2045275ffdaef" +checksum = "0fdc796383b176dd5a45353fbb5e64583c0ee4da12cb62c9e510b785324b2488" dependencies = [ - "arrow 56.2.0", + "arrow 58.1.0", "cast", + "comfy-table", "fallible-iterator", "fallible-streaming-iterator", "hashlink", @@ -4024,7 +3888,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -4700,9 +4564,9 @@ checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" [[package]] name = "hybrid-array" -version = "0.4.10" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3944cf8cf766b40e2a1a333ee5e9b563f854d5fa49d6a8ca2764e97c6eddb214" +checksum = "08d46837a0ed51fe95bd3b05de33cd64a1ee88fc797477ca48446872504507c5" dependencies = [ "typenum", ] @@ -4743,6 +4607,7 @@ dependencies = [ "tokio", "tokio-rustls", "tower-service", + "webpki-roots", ] [[package]] @@ -4910,9 +4775,9 @@ dependencies = [ [[package]] name = "idna_adapter" -version = "1.2.1" +version = "1.2.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +checksum = "cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714" dependencies = [ "icu_normalizer", "icu_properties", @@ -5046,7 +4911,7 @@ checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" dependencies = [ "hermit-abi", "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -5118,7 +4983,7 @@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -5147,22 +5012,6 @@ dependencies = [ "jiff-tzdb", ] -[[package]] -name = "jni" -version = "0.21.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a87aa2bb7d2af34197c04845522473242e1aa17c12f4935d5856491a7fb8c97" -dependencies = [ - "cesu8", - "cfg-if", - "combine", - "jni-sys 0.3.1", - "log", - "thiserror 1.0.69", - "walkdir", - "windows-sys 0.45.0", -] - [[package]] name = "jni" version = "0.22.4" @@ -5173,7 +5022,7 @@ dependencies = [ "combine", "java-locator", "jni-macros", - "jni-sys 0.4.1", + "jni-sys", "libloading 0.8.9", "log", "simd_cesu8", @@ -5195,15 +5044,6 @@ dependencies = [ "syn 2.0.117", ] -[[package]] -name = "jni-sys" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41a652e1f9b6e0275df1f15b32661cf0d4b78d4d87ddec5e0c3c20f097433258" -dependencies = [ - "jni-sys 0.4.1", -] - [[package]] name = "jni-sys" version = "0.4.1" @@ -5890,23 +5730,25 @@ checksum = "b3a6a8c165077efc8f3a971534c50ea6a1a18b329ef4a66e897a7e3a1494565f" [[package]] name = "libc" -version = "0.2.185" +version = "0.2.186" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52ff2c0fe9bc6cb6b14a0592c2ff4fa9ceb83eea9db979b0487cd054946a2b8f" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" [[package]] name = "libduckdb-sys" -version = "1.4.1" +version = "1.10502.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b93c3ff279601516f01531cadf2ccba50394fbb5f7bf685c6e6b9b07c8dca6f" +checksum = "8d7401630ae2abcff642f7156294289e50f2d222e061c026ad797b01bf20c215" dependencies = [ "cc", "flate2", "pkg-config", + "reqwest 0.12.28", "serde", "serde_json", "tar", "vcpkg", + "zip 6.0.0", ] [[package]] @@ -6519,21 +6361,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.59.0", -] - -[[package]] -name = "num" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" -dependencies = [ - "num-bigint", - "num-complex", - "num-integer", - "num-iter", - "num-rational", - "num-traits", + "windows-sys 0.61.2", ] [[package]] @@ -6571,28 +6399,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-iter" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-rational" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" -dependencies = [ - "num-bigint", - "num-integer", - "num-traits", -] 
- [[package]] name = "num-traits" version = "0.2.19" @@ -7219,7 +7025,7 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "044b1fa4f259f4df9ad5078e587b208f5d288a25407575fcddb9face30c7c692" dependencies = [ - "rand 0.8.6", + "rand 0.9.4", "socket2", "thiserror 2.0.18", ] @@ -7718,7 +7524,7 @@ dependencies = [ "once_cell", "socket2", "tracing", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -7944,7 +7750,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "577c9b9f652b4c121fb25c6a391dd06406d3b092ba68827e6d2f09550edc54b3" dependencies = [ "cfg-if", - "crossterm", + "crossterm 0.29.0", "instability", "ratatui-core", ] @@ -8183,13 +7989,14 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams 0.4.2", "web-sys", + "webpki-roots", ] [[package]] name = "reqwest" -version = "0.13.2" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab3f43e3283ab1488b624b44b0e988d0acea0b3214e694730a055cb6b2efa801" +checksum = "62e0021ea2c22aed41653bc7e1419abb2c97e038ff2c33d0e1309e49a97deec0" dependencies = [ "base64", "bytes", @@ -8403,14 +8210,14 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.12.1", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] name = "rustls" -version = "0.23.38" +version = "0.23.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69f9466fb2c14ea04357e91413efb882e2a6d4a406e625449bc0a5d360d53a21" +checksum = "7c2c118cb077cca2822033836dfb1b975355dfb784b5e8da48f7b6c5db74e60e" dependencies = [ "aws-lc-rs", "once_cell", @@ -8435,9 +8242,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.14.0" +version = "1.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" +checksum = "30a7197ae7eb376e574fe940d068c30fe0462554a3ddbe4eca7838e049c937a9" dependencies = [ "web-time", "zeroize", @@ -8445,13 +8252,13 @@ dependencies = [ [[package]] name = "rustls-platform-verifier" -version = "0.6.2" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d99feebc72bae7ab76ba994bb5e121b8d83d910ca40b36e0921f53becc41784" +checksum = "26d1e2536ce4f35f4846aa13bff16bd0ff40157cdb14cc056c7b14ba41233ba0" dependencies = [ "core-foundation 0.10.1", "core-foundation-sys", - "jni 0.21.1", + "jni", "log", "once_cell", "rustls", @@ -8461,7 +8268,7 @@ dependencies = [ "security-framework", "security-framework-sys", "webpki-root-certs", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -9571,7 +9378,7 @@ dependencies = [ "getrandom 0.4.2", "once_cell", "rustix 1.1.4", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -9590,7 +9397,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "230a1b821ccbd75b185820a1f1ff7b14d21da1e442e22c0863ea5f08771a8874" dependencies = [ "rustix 1.1.4", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -9609,7 +9416,7 @@ dependencies = [ "chrono", "num_cpus", "ping", - "reqwest 0.13.2", + "reqwest 0.13.3", "sysinfo", "test-with-derive", "uzers", @@ -9630,7 +9437,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "reqwest 0.13.2", + "reqwest 0.13.3", "syn 2.0.117", "sysinfo", "uzers", @@ -9960,9 +9767,9 @@ dependencies = [ [[package]] name = "tower" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" dependencies = [ "futures-core", "futures-util", @@ -10123,11 +9930,11 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] name = "twox-hash" -version = "2.1.0" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7b17f197b3050ba473acf9181f7b1d3b66d1cf7356c6cc57886662276e65908" +checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" dependencies = [ - "rand 0.8.6", + "rand 0.9.4", ] [[package]] @@ -10536,7 +10343,7 @@ dependencies = [ "parquet 58.1.0", "rand 0.10.1", "regex", - "reqwest 0.13.2", + "reqwest 0.13.3", "serde", "serde_json", "sysinfo", @@ -10566,7 +10373,7 @@ dependencies = [ "duckdb", "insta", "maud", - "reqwest 0.13.2", + "reqwest 0.13.3", "serde", "serde_json", "subtle", @@ -10656,7 +10463,7 @@ dependencies = [ "clap", "futures", "parquet 58.1.0", - "reqwest 0.13.2", + "reqwest 0.13.3", "serde", "serde_json", "sha2 0.11.0", @@ -10838,7 +10645,7 @@ dependencies = [ "object_store 0.13.2", "parking_lot", "paste", - "reqwest 0.13.2", + "reqwest 0.13.3", "rstest", "tempfile", "tracing", @@ -10848,7 +10655,7 @@ dependencies = [ "vortex-runend", "vortex-sequence", "vortex-utils", - "zip", + "zip 8.6.0", ] [[package]] @@ -11060,7 +10867,7 @@ dependencies = [ "arrow-array 58.1.0", "arrow-schema 58.1.0", "futures", - "jni 0.22.4", + "jni", "object_store 0.13.2", "parking_lot", "thiserror 2.0.18", @@ -11141,7 +10948,7 @@ dependencies = [ "bindgen", "libloading 0.8.9", "liblzma", - "reqwest 0.13.2", + "reqwest 0.13.3", "tar", "vortex-cuda-macros", ] @@ -11352,7 +11159,7 @@ dependencies = [ "arrow-schema 58.1.0", "clap", "console_error_panic_hook", - "crossterm", + "crossterm 0.29.0", "datafusion 53.1.0", "env_logger", "flatbuffers", @@ -11621,6 +11428,15 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "webpki-roots" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52f5ee44c96cf55f1b349600768e3ece3a8f26010c05265ab73f945bb1a2eb9d" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "which" version = "8.0.2" @@ -11652,7 +11468,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -11773,15 +11589,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "windows-sys" -version = "0.45.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" -dependencies = [ - "windows-targets 0.42.2", -] - [[package]] name = "windows-sys" version = "0.52.0" @@ -11802,26 +11609,20 @@ dependencies = [ [[package]] name = "windows-sys" -version = "0.61.2" +version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" dependencies = [ - "windows-link", + "windows-targets 0.53.5", ] [[package]] -name = "windows-targets" -version = "0.42.2" +name = "windows-sys" +version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" 
+checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", + "windows-link", ] [[package]] @@ -11833,7 +11634,7 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm", + "windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", @@ -11841,19 +11642,30 @@ dependencies = [ ] [[package]] -name = "windows-threading" -version = "0.2.1" +name = "windows-targets" +version = "0.53.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3949bd5b99cafdf1c7ca86b43ca564028dfe27d66958f2470940f73d86d75b37" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" dependencies = [ "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", ] [[package]] -name = "windows_aarch64_gnullvm" -version = "0.42.2" +name = "windows-threading" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" +checksum = "3949bd5b99cafdf1c7ca86b43ca564028dfe27d66958f2470940f73d86d75b37" +dependencies = [ + "windows-link", +] [[package]] name = "windows_aarch64_gnullvm" @@ -11862,10 +11674,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] -name = "windows_aarch64_msvc" -version = "0.42.2" +name = "windows_aarch64_gnullvm" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" [[package]] name = "windows_aarch64_msvc" @@ -11874,10 +11686,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] -name = "windows_i686_gnu" -version = "0.42.2" +name = "windows_aarch64_msvc" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" [[package]] name = "windows_i686_gnu" @@ -11885,6 +11697,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" @@ -11892,10 +11710,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] -name = "windows_i686_msvc" -version = "0.42.2" +name = "windows_i686_gnullvm" +version = "0.53.1" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" [[package]] name = "windows_i686_msvc" @@ -11904,10 +11722,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] -name = "windows_x86_64_gnu" -version = "0.42.2" +name = "windows_i686_msvc" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" [[package]] name = "windows_x86_64_gnu" @@ -11916,10 +11734,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] -name = "windows_x86_64_gnullvm" -version = "0.42.2" +name = "windows_x86_64_gnu" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" [[package]] name = "windows_x86_64_gnullvm" @@ -11928,10 +11746,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] -name = "windows_x86_64_msvc" -version = "0.42.2" +name = "windows_x86_64_gnullvm" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" [[package]] name = "windows_x86_64_msvc" @@ -11939,6 +11757,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + [[package]] name = "winnow" version = "0.7.15" @@ -12224,6 +12048,20 @@ dependencies = [ "num-traits", ] +[[package]] +name = "zip" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb2a05c7c36fde6c09b08576c9f7fb4cda705990f73b58fe011abf7dfb24168b" +dependencies = [ + "arbitrary", + "crc32fast", + "flate2", + "indexmap", + "memchr", + "zopfli", +] + [[package]] name = "zip" version = "8.6.0" diff --git a/benchmarks-website/docker-compose.yml b/benchmarks-website/docker-compose.yml index 4c2e9682329..b97482a230a 100644 --- a/benchmarks-website/docker-compose.yml +++ b/benchmarks-website/docker-compose.yml @@ -5,6 +5,20 @@ services: - "80:3000" restart: unless-stopped + vortex-bench-server: + image: ghcr.io/vortex-data/vortex/vortex-bench-server:latest + ports: + - "3001:3000" + environment: + VORTEX_BENCH_DB: "/app/data/bench.duckdb" + VORTEX_BENCH_BIND: "0.0.0.0:3000" + VORTEX_BENCH_LOG: "info,vortex_bench_server=debug" + env_file: + - /etc/vortex-bench/secrets.env + volumes: + - /opt/benchmarks-website/data:/app/data + restart: unless-stopped + watchtower: image: containrrr/watchtower volumes: diff --git a/benchmarks-website/ec2-init.txt b/benchmarks-website/ec2-init.txt index 
1c2459b3bee..4e1377cc014 100644 --- a/benchmarks-website/ec2-init.txt +++ b/benchmarks-website/ec2-init.txt @@ -14,4 +14,57 @@ sudo mkdir -p /opt/benchmarks-website sudo cp docker-compose.yml /opt/benchmarks-website/ cd /opt/benchmarks-website - docker compose up -d \ No newline at end of file + docker compose up -d + + ==================================================================== + v3 (vortex-bench-server) — additive setup, runs alongside v2 + ==================================================================== + + v2 stays on port 80 until DNS is flipped. v3 runs on port 3001 from + the same docker-compose.yml on this host. + + 4. Create the bearer-token env file (root:root, mode 600) + sudo mkdir -p /etc/vortex-bench + sudo install -m 600 -o root -g root /dev/null /etc/vortex-bench/secrets.env + # Edit and set INGEST_BEARER_TOKEN=: + sudo vi /etc/vortex-bench/secrets.env + # File contents: + # INGEST_BEARER_TOKEN= + + 5. Create the EBS-backed DuckDB data directory + # Assumes an EBS volume is already mounted at /opt/benchmarks-website/data. + sudo mkdir -p /opt/benchmarks-website/data + sudo chown root:root /opt/benchmarks-website/data + sudo chmod 755 /opt/benchmarks-website/data + + 6. Pull and start v3 (watchtower already polls ghcr.io for refreshes) + cd /opt/benchmarks-website + docker compose pull vortex-bench-server + docker compose up -d vortex-bench-server + # Smoke-check on the host: + curl -sf http://127.0.0.1:3001/health || echo "v3 not responding" + + 7. Install the daily DuckDB backup cron + # Copy the backup script from the repo checkout to a stable location. + sudo install -m 755 -o root -g root \ + benchmarks-website/server/scripts/backup.sh \ + /usr/local/bin/vortex-bench-backup.sh + # Cron entry: 06:00 UTC daily, after the nightly bench finishes. + sudo tee /etc/cron.d/vortex-bench-backup >/dev/null <<'CRON' + 0 6 * * * root /usr/local/bin/vortex-bench-backup.sh >> /var/log/vortex-bench-backup.log 2>&1 + CRON + sudo chmod 644 /etc/cron.d/vortex-bench-backup + # The instance IAM role already permits writes to + # s3://vortex-ci-benchmark-results/ (same role v2's cat-s3.sh uses). + + 8. Bearer-token rotation procedure + # When rotating INGEST_BEARER_TOKEN: + # a. Generate a new token (e.g. `openssl rand -hex 32`). + # b. Update the GitHub Actions Environment secret INGEST_BEARER_TOKEN + # so CI dual-writes use the new value. + # c. On this EC2 host, edit the env file and restart only the v3 + # container so v2 traffic on port 80 is unaffected: + # sudo vi /etc/vortex-bench/secrets.env + # cd /opt/benchmarks-website + # docker compose up -d --force-recreate vortex-bench-server + # d. Verify with `curl` against /health and a token-gated endpoint. 
\ No newline at end of file
diff --git a/benchmarks-website/server/Cargo.toml b/benchmarks-website/server/Cargo.toml
index 5b501adf0cc..07d1746a5e5 100644
--- a/benchmarks-website/server/Cargo.toml
+++ b/benchmarks-website/server/Cargo.toml
@@ -26,7 +26,8 @@ path = "src/main.rs"
 anyhow = { workspace = true }
 axum = "0.8"
 base64 = "0.22"
-duckdb = { version = "1.4", features = ["bundled"] }
+# track vortex-duckdb's bundled engine version (build.rs)
+duckdb = { version = "1.10502", features = ["bundled"] }
 maud = { version = "0.27", features = ["axum"] }
 serde = { workspace = true, features = ["derive"] }
 serde_json = { workspace = true }
diff --git a/benchmarks-website/server/Dockerfile b/benchmarks-website/server/Dockerfile
new file mode 100644
index 00000000000..81c2c4860b9
--- /dev/null
+++ b/benchmarks-website/server/Dockerfile
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright the Vortex contributors
+#
+# Build context: repository root (the server is a workspace member).
+# Build: docker build -f benchmarks-website/server/Dockerfile .
+# Toolchain pinned to match rust-toolchain.toml.
+
+FROM rust:1.91-bookworm AS build
+
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        build-essential \
+        cmake \
+        pkg-config \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /build
+COPY . .
+
+RUN cargo build --release -p vortex-bench-server --bin vortex-bench-server
+
+FROM debian:bookworm-slim
+
+# Keep this in lockstep with libduckdb-sys in Cargo.lock.
+ARG DUCKDB_VERSION=1.5.2
+
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        ca-certificates \
+        libstdc++6 \
+        unzip \
+        wget \
+    && wget -q "https://github.com/duckdb/duckdb/releases/download/v${DUCKDB_VERSION}/duckdb_cli-linux-aarch64.zip" -O /tmp/duckdb.zip \
+    && unzip -q /tmp/duckdb.zip -d /usr/local/bin/ \
+    && chmod +x /usr/local/bin/duckdb \
+    && rm /tmp/duckdb.zip \
+    && apt-get purge -y --auto-remove unzip wget \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY --from=build /build/target/release/vortex-bench-server /usr/local/bin/vortex-bench-server
+
+WORKDIR /app/data
+
+EXPOSE 3000
+
+CMD ["/usr/local/bin/vortex-bench-server"]
diff --git a/benchmarks-website/server/scripts/backup.sh b/benchmarks-website/server/scripts/backup.sh
new file mode 100755
index 00000000000..ca4a35f891f
--- /dev/null
+++ b/benchmarks-website/server/scripts/backup.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright the Vortex contributors
+#
+# Daily DuckDB backup for the vortex-bench-server v3 instance.
+# Runs on the EC2 host via cron (see benchmarks-website/ec2-init.txt).
+#
+# Exports the running container's DuckDB to a local directory and uploads
+# it to s3://vortex-ci-benchmark-results/v3-backups/<date>/. The instance
+# IAM role already grants write access to that bucket (it is the same
+# bucket cat-s3.sh uses for v2).
+#
+# At alpha this is a convenience backup: the data is also reproducible
+# from CI dual-writes to the v3 ingest endpoint, so RPO is bounded by
+# what CI has posted, not by this script's cadence.
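+#
+# Manual invocation sketch: every variable below has an overridable
+# default, so a dry run against a scratch bucket (the bucket name here
+# is made up) looks like:
+#
+#   S3_PREFIX=s3://my-scratch-bucket/v3-backups DATA_DIR=/tmp/bench-data ./backup.sh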
+ +set -euo pipefail + +CONTAINER="${CONTAINER:-vortex-bench-server}" +DB_PATH="${DB_PATH:-/app/data/bench.duckdb}" +DATA_DIR="${DATA_DIR:-/opt/benchmarks-website/data}" +S3_PREFIX="${S3_PREFIX:-s3://vortex-ci-benchmark-results/v3-backups}" + +date_stamp="$(date -u +%Y%m%d)" +export_dir="backup-${date_stamp}" +host_export_dir="${DATA_DIR}/${export_dir}" + +# Run EXPORT DATABASE inside the container so we hit the same DuckDB +# build that wrote the file. The container path mirrors the host path +# under /app/data, so the export lands on the EBS volume. +docker exec "${CONTAINER}" \ + duckdb "${DB_PATH}" \ + -c "EXPORT DATABASE '/app/data/${export_dir}'" + +aws s3 cp \ + --recursive \ + "${host_export_dir}" \ + "${S3_PREFIX}/${date_stamp}/" + +# Keep the latest local export, drop older ones to bound disk use. +find "${DATA_DIR}" \ + -maxdepth 1 \ + -type d \ + -name "backup-*" \ + ! -path "${host_export_dir}" \ + -exec rm -rf {} + From 7ba792dc50e0bdae063d80cb599471611937c011 Mon Sep 17 00:00:00 2001 From: Connor Tsui <87130162+connortsui20@users.noreply.github.com> Date: Sun, 26 Apr 2026 21:24:50 -0400 Subject: [PATCH 10/26] Benchmarks v3 migration to duckdb (#7646) This is a one-shot migration binary to take all of the data from `data.json.gz` and bring it into a duckdb database. Simply gathers and aggregates everything into memory and writes data in chunks with arrow arrays. Insert row-by-row took way too long, and the appender API in duckdb does not support `BIGINT[]` for some reason... --------- Signed-off-by: Claude Signed-off-by: Connor Tsui Co-authored-by: Claude Signed-off-by: Connor Tsui --- Cargo.lock | 60 ++ Cargo.toml | 1 + benchmarks-website/migrate/Cargo.toml | 41 + benchmarks-website/migrate/src/classifier.rs | 818 +++++++++++++++++ benchmarks-website/migrate/src/commits.rs | 100 +++ benchmarks-website/migrate/src/lib.rs | 21 + benchmarks-website/migrate/src/main.rs | 114 +++ benchmarks-website/migrate/src/migrate.rs | 836 ++++++++++++++++++ benchmarks-website/migrate/src/source.rs | 140 +++ benchmarks-website/migrate/src/v2.rs | 142 +++ benchmarks-website/migrate/src/verify.rs | 350 ++++++++ .../migrate/tests/classifier.rs | 439 +++++++++ .../migrate/tests/end_to_end.rs | 263 ++++++ 13 files changed, 3325 insertions(+) create mode 100644 benchmarks-website/migrate/Cargo.toml create mode 100644 benchmarks-website/migrate/src/classifier.rs create mode 100644 benchmarks-website/migrate/src/commits.rs create mode 100644 benchmarks-website/migrate/src/lib.rs create mode 100644 benchmarks-website/migrate/src/main.rs create mode 100644 benchmarks-website/migrate/src/migrate.rs create mode 100644 benchmarks-website/migrate/src/source.rs create mode 100644 benchmarks-website/migrate/src/v2.rs create mode 100644 benchmarks-website/migrate/src/verify.rs create mode 100644 benchmarks-website/migrate/tests/classifier.rs create mode 100644 benchmarks-website/migrate/tests/end_to_end.rs diff --git a/Cargo.lock b/Cargo.lock index 28c9677dbec..2b26f868282 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3747,6 +3747,7 @@ dependencies = [ "fallible-streaming-iterator", "hashlink", "libduckdb-sys", + "num", "num-integer", "rust_decimal", "strum 0.27.2", @@ -6364,6 +6365,20 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + 
"num-rational", + "num-traits", +] + [[package]] name = "num-bigint" version = "0.4.6" @@ -6399,6 +6414,28 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -10363,6 +10400,29 @@ dependencies = [ "vortex-tensor", ] +[[package]] +name = "vortex-bench-migrate" +version = "0.1.0-alpha.0" +dependencies = [ + "anyhow", + "arrow-array 58.1.0", + "arrow-buffer 58.1.0", + "arrow-schema 58.1.0", + "clap", + "duckdb", + "flate2", + "reqwest 0.13.3", + "rstest", + "serde", + "serde_json", + "tempfile", + "tokio", + "tracing", + "tracing-subscriber", + "vortex-bench-server", + "vortex-utils", +] + [[package]] name = "vortex-bench-server" version = "0.1.0-alpha.0" diff --git a/Cargo.toml b/Cargo.toml index 21a32d3d0be..4126243aa8f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -62,6 +62,7 @@ members = [ "benchmarks/vector-search-bench", # Benchmarks website v3 (alpha) - leaf binary, not part of vortex-* API "benchmarks-website/server", + "benchmarks-website/migrate", ] exclude = ["java/testfiles", "wasm-test"] resolver = "2" diff --git a/benchmarks-website/migrate/Cargo.toml b/benchmarks-website/migrate/Cargo.toml new file mode 100644 index 00000000000..45a752df397 --- /dev/null +++ b/benchmarks-website/migrate/Cargo.toml @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright the Vortex contributors + +[package] +name = "vortex-bench-migrate" +version = "0.1.0-alpha.0" +edition = "2024" +rust-version = "1.91.0" +license = "Apache-2.0" +description = "One-shot historical migrator from the v2 benchmarks S3 dataset to a v3 DuckDB file" +publish = false + +[[bin]] +name = "vortex-bench-migrate" +path = "src/main.rs" + +# Throwaway binary, not part of the vortex-* public API surface. +# Errors use anyhow, and the crate is intentionally outside the +# workspace public-api lockfile set. 
+ +[dependencies] +anyhow = { workspace = true } +arrow-array = { workspace = true } +arrow-buffer = { workspace = true } +arrow-schema = { workspace = true } +clap = { workspace = true, features = ["derive"] } +# track vortex-duckdb's bundled engine version (build.rs) +duckdb = { version = "1.10502", features = ["bundled", "appender-arrow"] } +flate2 = "1.1" +reqwest = { workspace = true, features = ["json"] } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } +tracing = { workspace = true, features = ["std"] } +tracing-subscriber = { workspace = true, features = ["env-filter", "fmt"] } +vortex-bench-server = { path = "../server" } +vortex-utils = { workspace = true } + +[dev-dependencies] +rstest = { workspace = true } +tempfile = { workspace = true } diff --git a/benchmarks-website/migrate/src/classifier.rs b/benchmarks-website/migrate/src/classifier.rs new file mode 100644 index 00000000000..8a17b31fcd2 --- /dev/null +++ b/benchmarks-website/migrate/src/classifier.rs @@ -0,0 +1,818 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Bug-for-bug port of v2's `getGroup`, `formatQuery`, and +//! `normalizeChartName` from `benchmarks-website/server.js`, plus the +//! mapping from v2 group + name pattern to a v3 fact-table bin. +//! +//! The v2 classifier was the source of truth for what historical +//! records mean. It groups records by name prefix into one of: +//! "Random Access", "Compression", "Compression Size", or one of the +//! SQL query suites (with optional fan-out by storage and scale +//! factor for TPC-H/TPC-DS). This module reproduces that logic and +//! then hops to a v3 fact-table bin, since v3 stores dim values as +//! columns instead of name fragments. +//! +//! Engine and format strings stored in v3 columns are pulled from the +//! raw, pre-rename v2 record name. v2's `ENGINE_RENAMES` was a v2 +//! read-time UI concern (e.g. `vortex-file-compressed` rendered as +//! `vortex` and `parquet-tokio-local-disk` rendered as `parquet-nvme`). +//! v3 stores canonical `Format::name()` strings to match what the v3 +//! live emitter writes, so historical and live records share series. + +use crate::v2::V2Record; +use crate::v2::dataset_scale_factor; + +/// Static port of v2's `QUERY_SUITES`. 
+pub const QUERY_SUITES: &[QuerySuite] = &[
+    QuerySuite {
+        prefix: "clickbench",
+        display_name: "Clickbench",
+        query_prefix: "CLICKBENCH",
+        dataset_key: None,
+        fan_out: false,
+        skip: false,
+    },
+    QuerySuite {
+        prefix: "statpopgen",
+        display_name: "Statistical and Population Genetics",
+        query_prefix: "STATPOPGEN",
+        dataset_key: None,
+        fan_out: false,
+        skip: false,
+    },
+    QuerySuite {
+        prefix: "polarsignals",
+        display_name: "PolarSignals Profiling",
+        query_prefix: "POLARSIGNALS",
+        dataset_key: None,
+        fan_out: false,
+        skip: false,
+    },
+    QuerySuite {
+        prefix: "gharchive",
+        display_name: "GhArchive",
+        query_prefix: "GHARCHIVE",
+        dataset_key: None,
+        fan_out: false,
+        skip: false,
+    },
+    QuerySuite {
+        prefix: "tpch",
+        display_name: "TPC-H",
+        query_prefix: "TPC-H",
+        dataset_key: Some("tpch"),
+        fan_out: true,
+        skip: false,
+    },
+    QuerySuite {
+        prefix: "tpcds",
+        display_name: "TPC-DS",
+        query_prefix: "TPC-DS",
+        dataset_key: Some("tpcds"),
+        fan_out: true,
+        skip: false,
+    },
+    QuerySuite {
+        prefix: "fineweb",
+        display_name: "Fineweb",
+        query_prefix: "FINEWEB",
+        dataset_key: None,
+        fan_out: false,
+        skip: false,
+    },
+];
+
+/// Static port of v2's `ENGINE_RENAMES`. Applied to the "series" half
+/// of a benchmark name (the part after the first `/`) before splitting
+/// on `:` into engine/format. Order doesn't matter — keys are unique.
+const ENGINE_RENAMES: &[(&str, &str)] = &[
+    ("datafusion:vortex-file-compressed", "datafusion:vortex"),
+    ("datafusion:parquet", "datafusion:parquet"),
+    ("datafusion:arrow", "datafusion:in-memory-arrow"),
+    ("datafusion:lance", "datafusion:lance"),
+    ("datafusion:vortex-compact", "datafusion:vortex-compact"),
+    ("duckdb:vortex-file-compressed", "duckdb:vortex"),
+    ("duckdb:parquet", "duckdb:parquet"),
+    ("duckdb:duckdb", "duckdb:duckdb"),
+    ("duckdb:vortex-compact", "duckdb:vortex-compact"),
+    ("vortex-tokio-local-disk", "vortex-nvme"),
+    ("vortex-compact-tokio-local-disk", "vortex-compact-nvme"),
+    ("lance-tokio-local-disk", "lance-nvme"),
+    ("parquet-tokio-local-disk", "parquet-nvme"),
+    ("lance", "lance"),
+];
+
+/// One entry of `QUERY_SUITES`.
+#[derive(Debug, Clone, Copy)]
+pub struct QuerySuite {
+    pub prefix: &'static str,
+    pub display_name: &'static str,
+    pub query_prefix: &'static str,
+    pub dataset_key: Option<&'static str>,
+    pub fan_out: bool,
+    pub skip: bool,
+}
+
+/// Group a v2 record falls into. Mirrors `getGroup` in `server.js`,
+/// including the fan-out group naming for TPC-H/TPC-DS.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum V2Group {
+    RandomAccess,
+    Compression,
+    CompressionSize,
+    Query {
+        suite_index: usize,
+        /// `Some` for fan-out suites only.
+        storage: Option<String>,
+        /// `Some` for fan-out suites only.
+        scale_factor: Option<String>,
+    },
+}
+
+impl V2Group {
+    /// Display name as v2 served it from `/api/metadata`.
+    pub fn display_name(&self) -> String {
+        match self {
+            V2Group::RandomAccess => "Random Access".into(),
+            V2Group::Compression => "Compression".into(),
+            V2Group::CompressionSize => "Compression Size".into(),
+            V2Group::Query {
+                suite_index,
+                storage,
+                scale_factor,
+            } => {
+                let suite = &QUERY_SUITES[*suite_index];
+                if let (Some(storage), Some(sf)) = (storage, scale_factor) {
+                    format!("{} ({}) (SF={})", suite.display_name, storage, sf)
+                } else {
+                    suite.display_name.to_string()
+                }
+            }
+        }
+    }
+}
+
+/// Apply v2's `ENGINE_RENAMES`. Reproduces the JS `rename`:
+/// `RENAMES[s.toLowerCase()] || RENAMES[s] || s`.
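+///
+/// A short usage sketch (both assertions follow directly from the
+/// rename table above):
+///
+/// ```
+/// use vortex_bench_migrate::classifier::rename_engine;
+///
+/// assert_eq!(rename_engine("parquet-tokio-local-disk"), "parquet-nvme");
+/// // Series with no rename entry pass through unchanged.
+/// assert_eq!(rename_engine("some-unknown-series"), "some-unknown-series");
+/// ```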
+pub fn rename_engine(s: &str) -> String {
+    let lower = s.to_lowercase();
+    for (k, v) in ENGINE_RENAMES {
+        if *k == lower {
+            return (*v).to_string();
+        }
+    }
+    for (k, v) in ENGINE_RENAMES {
+        if *k == s {
+            return (*v).to_string();
+        }
+    }
+    s.to_string()
+}
+
+/// Faithful port of v2's `formatQuery`: maps `clickbench_q07` →
+/// `"CLICKBENCH Q7"`. Returns the original (uppercased,
+/// `-` and `_` replaced with spaces) when no suite matches.
+pub fn format_query(q: &str) -> String {
+    let lower = q.to_lowercase();
+    for suite in QUERY_SUITES {
+        if suite.skip {
+            continue;
+        }
+        let prefix = suite.prefix;
+        if let Some(rest) = lower.strip_prefix(prefix)
+            && let Some(idx) = parse_query_index(rest)
+        {
+            return format!("{} Q{}", suite.query_prefix, idx);
+        }
+    }
+    let mut out = q.to_uppercase();
+    out = out.replace(['_', '-'], " ");
+    out
+}
+
+/// Parse the `_q07` / ` q7` / `q42` tail used by `format_query`.
+/// Returns the integer query index if the tail matches the v2 regex
+/// `^[_ ]?q(\d+)`.
+fn parse_query_index(rest: &str) -> Option<u64> {
+    let after_sep = rest
+        .strip_prefix('_')
+        .or_else(|| rest.strip_prefix(' '))
+        .unwrap_or(rest);
+    let after_q = after_sep
+        .strip_prefix('q')
+        .or_else(|| after_sep.strip_prefix('Q'))?;
+    let digits: String = after_q.chars().take_while(|c| c.is_ascii_digit()).collect();
+    if digits.is_empty() {
+        return None;
+    }
+    digits.parse().ok()
+}
+
+/// Faithful port of v2's `normalizeChartName`.
+pub fn normalize_chart_name(group: &V2Group, chart_name: &str) -> String {
+    if matches!(group, V2Group::CompressionSize) && chart_name == "VORTEX FILE COMPRESSED SIZE" {
+        return "VORTEX SIZE".into();
+    }
+    chart_name.to_string()
+}
+
+/// Port of v2's `getGroup`. Returns `None` for skipped suites
+/// (e.g. `fineweb`) or names that match nothing.
+pub fn get_group(record: &V2Record) -> Option<V2Group> {
+    let lower = record.name.to_lowercase();
+
+    if lower.starts_with("random-access/") || lower.starts_with("random access/") {
+        return Some(V2Group::RandomAccess);
+    }
+
+    if lower.starts_with("vortex size/")
+        || lower.starts_with("vortex-file-compressed size/")
+        || lower.starts_with("parquet size/")
+        || lower.starts_with("parquet-zstd size/")
+        || lower.starts_with("lance size/")
+        || lower.contains(":raw size/")
+        || lower.contains(":parquet-zstd size/")
+        || lower.contains(":lance size/")
+    {
+        return Some(V2Group::CompressionSize);
+    }
+
+    if lower.starts_with("compress time/")
+        || lower.starts_with("decompress time/")
+        || lower.starts_with("parquet_rs-zstd compress")
+        || lower.starts_with("parquet_rs-zstd decompress")
+        || lower.starts_with("lance compress")
+        || lower.starts_with("lance decompress")
+        || lower.starts_with("vortex:lance ratio")
+        || lower.starts_with("vortex:parquet-zstd ratio")
+        // Typo'd v2 emitter wrote `parquet-zst` (no `d`) for some
+        // ratio records; match both spellings so they classify as
+        // derived ratios instead of falling through to Unknown.
+        || lower.starts_with("vortex:parquet-zst ratio")
+        || lower.starts_with("vortex:raw ratio")
+    {
+        return Some(V2Group::Compression);
+    }
+
+    for (i, suite) in QUERY_SUITES.iter().enumerate() {
+        let prefix_q = format!("{}_q", suite.prefix);
+        let prefix_slash = format!("{}/", suite.prefix);
+        if !lower.starts_with(&prefix_q) && !lower.starts_with(&prefix_slash) {
+            continue;
+        }
+        if suite.skip {
+            return None;
+        }
+        if !suite.fan_out {
+            return Some(V2Group::Query {
+                suite_index: i,
+                storage: None,
+                scale_factor: None,
+            });
+        }
+        let storage = match record.storage.as_deref().map(str::to_uppercase).as_deref() {
+            Some("S3") => "S3",
+            _ => "NVMe",
+        };
+        let dataset_key = suite.dataset_key.unwrap_or(suite.prefix);
+        let raw_sf = record
+            .dataset
+            .as_ref()
+            .and_then(|d| dataset_scale_factor(d, dataset_key));
+        let sf = raw_sf
+            .as_deref()
+            .and_then(|s| s.parse::<f64>().ok())
+            .map(|f| f.round() as i64)
+            .unwrap_or(1);
+        return Some(V2Group::Query {
+            suite_index: i,
+            storage: Some(storage.into()),
+            scale_factor: Some(sf.to_string()),
+        });
+    }
+
+    None
+}
+
+/// Group + chart + series breakdown for a v2 record, using the same
+/// rules `server.js` applies in `refresh()`. Equivalent to v2's
+/// `(group, chartName, seriesName)` triple after rename / skip rules.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct V2Classification {
+    pub group: V2Group,
+    pub chart: String,
+    pub series: String,
+}
+
+/// Apply the same chart / series naming v2's `refresh()` does, plus
+/// the throughput / `PARQUET-UNC` skip rules.
+pub fn classify_v2(record: &V2Record) -> Option<V2Classification> {
+    if record.name.contains(" throughput") {
+        return None;
+    }
+    let group = get_group(record)?;
+    let parts: Vec<&str> = record.name.split('/').collect();
+    let (chart, series) = match (&group, parts.len()) {
+        (V2Group::RandomAccess, 4) => {
+            let chart = format!("{}/{}", parts[1], parts[2])
+                .to_uppercase()
+                .replace(['_', '-'], " ");
+            let series = rename_engine(if parts[3].is_empty() {
+                "default"
+            } else {
+                parts[3]
+            });
+            (chart, series)
+        }
+        (V2Group::RandomAccess, 2) => (
+            "RANDOM ACCESS".to_string(),
+            rename_engine(if parts[1].is_empty() {
+                "default"
+            } else {
+                parts[1]
+            }),
+        ),
+        (V2Group::RandomAccess, _) => return None,
+        _ => {
+            let series_raw = if parts.len() >= 2 && !parts[1].is_empty() {
+                parts[1]
+            } else {
+                "default"
+            };
+            let series = rename_engine(series_raw);
+            let chart = format_query(parts[0]);
+            (chart, series)
+        }
+    };
+    let chart = normalize_chart_name(&group, &chart);
+    if chart.contains("PARQUET-UNC") {
+        return None;
+    }
+    Some(V2Classification {
+        group,
+        chart,
+        series,
+    })
+}
+
+/// Mapping target: which v3 fact table a v2 record lands in, plus the
+/// dim values that table needs.
+#[derive(Debug, Clone, PartialEq)]
+pub enum V3Bin {
+    Query {
+        dataset: String,
+        dataset_variant: Option<String>,
+        scale_factor: Option<String>,
+        query_idx: i32,
+        storage: String,
+        engine: String,
+        format: String,
+    },
+    CompressionTime {
+        dataset: String,
+        dataset_variant: Option<String>,
+        format: String,
+        op: String,
+    },
+    CompressionSize {
+        dataset: String,
+        dataset_variant: Option<String>,
+        format: String,
+    },
+    RandomAccess {
+        dataset: String,
+        format: String,
+    },
+}
+
+/// Top-level entry point. Combines `classify_v2` with the v3 fact-table
+/// mapping. Returns `None` for records that:
+///
+/// - Don't match any v2 group (uncategorized prefix).
+/// - Are explicitly skipped by v2 (throughput, PARQUET-UNC, fineweb).
+/// - Are computed-at-read-time ratios that v3 derives from
+///   `compression_sizes` (`vortex:parquet-zstd ratio …`,
+///   `vortex:lance ratio …`, `vortex:raw ratio …`,
+///   `vortex:* size/…`).
+pub fn classify(record: &V2Record) -> Option<V3Bin> {
+    let cls = classify_v2(record)?;
+    match &cls.group {
+        V2Group::RandomAccess => bin_random_access(&cls, record),
+        V2Group::Compression => bin_compression_time(&cls, record),
+        V2Group::CompressionSize => bin_compression_size(&cls, record),
+        V2Group::Query { .. } => bin_query(&cls, record),
+    }
+}
+
+/// Reason the classifier dropped a record. Intentional skips (v2
+/// patterns v3 deliberately doesn't store) are NOT errors; they don't
+/// count against the uncategorized gate.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum Skip {
+    /// `vortex:* ratio …` and `vortex:* size` — derived in v3 from
+    /// `compression_sizes` joined to itself.
+    DerivedRatio,
+    /// `throughput` records — v2 derived these from latencies.
+    Throughput,
+    /// A v2 query suite marked `skip: true` in QUERY_SUITES.
+    SkippedSuite,
+    /// random-access record with an unsupported part count.
+    UnsupportedShape,
+    /// Record had no `value` field.
+    NoValue,
+    /// Dim outside the v3 emitter's allowlist (e.g. `parquet-zstd`,
+    /// historical-only suites no longer in CI).
+    Deprecated,
+    /// v2 memory measurements (`*_memory/*` records). Carry top-level
+    /// `peak_physical_memory` / `peak_virtual_memory` /
+    /// `physical_memory_delta` / `virtual_memory_delta` fields that
+    /// `V2Record` doesn't deserialize. Not migrated for alpha; merging
+    /// into the corresponding QueryMeasurement row is future work.
+    HistoricalMemory,
+}
+
+/// Engines the v3 emitter produces today. Anything else is historical
+/// and gets bucketed as `Skip::Deprecated`.
+///
+/// ORCHESTRATOR NOTE: confirm against `vortex-bench`'s `Engine` enum
+/// before handing off; edit if the live set differs.
+const V3_ENGINES: &[&str] = &["datafusion", "duckdb", "vortex", "arrow"];
+
+/// Formats the v3 emitter produces today (`Format::name()` values).
+///
+/// ORCHESTRATOR NOTE: confirm against `vortex-bench/src/lib.rs`
+/// `Format::name()` before handing off.
+const V3_FORMATS: &[&str] = &[
+    "vortex-file-compressed",
+    "vortex-compact",
+    "parquet",
+    "lance",
+    "csv",
+    "arrow",
+    "duckdb",
+];
+
+/// Query suites the v3 CI runs today. Suites outside this list still
+/// classify (so historical analyses stay coherent) but get bucketed
+/// as `Skip::Deprecated` so they don't render as orphan charts in v3.
+///
+/// `fineweb` is included because `.github/workflows/sql-benchmarks.yml`
+/// still has `fineweb` and `fineweb-s3` matrix entries. `gharchive`
+/// stays excluded — it's defined in `vortex-bench` but no current
+/// workflow runs it.
+const V3_QUERY_SUITES: &[&str] = &[
+    "clickbench",
+    "tpch",
+    "tpcds",
+    "statpopgen",
+    "polarsignals",
+    "fineweb",
+];
+
+/// Returns true if every dim that v3 stores as a column is on the
+/// emitter's current allowlist. Dim values outside the allowlist mean
+/// historical-only formats / engines that the v3 UI has nothing to
+/// render against.
+fn is_v3_dim(bin: &V3Bin) -> bool {
+    match bin {
+        V3Bin::Query { engine, format, .. } => {
+            V3_ENGINES.contains(&engine.as_str()) && V3_FORMATS.contains(&format.as_str())
+        }
+        V3Bin::CompressionTime { format, .. }
+        | V3Bin::CompressionSize { format, .. }
+        | V3Bin::RandomAccess { format, .. } => V3_FORMATS.contains(&format.as_str()),
+    }
+}
+
+/// Outcome of running the classifier on a v2 record. Distinguishes
+/// "we know we don't want this" (`Skip`) from "we don't recognize this"
+/// (`Unknown`); the migrator's 5% gate fires only on the latter.
+#[derive(Debug, Clone)]
+pub enum Outcome {
+    Bin(V3Bin),
+    Skip(Skip),
+    Unknown,
+}
+
+/// Like [`classify`], but reports *why* a record was dropped. Intended
+/// for the migrator so the 5% uncategorized gate doesn't trip on
+/// records v2 deliberately doesn't render (ratios, throughput,
+/// skipped suites).
+pub fn classify_outcome(record: &V2Record) -> Outcome {
+    if record.name.contains(" throughput") {
+        return Outcome::Skip(Skip::Throughput);
+    }
+    // v2 memory records: e.g. "clickbench_q07_memory/datafusion:parquet".
+    // Match the `_memory/` infix BEFORE the engine/format split, so they
+    // route to a known Skip variant instead of slipping through to
+    // Outcome::Unknown and tripping the 5% gate.
+    let lower = record.name.to_lowercase();
+    if let Some((head, _)) = lower.split_once('/')
+        && head.ends_with("_memory")
+    {
+        return Outcome::Skip(Skip::HistoricalMemory);
+    }
+    let Some(group) = get_group(record) else {
+        return Outcome::Unknown;
+    };
+    if let V2Group::Query { suite_index, .. } = &group
+        && QUERY_SUITES[*suite_index].skip
+    {
+        return Outcome::Skip(Skip::SkippedSuite);
+    }
+    let Some(cls) = classify_v2(record) else {
+        // get_group succeeded but classify_v2 didn't — shape mismatch.
+        return Outcome::Skip(Skip::UnsupportedShape);
+    };
+    let derived = match &cls.group {
+        V2Group::Compression => {
+            let lc = cls.chart.to_lowercase();
+            lc.contains("ratio") || lc.contains(':')
+        }
+        V2Group::CompressionSize => cls.chart.to_lowercase().contains(':'),
+        _ => false,
+    };
+    if derived {
+        return Outcome::Skip(Skip::DerivedRatio);
+    }
+    let bin = match &cls.group {
+        V2Group::RandomAccess => bin_random_access(&cls, record),
+        V2Group::Compression => bin_compression_time(&cls, record),
+        V2Group::CompressionSize => bin_compression_size(&cls, record),
+        V2Group::Query { .. } => bin_query(&cls, record),
+    };
+    let Some(bin) = bin else {
+        return Outcome::Unknown;
+    };
+    if !is_v3_dim(&bin) {
+        return Outcome::Skip(Skip::Deprecated);
+    }
+    if let V2Group::Query { suite_index, .. } = &group
+        && !V3_QUERY_SUITES.contains(&QUERY_SUITES[*suite_index].prefix)
+    {
+        return Outcome::Skip(Skip::Deprecated);
+    }
+    Outcome::Bin(bin)
+}
+
+fn bin_random_access(cls: &V2Classification, record: &V2Record) -> Option<V3Bin> {
+    // v2 chart name shape: "RANDOM ACCESS" or "DATASET/PATTERN" (uppercase).
+    // We store it as the v3 dataset value verbatim, lowercased so
+    // `/api/groups` returns canonical lowercase names.
+    let dataset = cls.chart.to_lowercase();
+    if dataset.is_empty() {
+        return None;
+    }
+    // Pull format from the raw, pre-rename v2 name so v3 stores the
+    // canonical `Format::name()` string (matching what the v3 live
+    // emitter writes). Raw shape is
+    // `random-access/<dataset>/<pattern>/<ext>-tokio-local-disk`
+    // (4-part) or `random-access/<ext>-tokio-local-disk` (2-part
+    // legacy). After stripping the `-tokio-local-disk` suffix, map the
+    // v2 random-access ext label (`vortex`, from `Format::ext()`) to
+    // the canonical name (`vortex-file-compressed`, from
+    // `Format::name()`). `parquet` and `lance` match between ext and
+    // name. The `vortex` ext is shared by both `OnDiskVortex` (name
+    // `vortex-file-compressed`) and `VortexCompact` (name
+    // `vortex-compact`), but v2's random-access bench only emitted
+    // `OnDiskVortex`, so mapping to `vortex-file-compressed` is
+    // correct for all historical data.
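+    // Worked example: "random-access/taxi/take/vortex-tokio-local-disk"
+    // (the dataset/pattern here is illustrative) has parts[3] =
+    // "vortex-tokio-local-disk"; stripping the suffix leaves "vortex",
+    // which maps to the stored format "vortex-file-compressed".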
+    let parts: Vec<&str> = record.name.split('/').collect();
+    let raw = match parts.len() {
+        4 => parts[3],
+        2 => parts[1],
+        _ => return None,
+    };
+    if raw.is_empty() || raw == "default" {
+        return None;
+    }
+    let stripped = raw.strip_suffix("-tokio-local-disk").unwrap_or(raw);
+    let format = match stripped {
+        "vortex" => "vortex-file-compressed".to_string(),
+        other => other.to_lowercase(),
+    };
+    Some(V3Bin::RandomAccess { dataset, format })
+}
+
+fn bin_compression_time(cls: &V2Classification, _record: &V2Record) -> Option<V3Bin> {
+    // v2 compression chart names look like (after format_query):
+    //   "COMPRESS TIME"                          [vortex/encode]
+    //   "DECOMPRESS TIME"                        [vortex/decode]
+    //   "PARQUET RS ZSTD COMPRESS TIME"          [parquet/encode]
+    //   "PARQUET RS ZSTD DECOMPRESS TIME"        [parquet/decode]
+    //   "LANCE COMPRESS TIME"                    [lance/encode]
+    //   "LANCE DECOMPRESS TIME"                  [lance/decode]
+    //   "VORTEX:LANCE RATIO COMPRESS TIME"       [drop]
+    //   "VORTEX:PARQUET-ZSTD RATIO COMPRESS TIME" [drop]
+    //   "VORTEX:RAW RATIO COMPRESS TIME"         [drop]
+    let lc = cls.chart.to_lowercase();
+    if lc.contains("ratio") || lc.contains(':') {
+        // Ratios are computed at read time from compression_sizes.
+        return None;
+    }
+    let (format, op) = if lc.starts_with("compress time") {
+        ("vortex-file-compressed", "encode")
+    } else if lc.starts_with("decompress time") {
+        ("vortex-file-compressed", "decode")
+    } else if lc.starts_with("parquet rs zstd compress time") {
+        ("parquet", "encode")
+    } else if lc.starts_with("parquet rs zstd decompress time") {
+        ("parquet", "decode")
+    } else if lc.starts_with("lance compress time") {
+        ("lance", "encode")
+    } else if lc.starts_with("lance decompress time") {
+        ("lance", "decode")
+    } else {
+        return None;
+    };
+    let dataset = cls.series.to_lowercase();
+    if dataset.is_empty() || dataset == "default" {
+        return None;
+    }
+    Some(V3Bin::CompressionTime {
+        dataset,
+        dataset_variant: None,
+        format: format.to_string(),
+        op: op.to_string(),
+    })
+}
+
+fn bin_compression_size(cls: &V2Classification, record: &V2Record) -> Option<V3Bin> {
+    let lc = cls.chart.to_lowercase();
+    // Ratios like "VORTEX:PARQUET ZSTD SIZE" / "VORTEX:LANCE SIZE" /
+    // "VORTEX:RAW SIZE" are derived from compression_sizes at read
+    // time, not stored.
+    if lc.contains(':') {
+        return None;
+    }
+    // `parquet-zstd size` shares a leading "parquet" with `parquet size`,
+    // so check the more specific prefix first. `format_query` upper-cases
+    // and replaces `-`/`_` with spaces, so the chart we match against is
+    // `"PARQUET ZSTD SIZE"` (no hyphen) — same convention as the existing
+    // `"parquet rs zstd compress time"` branches above.
+    let format = if lc.starts_with("vortex size") {
+        "vortex-file-compressed"
+    } else if lc.starts_with("parquet zstd size") {
+        "parquet-zstd"
+    } else if lc.starts_with("parquet size") {
+        "parquet"
+    } else if lc.starts_with("lance size") {
+        "lance"
+    } else {
+        return None;
+    };
+    let dataset = cls.series.to_lowercase();
+    if dataset.is_empty() || dataset == "default" {
+        return None;
+    }
+    // Mirror the file-sizes ingest path's dataset_variant derivation
+    // (see `migrate::migrate_file_sizes`): pull the SF out of the v2
+    // record's `dataset` object when present, drop empty / "1.0".
+    // Without this both code paths produce the same `mid` only by
+    // accident, so SF=10 file-sizes rows wouldn't merge with the
+    // matching data.json.gz "vortex size/tpch" rows.
+    let dataset_variant = record
+        .dataset
+        .as_ref()
+        .and_then(|d| crate::v2::dataset_scale_factor(d, dataset.as_str()))
+        .filter(|s| !s.is_empty() && s.as_str() != "1.0");
+    Some(V3Bin::CompressionSize {
+        dataset,
+        dataset_variant,
+        format: format.to_string(),
+    })
+}
+
+fn bin_query(cls: &V2Classification, record: &V2Record) -> Option<V3Bin> {
+    let V2Group::Query {
+        suite_index,
+        storage,
+        scale_factor,
+    } = &cls.group
+    else {
+        return None;
+    };
+    let suite = &QUERY_SUITES[*suite_index];
+
+    // Pull the query index from the *raw* name's first part instead of
+    // the formatted chart, so we don't have to round-trip "Q07".
+    let raw_first = record.name.split('/').next().unwrap_or("");
+    let query_idx = parse_query_index_from_first(raw_first)?;
+
+    // Pull engine:format from the raw, pre-rename second segment so v3
+    // stores canonical `Format::name()` strings (e.g.
+    // `vortex-file-compressed`) that match what the v3 live emitter
+    // writes. `cls.series` has been through v2's `ENGINE_RENAMES` for
+    // UI display and is not appropriate for v3 columns.
+    //
+    // Older v2 records emitted display-case engines (e.g. `DataFusion`,
+    // `DuckDB`); newer ones emit lowercase. Lowercase here so dedup
+    // collapses both spellings into a single canonical row.
+    let raw_series = record.name.split('/').nth(1)?;
+    let (engine, format) = split_engine_format(raw_series)?;
+    let engine = engine.to_lowercase();
+    let format = format.to_lowercase();
+
+    let storage_v3 = match storage.as_deref() {
+        Some("S3") => "s3".to_string(),
+        Some("NVMe") => "nvme".to_string(),
+        _ => "nvme".to_string(),
+    };
+
+    // ClickBench's "flavor" lives in dataset_variant per benchmark-mapping.md
+    // - we don't have it from a v2 name string, so we leave it None.
+    Some(V3Bin::Query {
+        dataset: suite.prefix.to_string(),
+        dataset_variant: None,
+        scale_factor: scale_factor.clone(),
+        query_idx,
+        storage: storage_v3,
+        engine,
+        format,
+    })
+}
+
+/// Pull the integer query index out of the leading name part, which is
+/// always `<suite>_q<idx>` (or legacy `<suite> q<idx>`) for SQL query
+/// records.
+fn parse_query_index_from_first(first: &str) -> Option<i32> {
+    let lower = first.to_lowercase();
+    for suite in QUERY_SUITES {
+        if let Some(rest) = lower.strip_prefix(suite.prefix)
+            && let Some(idx) = parse_query_index(rest)
+        {
+            return Some(idx as i32);
+        }
+    }
+    None
+}
+
+/// Split a renamed series like `datafusion:parquet` into
+/// `(engine, format)`. Returns `None` for series with no `:` since
+/// v3 requires both columns.
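+/// For example, the raw series `duckdb:vortex-file-compressed` splits
+/// into `("duckdb", "vortex-file-compressed")`, while a colon-less
+/// series like `vortex-nvme` yields `None`.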
+fn split_engine_format(series: &str) -> Option<(String, String)> { + let mut split = series.splitn(2, ':'); + let engine = split.next()?.trim().to_string(); + let format = split.next()?.trim().to_string(); + if engine.is_empty() || format.is_empty() { + return None; + } + Some((engine, format)) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn record(name: &str) -> V2Record { + V2Record { + name: name.to_string(), + commit_id: Some("deadbeef".into()), + unit: None, + value: None, + storage: None, + dataset: None, + all_runtimes: None, + env_triple: None, + } + } + + #[test] + fn format_query_round_trips() { + assert_eq!(format_query("clickbench_q07"), "CLICKBENCH Q7"); + assert_eq!(format_query("tpch_q01"), "TPC-H Q1"); + assert_eq!(format_query("tpcds_q42"), "TPC-DS Q42"); + assert_eq!(format_query("statpopgen_q3"), "STATPOPGEN Q3"); + assert_eq!(format_query("foo bar"), "FOO BAR"); + } + + #[test] + fn rename_engine_canonicalizes_disk_names() { + assert_eq!(rename_engine("vortex-tokio-local-disk"), "vortex-nvme"); + assert_eq!( + rename_engine("datafusion:vortex-file-compressed"), + "datafusion:vortex" + ); + assert_eq!(rename_engine("unknown-engine"), "unknown-engine"); + } + + #[test] + fn parse_query_index_handles_separators() { + assert_eq!(parse_query_index("_q07"), Some(7)); + assert_eq!(parse_query_index(" q7"), Some(7)); + assert_eq!(parse_query_index("q42"), Some(42)); + assert_eq!(parse_query_index("xq7"), None); + } + + #[test] + fn random_access_bins_dataset_pattern() { + let bin = classify(&record("random-access/taxi/take/parquet")).unwrap(); + assert_eq!( + bin, + V3Bin::RandomAccess { + dataset: "taxi/take".into(), + format: "parquet".into(), + } + ); + } +} diff --git a/benchmarks-website/migrate/src/commits.rs b/benchmarks-website/migrate/src/commits.rs new file mode 100644 index 00000000000..28d63a5bd19 --- /dev/null +++ b/benchmarks-website/migrate/src/commits.rs @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Commit upserts. Adapts a [`crate::v2::V2Commit`] into the v3 +//! `commits` row shape (a [`vortex_bench_server::records::CommitInfo`]). + +use anyhow::Context as _; +use anyhow::Result; +use duckdb::Transaction; +use duckdb::params; + +use crate::v2::V2Commit; + +/// Insert a v3 `commits` row for one v2 commit. Missing fields are +/// filled with the empty string, matching the v3 schema's `NOT NULL` +/// constraints; the call site logs a warning for each fallback so +/// the operator can spot bad inputs. 
pub fn upsert_commit(tx: &Transaction<'_>, commit: &V2Commit) -> Result<UpsertOutcome> {
    let mut warnings = Vec::new();
    let timestamp = require_field(&commit.timestamp, "timestamp", &commit.id, &mut warnings);
    let message = require_field(&commit.message, "message", &commit.id, &mut warnings);
    let author_name = require_field(
        &commit.author.as_ref().and_then(|p| p.name.clone()),
        "author.name",
        &commit.id,
        &mut warnings,
    );
    let author_email = require_field(
        &commit.author.as_ref().and_then(|p| p.email.clone()),
        "author.email",
        &commit.id,
        &mut warnings,
    );
    let committer_name = require_field(
        &commit.committer.as_ref().and_then(|p| p.name.clone()),
        "committer.name",
        &commit.id,
        &mut warnings,
    );
    let committer_email = require_field(
        &commit.committer.as_ref().and_then(|p| p.email.clone()),
        "committer.email",
        &commit.id,
        &mut warnings,
    );
    let tree_sha = require_field(&commit.tree_id, "tree_id", &commit.id, &mut warnings);
    let url = require_field(&commit.url, "url", &commit.id, &mut warnings);

    tx.execute(
        r#"
        INSERT INTO commits (
            commit_sha, timestamp, message, author_name, author_email,
            committer_name, committer_email, tree_sha, url
        ) VALUES (?, CAST(? AS TIMESTAMPTZ), ?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT (commit_sha) DO UPDATE SET
            timestamp = excluded.timestamp,
            message = excluded.message,
            author_name = excluded.author_name,
            author_email = excluded.author_email,
            committer_name = excluded.committer_name,
            committer_email = excluded.committer_email,
            tree_sha = excluded.tree_sha,
            url = excluded.url
        "#,
        params![
            commit.id,
            timestamp,
            message,
            author_name,
            author_email,
            committer_name,
            committer_email,
            tree_sha,
            url,
        ],
    )
    .with_context(|| format!("upserting commit {}", commit.id))?;
    Ok(UpsertOutcome { warnings })
}

fn require_field(
    field: &Option<String>,
    name: &str,
    sha: &str,
    warnings: &mut Vec<String>,
) -> String {
    match field {
        Some(s) => s.clone(),
        None => {
            warnings.push(format!("commit {sha} missing {name}"));
            String::new()
        }
    }
}

/// Per-call warning bag returned to the caller for logging.
#[derive(Debug, Default)]
pub struct UpsertOutcome {
    pub warnings: Vec<String>,
}
diff --git a/benchmarks-website/migrate/src/lib.rs b/benchmarks-website/migrate/src/lib.rs
new file mode 100644
index 00000000000..5e8d9c64907
--- /dev/null
+++ b/benchmarks-website/migrate/src/lib.rs
@@ -0,0 +1,21 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! One-shot historical migrator from v2's S3-hosted benchmark dataset
//! to a v3 DuckDB file.
//!
//! The v2 dataset is JSONL of bare benchmark records keyed by name string.
//! v3 uses five typed fact tables with explicit dim columns. This crate
//! ports v2's `getGroup` classifier (in `benchmarks-website/server.js`)
//! bug-for-bug so that historical rows survive the migration with the
//! same group / chart / series structure as the live v2 server.
//!
//! The migrator is throwaway: once v3 cuts over, both the binary and
//! the classifier go away.

pub mod classifier;
pub mod commits;
pub mod migrate;
pub mod source;
pub mod v2;
pub mod verify;
diff --git a/benchmarks-website/migrate/src/main.rs b/benchmarks-website/migrate/src/main.rs
new file mode 100644
index 00000000000..366834ed441
--- /dev/null
+++ b/benchmarks-website/migrate/src/main.rs
@@ -0,0 +1,114 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! `vortex-bench-migrate` CLI: a one-shot historical migrator from
//! v2's S3 dataset into a v3 DuckDB file, plus a structural diff
//! against the live v2 `/api/metadata` endpoint for spotting
//! classifier regressions.

use std::path::PathBuf;
use std::process::ExitCode;

use anyhow::Context as _;
use anyhow::Result;
use clap::Parser;
use clap::Subcommand;
use clap::ValueEnum;
use tracing_subscriber::EnvFilter;
use vortex_bench_migrate::migrate;
use vortex_bench_migrate::source::Source;
use vortex_bench_migrate::verify;

/// One-shot historical migrator from v2's S3 dataset to v3 DuckDB.
#[derive(Debug, Parser)]
#[command(name = "vortex-bench-migrate", version, about)]
struct Cli {
    #[command(subcommand)]
    command: Command,
}

#[derive(Debug, Subcommand)]
enum Command {
    /// Read v2's data.json.gz / commits.json / file-sizes-*.json.gz
    /// and write a fully populated v3 DuckDB at `--output`.
    Run {
        /// Path to write the v3 DuckDB to. Created if absent.
        #[arg(long)]
        output: PathBuf,
        /// Where to fetch v2 dumps from.
        #[arg(long, value_enum, default_value_t = SourceKind::PublicS3)]
        source: SourceKind,
        /// For `--source=local`, the directory containing
        /// `data.json.gz`, `commits.json`, and `file-sizes-*.json.gz`.
        #[arg(long, required_if_eq("source", "local"))]
        source_dir: Option<PathBuf>,
    },
    /// Diff a migrated DuckDB against the live v2 `/api/metadata`
    /// endpoint. Exits 0 if every v2 group is present in v3, 1
    /// otherwise so this can gate a CI step.
    Verify {
        /// HTTPS root of a running v2 server (e.g. `https://bench.vortex.dev`).
        #[arg(long)]
        against: String,
        /// Path to the migrated v3 DuckDB.
        #[arg(long)]
        duckdb: PathBuf,
    },
}

#[derive(Debug, Clone, Copy, ValueEnum)]
enum SourceKind {
    PublicS3,
    Local,
}

fn main() -> ExitCode {
    if let Err(err) = run() {
        eprintln!("error: {err:#}");
        return ExitCode::from(2);
    }
    ExitCode::SUCCESS
}

fn run() -> Result<()> {
    tracing_subscriber::fmt()
        .with_env_filter(
            EnvFilter::try_from_env("VORTEX_BENCH_LOG").unwrap_or_else(|_| EnvFilter::new("info")),
        )
        .init();

    let cli = Cli::parse();
    match cli.command {
        Command::Run {
            output,
            source,
            source_dir,
        } => {
            let source = match source {
                SourceKind::PublicS3 => Source::PublicS3,
                SourceKind::Local => {
                    Source::Local(source_dir.context("--source=local requires --source-dir")?)
                }
            };
            let summary = migrate::run(&source, &output)?;
            print!("{summary}");
            if summary.uncategorized_fraction() > 0.05 {
                anyhow::bail!(
                    "uncategorized records ({:.2}%) exceed the 5% gate; \
                     stop and report unmatched prefixes (see summary above) \
                     before proceeding",
                    100.0 * summary.uncategorized_fraction()
                );
            }
            Ok(())
        }
        Command::Verify { against, duckdb } => {
            let report = verify::run(&against, &duckdb)?;
            print!("{report}");
            if !report.v2_groups_covered() {
                std::process::exit(1);
            }
            Ok(())
        }
    }
}
diff --git a/benchmarks-website/migrate/src/migrate.rs b/benchmarks-website/migrate/src/migrate.rs
new file mode 100644
index 00000000000..7b3b32bb51c
--- /dev/null
+++ b/benchmarks-website/migrate/src/migrate.rs
@@ -0,0 +1,836 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! End-to-end migration of one v2 dataset into a v3 DuckDB file.
//!
//! Streams `data.json.gz` line-by-line, runs each record through the
//! [`classifier`], and writes one row per record into the appropriate
//! v3 fact table.
//! Every row's `measurement_id` is computed via the server's
//! `measurement_id_*` functions so the result is byte-compatible with
//! what a fresh `/api/ingest` would have produced.
//!
//! Bulk-load shape: rows are accumulated in memory as parallel column
//! vectors, deduplicated by `measurement_id`, then flushed to DuckDB
//! via `Appender::append_record_batch` as one Arrow `RecordBatch` per
//! fact table.

use std::collections::BTreeMap;
use std::io::BufRead;
use std::path::Path;
use std::sync::Arc;
use std::time::Duration;
use std::time::Instant;

use anyhow::Context as _;
use anyhow::Result;
use arrow_array::ArrayRef;
use arrow_array::Int32Array;
use arrow_array::Int64Array;
use arrow_array::ListArray;
use arrow_array::RecordBatch;
use arrow_array::StringArray;
use arrow_buffer::OffsetBuffer;
use arrow_schema::DataType;
use arrow_schema::Field;
use arrow_schema::Schema;
use duckdb::Connection;
use tracing::info;
use tracing::warn;
use vortex_bench_server::db::measurement_id_compression_size;
use vortex_bench_server::db::measurement_id_compression_time;
use vortex_bench_server::db::measurement_id_query;
use vortex_bench_server::db::measurement_id_random_access;
use vortex_bench_server::records::CompressionSize;
use vortex_bench_server::records::CompressionTime;
use vortex_bench_server::records::QueryMeasurement;
use vortex_bench_server::records::RandomAccessTime;
use vortex_bench_server::schema::SCHEMA_DDL;
use vortex_utils::aliases::hash_map::HashMap;

use crate::classifier;
use crate::classifier::V3Bin;
use crate::commits::upsert_commit;
use crate::source::Source;
use crate::v2::V2Commit;
use crate::v2::V2FileSize;
use crate::v2::V2Record;
use crate::v2::index_commits;
use crate::v2::runtime_as_i64;
use crate::v2::value_as_f64;

/// Per-table insert counts, plus skip / missing counts.
#[derive(Debug, Default, Clone)]
pub struct MigrationSummary {
    pub records_read: u64,
    pub query_inserted: u64,
    pub compression_time_inserted: u64,
    pub compression_size_inserted: u64,
    pub random_access_inserted: u64,
    pub file_size_inserted: u64,
    pub uncategorized: u64,
    pub uncategorized_prefixes: BTreeMap<String, u64>,
    pub missing_commit: u64,
    pub commit_warnings: u64,
    pub skipped_no_value: u64,
    pub skipped_intentional: u64,
    pub commits_inserted: u64,
    pub deduped: u64,
    /// Number of records dropped by dedup whose `value_ns` (or
    /// `value_bytes` for compression_sizes' replace path) differed
    /// from the kept row's. Non-zero is a smell worth investigating.
    pub deduped_with_conflict: u64,
}

impl MigrationSummary {
    /// Total `data.json.gz` records that landed in some v3 fact table.
    pub fn total_inserted(&self) -> u64 {
        self.query_inserted
            + self.compression_time_inserted
            + self.compression_size_inserted
            + self.random_access_inserted
    }

    /// Fraction of records that were uncategorized. The orchestrator
    /// stops if this exceeds the documented 5% threshold.
    pub fn uncategorized_fraction(&self) -> f64 {
        if self.records_read == 0 {
            return 0.0;
        }
        self.uncategorized as f64 / self.records_read as f64
    }
}

/// Open or create a DuckDB at `path` and apply the v3 schema. The
/// migrator is a one-shot fresh load; the bulk-append flush is pure
/// insert (no `ON CONFLICT`), so any stale rows in `path` would clash
/// with the next run on the same primary keys. Delete both the
/// database file and its WAL companion up front so every run starts
/// from a known-empty state.
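/// Usage sketch (the path is illustrative):
///
/// ```ignore
/// // Re-running against the same path is safe: a stale `v3.duckdb` /
/// // `v3.duckdb.wal` pair from an earlier run is deleted first.
/// let conn = open_target_db(Path::new("v3.duckdb"))?;
/// ```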
pub fn open_target_db(path: &Path) -> Result<Connection> {
    remove_if_exists(path)?;
    let wal = wal_path(path);
    remove_if_exists(&wal)?;
    let conn =
        Connection::open(path).with_context(|| format!("opening DuckDB at {}", path.display()))?;
    conn.execute_batch(SCHEMA_DDL)
        .context("applying v3 schema DDL")?;
    Ok(conn)
}

fn remove_if_exists(path: &Path) -> Result<()> {
    match std::fs::remove_file(path) {
        Ok(()) => {
            info!(path = %path.display(), "removed pre-existing target file");
            Ok(())
        }
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()),
        Err(e) => Err(e).with_context(|| format!("removing {}", path.display())),
    }
}

/// DuckDB writes its write-ahead log next to the database file with a
/// `.wal` suffix appended (e.g. `v3.duckdb` -> `v3.duckdb.wal`).
fn wal_path(path: &Path) -> std::path::PathBuf {
    let mut name = path.as_os_str().to_owned();
    name.push(".wal");
    std::path::PathBuf::from(name)
}

/// Run the whole migration: commits, data.json.gz, and every
/// file-sizes-*.json.gz under the source.
pub fn run(source: &Source, target: &Path) -> Result<MigrationSummary> {
    let mut conn = open_target_db(target)?;
    let mut summary = MigrationSummary::default();

    info!(source = %source.describe(), "Reading commits.json");
    let commits = read_commits(source)?;
    info!(commits = commits.len(), "Loaded commits");
    summary.commits_inserted = upsert_all_commits(&mut conn, &commits, &mut summary)?;

    let mut q = QueryAccum::default();
    let mut ct = CompressionTimeAccum::default();
    let mut cs = CompressionSizeAccum::default();
    let mut ra = RandomAccessAccum::default();

    info!("Migrating data.json.gz");
    migrate_data_jsonl(
        source,
        &commits,
        &mut summary,
        &mut q,
        &mut ct,
        &mut cs,
        &mut ra,
    )?;
    info!(records = summary.records_read, "data.json.gz done");

    for name in source.list_file_sizes()? {
        info!(name = %name, "Migrating file-sizes");
        if let Err(e) = migrate_file_sizes(source, &name, &commits, &mut summary, &mut cs) {
            warn!("file-sizes file {name} failed: {e:#}");
        }
    }

    info!("Flushing accumulators to DuckDB");
    summary.query_inserted = q.measurement_id.len() as u64;
    summary.compression_time_inserted = ct.measurement_id.len() as u64;
    summary.random_access_inserted = ra.measurement_id.len() as u64;
    summary.compression_size_inserted = cs.rows.len() as u64;

    flush(&conn, "query_measurements", build_query_batch(q)?)?;
    flush(
        &conn,
        "compression_times",
        build_compression_time_batch(ct)?,
    )?;
    flush(&conn, "random_access_times", build_random_access_batch(ra)?)?;
    flush(
        &conn,
        "compression_sizes",
        build_compression_size_batch(cs)?,
    )?;

    Ok(summary)
}

fn read_commits(source: &Source) -> Result<BTreeMap<String, V2Commit>> {
    let reader = source.open_commits_jsonl()?;
    let mut commits: Vec<V2Commit> = Vec::new();
    for line in reader.lines() {
        let line = line?;
        let trimmed = line.trim();
        if trimmed.is_empty() {
            continue;
        }
        match serde_json::from_str::<V2Commit>(trimmed) {
            Ok(c) => commits.push(c),
            Err(e) => warn!("skipping malformed commits.json line: {e}"),
        }
    }
    Ok(index_commits(commits))
}

fn upsert_all_commits(
    conn: &mut Connection,
    commits: &BTreeMap<String, V2Commit>,
    summary: &mut MigrationSummary,
) -> Result<u64> {
    let tx = conn.transaction().context("begin commits transaction")?;
    let mut count = 0u64;
    for commit in commits.values() {
        let outcome = upsert_commit(&tx, commit)?;
        for w in outcome.warnings {
            warn!("{w}");
            summary.commit_warnings += 1;
        }
        count += 1;
    }
    tx.commit().context("commit commits transaction")?;
    Ok(count)
}

/// Stream `data.json.gz` and push classified records into the
/// per-table accumulators. Dedup happens inside each accumulator's
/// `push` method by `measurement_id`.
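/// Each input line is one bare JSON record; the shape below is taken
/// from the end-to-end test fixture:
///
/// ```text
/// {"name":"clickbench_q07/datafusion:parquet","commit_id":"deadbeef","unit":"ns","value":42000}
/// ```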
fn migrate_data_jsonl(
    source: &Source,
    commits: &BTreeMap<String, V2Commit>,
    summary: &mut MigrationSummary,
    q: &mut QueryAccum,
    ct: &mut CompressionTimeAccum,
    cs: &mut CompressionSizeAccum,
    ra: &mut RandomAccessAccum,
) -> Result<()> {
    let reader = source.open_data_jsonl()?;
    let started = Instant::now();
    let mut last_log = Instant::now();
    for line in reader.lines() {
        let line = line?;
        let trimmed = line.trim();
        if trimmed.is_empty() {
            continue;
        }
        summary.records_read += 1;
        let record: V2Record = match serde_json::from_str(trimmed) {
            Ok(r) => r,
            Err(e) => {
                warn!("skipping malformed data.json line: {e}");
                continue;
            }
        };
        apply_v2_record(&record, commits, summary, q, ct, cs, ra);
        if last_log.elapsed() >= Duration::from_secs(5) {
            let elapsed = started.elapsed().as_secs_f64();
            let rate = summary.records_read as f64 / elapsed.max(0.001);
            info!(
                records = summary.records_read,
                rate = format!("{rate:.0}/s"),
                query = q.measurement_id.len(),
                compression_time = ct.measurement_id.len(),
                compression_size = cs.rows.len(),
                random_access = ra.measurement_id.len(),
                "migration progress",
            );
            last_log = Instant::now();
        }
    }
    Ok(())
}

fn apply_v2_record(
    record: &V2Record,
    commits: &BTreeMap<String, V2Commit>,
    summary: &mut MigrationSummary,
    q: &mut QueryAccum,
    ct: &mut CompressionTimeAccum,
    cs: &mut CompressionSizeAccum,
    ra: &mut RandomAccessAccum,
) {
    let Some(sha) = record.commit_id.clone() else {
        summary.missing_commit += 1;
        return;
    };
    if !commits.contains_key(&sha) {
        summary.missing_commit += 1;
        return;
    }

    let bin = match classifier::classify_outcome(record) {
        classifier::Outcome::Bin(b) => b,
        classifier::Outcome::Skip(_) => {
            summary.skipped_intentional += 1;
            return;
        }
        classifier::Outcome::Unknown => {
            summary.uncategorized += 1;
            let prefix = record.name.split('/').next().unwrap_or("").to_string();
            *summary.uncategorized_prefixes.entry(prefix).or_insert(0) += 1;
            return;
        }
    };

    let env_triple = record.env_triple.as_ref().and_then(|t| t.to_triple());
    let runtimes = record
        .all_runtimes
        .as_ref()
        .map(|v| v.iter().filter_map(runtime_as_i64).collect::<Vec<i64>>())
        .unwrap_or_default();
    let value_f64 = match record.value.as_ref().and_then(value_as_f64) {
        Some(v) => v,
        None => {
            summary.skipped_no_value += 1;
            return;
        }
    };

    match bin {
        V3Bin::Query {
            dataset,
            dataset_variant,
            scale_factor,
            query_idx,
            storage,
            engine,
            format,
        } => {
            let qm = QueryMeasurement {
                commit_sha: sha,
                dataset,
                dataset_variant,
                scale_factor,
                query_idx,
                storage,
                engine,
                format,
                value_ns: value_f64 as i64,
                all_runtimes_ns: runtimes,
                peak_physical: None,
                peak_virtual: None,
                physical_delta: None,
                virtual_delta: None,
                env_triple,
            };
            let mid = measurement_id_query(&qm);
            q.push(mid, qm, summary);
        }
        V3Bin::CompressionTime {
            dataset,
            dataset_variant,
            format,
            op,
        } => {
            let ctr = CompressionTime {
                commit_sha: sha,
                dataset,
                dataset_variant,
                format,
                op,
                value_ns: value_f64 as i64,
                all_runtimes_ns: runtimes,
                env_triple,
            };
            let mid = measurement_id_compression_time(&ctr);
            ct.push(mid, ctr, summary);
        }
        V3Bin::CompressionSize {
            dataset,
            dataset_variant,
            format,
        } => {
            let csr = CompressionSize {
                commit_sha: sha,
                dataset,
                dataset_variant,
                format,
                value_bytes: value_f64 as i64,
            };
            let mid = measurement_id_compression_size(&csr);
            cs.push_replace(mid, csr, summary);
        }
        V3Bin::RandomAccess { dataset, format } => {
            let rar = RandomAccessTime {
                commit_sha: sha,
                dataset,
                format,
                value_ns: value_f64 as i64,
                all_runtimes_ns: runtimes,
                env_triple,
            };
            let mid = measurement_id_random_access(&rar);
            ra.push(mid, rar, summary);
        }
    }
}

fn migrate_file_sizes(
    source: &Source,
    name: &str,
    commits: &BTreeMap<String, V2Commit>,
    summary: &mut MigrationSummary,
    cs: &mut CompressionSizeAccum,
) -> Result<()> {
    let reader = source.open_file_sizes(name)?;
    let dataset_fallback = name
        .strip_prefix("file-sizes-")
        .and_then(|s| s.strip_suffix(".json.gz"))
        .unwrap_or(name)
        .to_string();
    let started = Instant::now();
    let mut last_log = Instant::now();
    for line in reader.lines() {
        let line = line?;
        let trimmed = line.trim();
        if trimmed.is_empty() {
            continue;
        }
        let sz: V2FileSize = match serde_json::from_str(trimmed) {
            Ok(r) => r,
            Err(e) => {
                warn!("skipping malformed {name} line: {e}");
                continue;
            }
        };
        if !commits.contains_key(&sz.commit_id) {
            summary.missing_commit += 1;
            continue;
        }
        let dataset = if sz.benchmark.is_empty() {
            dataset_fallback.clone()
        } else {
            sz.benchmark.clone()
        };
        let dataset_variant = sz
            .scale_factor
            .as_ref()
            .filter(|s| !s.is_empty() && s.as_str() != "1.0")
            .cloned();
        let csr = CompressionSize {
            commit_sha: sz.commit_id.clone(),
            dataset,
            dataset_variant,
            format: sz.format.clone(),
            value_bytes: sz.size_bytes,
        };
        let mid = measurement_id_compression_size(&csr);
        cs.push_sum(mid, csr);
        summary.file_size_inserted += 1;
        if last_log.elapsed() >= Duration::from_secs(5) {
            let elapsed = started.elapsed().as_secs_f64();
            let rate = summary.file_size_inserted as f64 / elapsed.max(0.001);
            info!(
                name = %name,
                file_sizes = summary.file_size_inserted,
                rate = format!("{rate:.0}/s"),
                "file-sizes progress",
            );
            last_log = Instant::now();
        }
    }
    Ok(())
}

/// Append an Arrow `RecordBatch` to a DuckDB table via `Appender`.
fn flush(conn: &Connection, table: &str, batch: RecordBatch) -> Result<()> {
    let mut app = conn
        .appender(table)
        .with_context(|| format!("opening appender for {table}"))?;
    app.append_record_batch(batch)
        .with_context(|| format!("appending record batch to {table}"))?;
    drop(app);
    Ok(())
}

#[derive(Default)]
struct QueryAccum {
    measurement_id: Vec<i64>,
    commit_sha: Vec<String>,
    dataset: Vec<String>,
    dataset_variant: Vec<Option<String>>,
    scale_factor: Vec<Option<String>>,
    query_idx: Vec<i32>,
    storage: Vec<String>,
    engine: Vec<String>,
    format: Vec<String>,
    value_ns: Vec<i64>,
    all_runtimes_ns: Vec<Vec<i64>>,
    peak_physical: Vec<Option<i64>>,
    peak_virtual: Vec<Option<i64>>,
    physical_delta: Vec<Option<i64>>,
    virtual_delta: Vec<Option<i64>>,
    env_triple: Vec<Option<String>>,
    /// `mid` -> index in the parallel column vecs. Lets us look up the
    /// kept row's `value_ns` on collision so we can flag conflicts.
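    /// For example, if `mid = 42` first landed at column index 3, a
    /// later push of the same `mid` compares its `value_ns` against
    /// `value_ns[3]` and is dropped (bumping the dedup counters).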
    seen: HashMap<i64, usize>,
}

impl QueryAccum {
    fn push(&mut self, mid: i64, r: QueryMeasurement, summary: &mut MigrationSummary) {
        if let Some(&idx) = self.seen.get(&mid) {
            summary.deduped += 1;
            if self.value_ns[idx] != r.value_ns {
                summary.deduped_with_conflict += 1;
            }
            return;
        }
        let idx = self.measurement_id.len();
        self.seen.insert(mid, idx);
        self.measurement_id.push(mid);
        self.commit_sha.push(r.commit_sha);
        self.dataset.push(r.dataset);
        self.dataset_variant.push(r.dataset_variant);
        self.scale_factor.push(r.scale_factor);
        self.query_idx.push(r.query_idx);
        self.storage.push(r.storage);
        self.engine.push(r.engine);
        self.format.push(r.format);
        self.value_ns.push(r.value_ns);
        self.all_runtimes_ns.push(r.all_runtimes_ns);
        self.peak_physical.push(r.peak_physical);
        self.peak_virtual.push(r.peak_virtual);
        self.physical_delta.push(r.physical_delta);
        self.virtual_delta.push(r.virtual_delta);
        self.env_triple.push(r.env_triple);
    }
}

#[derive(Default)]
struct CompressionTimeAccum {
    measurement_id: Vec<i64>,
    commit_sha: Vec<String>,
    dataset: Vec<String>,
    dataset_variant: Vec<Option<String>>,
    format: Vec<String>,
    op: Vec<String>,
    value_ns: Vec<i64>,
    all_runtimes_ns: Vec<Vec<i64>>,
    env_triple: Vec<Option<String>>,
    seen: HashMap<i64, usize>,
}

impl CompressionTimeAccum {
    fn push(&mut self, mid: i64, r: CompressionTime, summary: &mut MigrationSummary) {
        if let Some(&idx) = self.seen.get(&mid) {
            summary.deduped += 1;
            if self.value_ns[idx] != r.value_ns {
                summary.deduped_with_conflict += 1;
            }
            return;
        }
        let idx = self.measurement_id.len();
        self.seen.insert(mid, idx);
        self.measurement_id.push(mid);
        self.commit_sha.push(r.commit_sha);
        self.dataset.push(r.dataset);
        self.dataset_variant.push(r.dataset_variant);
        self.format.push(r.format);
        self.op.push(r.op);
        self.value_ns.push(r.value_ns);
        self.all_runtimes_ns.push(r.all_runtimes_ns);
        self.env_triple.push(r.env_triple);
    }
}

#[derive(Default)]
struct RandomAccessAccum {
    measurement_id: Vec<i64>,
    commit_sha: Vec<String>,
    dataset: Vec<String>,
    format: Vec<String>,
    value_ns: Vec<i64>,
    all_runtimes_ns: Vec<Vec<i64>>,
    env_triple: Vec<Option<String>>,
    seen: HashMap<i64, usize>,
}

impl RandomAccessAccum {
    fn push(&mut self, mid: i64, r: RandomAccessTime, summary: &mut MigrationSummary) {
        if let Some(&idx) = self.seen.get(&mid) {
            summary.deduped += 1;
            if self.value_ns[idx] != r.value_ns {
                summary.deduped_with_conflict += 1;
            }
            return;
        }
        let idx = self.measurement_id.len();
        self.seen.insert(mid, idx);
        self.measurement_id.push(mid);
        self.commit_sha.push(r.commit_sha);
        self.dataset.push(r.dataset);
        self.format.push(r.format);
        self.value_ns.push(r.value_ns);
        self.all_runtimes_ns.push(r.all_runtimes_ns);
        self.env_triple.push(r.env_triple);
    }
}

/// `compression_sizes` is fed by both data.json.gz (replace-on-collision)
/// and file-sizes-*.json.gz (sum-on-collision). Stored as a map; converted
/// to a `RecordBatch` at flush time.
#[derive(Default)]
struct CompressionSizeAccum {
    rows: HashMap<i64, CompressionSize>,
}

impl CompressionSizeAccum {
    /// data.json.gz path: latest write wins, mirroring the prior
    /// `ON CONFLICT DO UPDATE SET value_bytes = excluded.value_bytes`.
    /// Bumps `deduped_with_conflict` when an existing row's
    /// `value_bytes` differs from the incoming row's, so silent
    /// value-corruption is observable.
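    /// A minimal sketch of the collision accounting (the `acc`, `mid`,
    /// and row bindings are hypothetical):
    ///
    /// ```ignore
    /// acc.push_replace(mid, row_of_1024_bytes, &mut summary);
    /// acc.push_replace(mid, row_of_2048_bytes, &mut summary);
    /// // The second row wins, and the size mismatch is counted:
    /// assert_eq!(summary.deduped_with_conflict, 1);
    /// ```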
    fn push_replace(&mut self, mid: i64, r: CompressionSize, summary: &mut MigrationSummary) {
        if let Some(existing) = self.rows.get(&mid)
            && existing.value_bytes != r.value_bytes
        {
            summary.deduped_with_conflict += 1;
        }
        self.rows.insert(mid, r);
    }

    /// file-sizes-*.json.gz path: per-file rows aggregate into one
    /// `(commit, dataset, dataset_variant, format)` row by summing,
    /// mirroring the prior `value_bytes = compression_sizes.value_bytes
    /// + excluded.value_bytes`.
    fn push_sum(&mut self, mid: i64, r: CompressionSize) {
        let add = r.value_bytes;
        self.rows
            .entry(mid)
            .and_modify(|x| x.value_bytes += add)
            .or_insert(r);
    }
}

fn build_query_batch(a: QueryAccum) -> Result<RecordBatch> {
    let schema = Arc::new(Schema::new(vec![
        Field::new("measurement_id", DataType::Int64, false),
        Field::new("commit_sha", DataType::Utf8, false),
        Field::new("dataset", DataType::Utf8, false),
        Field::new("dataset_variant", DataType::Utf8, true),
        Field::new("scale_factor", DataType::Utf8, true),
        Field::new("query_idx", DataType::Int32, false),
        Field::new("storage", DataType::Utf8, false),
        Field::new("engine", DataType::Utf8, false),
        Field::new("format", DataType::Utf8, false),
        Field::new("value_ns", DataType::Int64, false),
        Field::new(
            "all_runtimes_ns",
            DataType::List(Arc::new(Field::new("item", DataType::Int64, false))),
            false,
        ),
        Field::new("peak_physical", DataType::Int64, true),
        Field::new("peak_virtual", DataType::Int64, true),
        Field::new("physical_delta", DataType::Int64, true),
        Field::new("virtual_delta", DataType::Int64, true),
        Field::new("env_triple", DataType::Utf8, true),
    ]));
    let cols: Vec<ArrayRef> = vec![
        Arc::new(Int64Array::from(a.measurement_id)),
        Arc::new(StringArray::from(a.commit_sha)),
        Arc::new(StringArray::from(a.dataset)),
        Arc::new(StringArray::from(a.dataset_variant)),
        Arc::new(StringArray::from(a.scale_factor)),
        Arc::new(Int32Array::from(a.query_idx)),
        Arc::new(StringArray::from(a.storage)),
        Arc::new(StringArray::from(a.engine)),
        Arc::new(StringArray::from(a.format)),
        Arc::new(Int64Array::from(a.value_ns)),
        Arc::new(build_list_int64(a.all_runtimes_ns)),
        Arc::new(Int64Array::from(a.peak_physical)),
        Arc::new(Int64Array::from(a.peak_virtual)),
        Arc::new(Int64Array::from(a.physical_delta)),
        Arc::new(Int64Array::from(a.virtual_delta)),
        Arc::new(StringArray::from(a.env_triple)),
    ];
    Ok(RecordBatch::try_new(schema, cols)?)
}
fn build_compression_time_batch(a: CompressionTimeAccum) -> Result<RecordBatch> {
    let schema = Arc::new(Schema::new(vec![
        Field::new("measurement_id", DataType::Int64, false),
        Field::new("commit_sha", DataType::Utf8, false),
        Field::new("dataset", DataType::Utf8, false),
        Field::new("dataset_variant", DataType::Utf8, true),
        Field::new("format", DataType::Utf8, false),
        Field::new("op", DataType::Utf8, false),
        Field::new("value_ns", DataType::Int64, false),
        Field::new(
            "all_runtimes_ns",
            DataType::List(Arc::new(Field::new("item", DataType::Int64, false))),
            false,
        ),
        Field::new("env_triple", DataType::Utf8, true),
    ]));
    let cols: Vec<ArrayRef> = vec![
        Arc::new(Int64Array::from(a.measurement_id)),
        Arc::new(StringArray::from(a.commit_sha)),
        Arc::new(StringArray::from(a.dataset)),
        Arc::new(StringArray::from(a.dataset_variant)),
        Arc::new(StringArray::from(a.format)),
        Arc::new(StringArray::from(a.op)),
        Arc::new(Int64Array::from(a.value_ns)),
        Arc::new(build_list_int64(a.all_runtimes_ns)),
        Arc::new(StringArray::from(a.env_triple)),
    ];
    Ok(RecordBatch::try_new(schema, cols)?)
}

fn build_random_access_batch(a: RandomAccessAccum) -> Result<RecordBatch> {
    let schema = Arc::new(Schema::new(vec![
        Field::new("measurement_id", DataType::Int64, false),
        Field::new("commit_sha", DataType::Utf8, false),
        Field::new("dataset", DataType::Utf8, false),
        Field::new("format", DataType::Utf8, false),
        Field::new("value_ns", DataType::Int64, false),
        Field::new(
            "all_runtimes_ns",
            DataType::List(Arc::new(Field::new("item", DataType::Int64, false))),
            false,
        ),
        Field::new("env_triple", DataType::Utf8, true),
    ]));
    let cols: Vec<ArrayRef> = vec![
        Arc::new(Int64Array::from(a.measurement_id)),
        Arc::new(StringArray::from(a.commit_sha)),
        Arc::new(StringArray::from(a.dataset)),
        Arc::new(StringArray::from(a.format)),
        Arc::new(Int64Array::from(a.value_ns)),
        Arc::new(build_list_int64(a.all_runtimes_ns)),
        Arc::new(StringArray::from(a.env_triple)),
    ];
    Ok(RecordBatch::try_new(schema, cols)?)
}

fn build_compression_size_batch(a: CompressionSizeAccum) -> Result<RecordBatch> {
    let n = a.rows.len();
    let mut measurement_id = Vec::with_capacity(n);
    let mut commit_sha = Vec::with_capacity(n);
    let mut dataset = Vec::with_capacity(n);
    let mut dataset_variant = Vec::with_capacity(n);
    let mut format = Vec::with_capacity(n);
    let mut value_bytes = Vec::with_capacity(n);
    for (mid, cs) in a.rows {
        measurement_id.push(mid);
        commit_sha.push(cs.commit_sha);
        dataset.push(cs.dataset);
        dataset_variant.push(cs.dataset_variant);
        format.push(cs.format);
        value_bytes.push(cs.value_bytes);
    }
    let schema = Arc::new(Schema::new(vec![
        Field::new("measurement_id", DataType::Int64, false),
        Field::new("commit_sha", DataType::Utf8, false),
        Field::new("dataset", DataType::Utf8, false),
        Field::new("dataset_variant", DataType::Utf8, true),
        Field::new("format", DataType::Utf8, false),
        Field::new("value_bytes", DataType::Int64, false),
    ]));
    let cols: Vec<ArrayRef> = vec![
        Arc::new(Int64Array::from(measurement_id)),
        Arc::new(StringArray::from(commit_sha)),
        Arc::new(StringArray::from(dataset)),
        Arc::new(StringArray::from(dataset_variant)),
        Arc::new(StringArray::from(format)),
        Arc::new(Int64Array::from(value_bytes)),
    ];
    Ok(RecordBatch::try_new(schema, cols)?)
}

/// Build a non-nullable `List<Int64>` Arrow array from one inner
/// `Vec<i64>` per row. The outer list is non-null; inner i64 values
/// are non-null.
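/// For example, rows `[[1, 2], [], [3]]` flatten to values
/// `[1, 2, 3]` with offsets `[0, 2, 2, 3]`.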
fn build_list_int64(values: Vec<Vec<i64>>) -> ListArray {
    let mut offsets: Vec<i32> = Vec::with_capacity(values.len() + 1);
    offsets.push(0);
    let mut flat: Vec<i64> = Vec::new();
    for inner in values {
        flat.extend_from_slice(&inner);
        offsets.push(flat.len() as i32);
    }
    let values_arr = Int64Array::from(flat);
    let field = Arc::new(Field::new("item", DataType::Int64, false));
    ListArray::new(
        field,
        OffsetBuffer::new(offsets.into()),
        Arc::new(values_arr),
        None,
    )
}

/// Human-readable rendering of the summary; the CLI prints this after
/// a run.
impl std::fmt::Display for MigrationSummary {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        writeln!(f, "Records read: {}", self.records_read)?;
        writeln!(f, "Commits upserted: {}", self.commits_inserted)?;
        writeln!(f, "Commit warnings: {}", self.commit_warnings)?;
        writeln!(f, "Inserted (query): {}", self.query_inserted)?;
        writeln!(
            f,
            "Inserted (compress t): {}",
            self.compression_time_inserted
        )?;
        writeln!(
            f,
            "Inserted (compress s): {}",
            self.compression_size_inserted
        )?;
        writeln!(f, "Inserted (random acc): {}", self.random_access_inserted)?;
        writeln!(f, "Inserted (file sizes): {}", self.file_size_inserted)?;
        writeln!(f, "Missing commit: {}", self.missing_commit)?;
        writeln!(f, "Skipped (no value): {}", self.skipped_no_value)?;
        writeln!(f, "Skipped (intentional): {}", self.skipped_intentional)?;
        writeln!(f, "Deduplicated: {}", self.deduped)?;
        writeln!(f, "Dedup w/ value diff: {}", self.deduped_with_conflict)?;
        writeln!(
            f,
            "Uncategorized: {} ({:.2}%)",
            self.uncategorized,
            100.0 * self.uncategorized_fraction()
        )?;
        if !self.uncategorized_prefixes.is_empty() {
            let mut top: Vec<_> = self.uncategorized_prefixes.iter().collect();
            top.sort_by(|a, b| b.1.cmp(a.1));
            writeln!(f, "Top uncategorized prefixes:")?;
            for (prefix, n) in top.iter().take(20) {
                writeln!(f, " {prefix:>32} : {n}")?;
            }
        }
        Ok(())
    }
}
diff --git a/benchmarks-website/migrate/src/source.rs b/benchmarks-website/migrate/src/source.rs
new file mode 100644
index 00000000000..c18e86a63ca
--- /dev/null
+++ b/benchmarks-website/migrate/src/source.rs
@@ -0,0 +1,140 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! Streaming readers for v2's public S3 bucket.
//!
//! The bucket allows unsigned (`--no-sign-request`) reads, so we fetch
//! the underlying HTTPS URL directly and stream-decompress with
//! `flate2`. The downloads are wrapped in [`reqwest::blocking`] to keep
//! the read path synchronous; the binary's hot path is single-threaded
//! per-source already (DuckDB is a single-writer).
//!
//! For tests and offline runs, [`Source::Local`] accepts a local
//! directory of dumps; the migrator's `--source` flag picks the
//! variant.

use std::fs::File;
use std::io::BufRead;
use std::io::BufReader;
use std::io::Read;
use std::path::Path;
use std::path::PathBuf;

use anyhow::Context as _;
use anyhow::Result;
use flate2::read::GzDecoder;
use tracing::info;

/// Public S3 bucket the live v2 server reads from.
pub const PUBLIC_BUCKET_BASE: &str = "https://vortex-ci-benchmark-results.s3.amazonaws.com";

/// Where to read the v2 dataset from. Either the public S3 bucket
/// (the live deployment) or a local directory of dumps.
#[derive(Debug, Clone)]
pub enum Source {
    /// HTTPS GETs against `s3.amazonaws.com`.
    PublicS3,
    /// A directory containing `data.json.gz`, `commits.json`, and
    /// `file-sizes-*.json.gz` files.
    Local(PathBuf),
}

impl Source {
    /// Short human-readable description for log messages.
    pub fn describe(&self) -> String {
        match self {
            Source::PublicS3 => "public S3 bucket".to_string(),
            Source::Local(p) => format!("local dir {}", p.display()),
        }
    }

    /// Open `data.json.gz` for streaming, decompressing on the fly.
    pub fn open_data_jsonl(&self) -> Result<Box<dyn BufRead>> {
        let stream = self.open_raw("data.json.gz")?;
        Ok(Box::new(BufReader::new(GzDecoder::new(stream))))
    }

    /// Open `commits.json` (uncompressed).
    pub fn open_commits_jsonl(&self) -> Result<Box<dyn BufRead>> {
        let stream = self.open_raw("commits.json")?;
        Ok(Box::new(BufReader::new(stream)))
    }

    /// Enumerate `file-sizes-*.json.gz` files. For local sources this
    /// is a directory glob; for the public bucket we hit the documented
    /// suite ids.
    pub fn list_file_sizes(&self) -> Result<Vec<String>> {
        match self {
            Source::Local(dir) => {
                let mut out = Vec::new();
                for entry in std::fs::read_dir(dir)? {
                    let entry = entry?;
                    let name = entry.file_name();
                    let s = name.to_string_lossy();
                    if s.starts_with("file-sizes-") && s.ends_with(".json.gz") {
                        out.push(s.into_owned());
                    }
                }
                out.sort();
                Ok(out)
            }
            Source::PublicS3 => {
                // The S3 bucket's ListObjects is denied for unsigned
                // requests, so we hit the documented per-suite keys
                // emitted by `.github/workflows/sql-benchmarks.yml`.
                Ok(KNOWN_FILE_SIZES_SUITES
                    .iter()
                    .map(|id| format!("file-sizes-{id}.json.gz"))
                    .collect())
            }
        }
    }

    /// Open one `file-sizes-*.json.gz` for streaming.
    pub fn open_file_sizes(&self, name: &str) -> Result<Box<dyn BufRead>> {
        let stream = self.open_raw(name)?;
        Ok(Box::new(BufReader::new(GzDecoder::new(stream))))
    }

    fn open_raw(&self, name: &str) -> Result<Box<dyn Read>> {
        match self {
            Source::Local(dir) => open_local(&dir.join(name)),
            Source::PublicS3 => open_s3(name),
        }
    }
}

fn open_local(path: &Path) -> Result<Box<dyn Read>> {
    let f = File::open(path).with_context(|| format!("opening {}", path.display()))?;
    Ok(Box::new(f))
}

fn open_s3(name: &str) -> Result<Box<dyn Read>> {
    let url = format!("{PUBLIC_BUCKET_BASE}/{name}");
    info!(url = %url, "GET");
    let resp = reqwest::blocking::get(&url).with_context(|| format!("GET {url}"))?;
    if !resp.status().is_success() {
        anyhow::bail!("GET {url} returned {}", resp.status());
    }
    Ok(Box::new(resp))
}

/// Suite IDs we know publish a `file-sizes-{id}.json.gz` to S3.
///
/// Source of truth: the `matrix.id` values in
/// `.github/workflows/sql-benchmarks.yml`'s `benchmark_matrix` default.
/// The post-bench `file-sizes` step uploads `file-sizes-${{ matrix.id
/// }}.json.gz`, so this list must match those IDs verbatim. Adding a
/// new matrix entry to that workflow means adding the same ID here.
const KNOWN_FILE_SIZES_SUITES: &[&str] = &[
    "clickbench-nvme",
    "tpch-nvme",
    "tpch-s3",
    "tpch-nvme-10",
    "tpch-s3-10",
    "tpcds-nvme",
    "statpopgen",
    "fineweb",
    "fineweb-s3",
    "polarsignals",
];
diff --git a/benchmarks-website/migrate/src/v2.rs b/benchmarks-website/migrate/src/v2.rs
new file mode 100644
index 00000000000..2a9d3bdf5d0
--- /dev/null
+++ b/benchmarks-website/migrate/src/v2.rs
@@ -0,0 +1,142 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! Wire shapes of the v2 benchmark dataset on S3.
//!
//! These types capture only the fields the migrator reads. v2 records
//! are serialized by `vortex-bench` (see `vortex-bench/src/measurements.rs`)
//! and by older non-Rust scripts; the union of fields is loose, so we
//! deserialize permissively (`serde(default)`, untyped `serde_json::Value`
//! for the polymorphic `dataset` field).

use std::collections::BTreeMap;

use serde::Deserialize;

/// One JSONL line of `data.json.gz`.
///
/// The shape is the union of every emitter's output. Most fields are
/// optional because different benches emit different subsets.
#[derive(Debug, Clone, Deserialize)]
pub struct V2Record {
    pub name: String,
    #[serde(default)]
    pub commit_id: Option<String>,
    #[serde(default)]
    pub unit: Option<String>,
    #[serde(default)]
    pub value: Option<serde_json::Value>,
    #[serde(default)]
    pub storage: Option<String>,
    #[serde(default)]
    pub dataset: Option<serde_json::Value>,
    #[serde(default)]
    pub all_runtimes: Option<Vec<serde_json::Value>>,
    #[serde(default)]
    pub env_triple: Option<V2EnvTriple>,
}

/// `dataset` in v2 records is sometimes a string, sometimes an object
/// keyed by suite name (`{ "tpch": { "scale_factor": "10" } }`).
/// This helper looks up the scale factor for a given suite without
/// assuming a particular shape.
pub fn dataset_scale_factor(dataset: &serde_json::Value, key: &str) -> Option<String> {
    let obj = dataset.as_object()?;
    let entry = obj.get(key)?;
    let sf = entry.get("scale_factor")?;
    match sf {
        serde_json::Value::String(s) => Some(s.clone()),
        serde_json::Value::Number(n) => Some(n.to_string()),
        _ => None,
    }
}

/// Best-effort numeric coercion for the polymorphic `value` field.
pub fn value_as_f64(value: &serde_json::Value) -> Option<f64> {
    match value {
        serde_json::Value::Number(n) => n.as_f64(),
        serde_json::Value::String(s) => s.parse().ok(),
        _ => None,
    }
}

/// Best-effort coercion of a runtime entry to nanoseconds.
pub fn runtime_as_i64(value: &serde_json::Value) -> Option<i64> {
    match value {
        serde_json::Value::Number(n) => {
            if let Some(i) = n.as_i64() {
                Some(i)
            } else {
                n.as_f64().map(|f| f as i64)
            }
        }
        serde_json::Value::String(s) => s.parse().ok(),
        _ => None,
    }
}

/// Triple block as emitted by `vortex-bench`'s `--gh-json` path. v2
/// stored it as an object; we serialize it back out as `arch-os-env`.
#[derive(Debug, Clone, Deserialize)]
pub struct V2EnvTriple {
    #[serde(default)]
    pub architecture: Option<String>,
    #[serde(default)]
    pub operating_system: Option<String>,
    #[serde(default)]
    pub environment: Option<String>,
}

impl V2EnvTriple {
    /// Format as the `arch-os-env` triple used by v3's `env_triple` column.
    pub fn to_triple(&self) -> Option<String> {
        let arch = self.architecture.as_deref()?;
        let os = self.operating_system.as_deref()?;
        let env = self.environment.as_deref()?;
        Some(format!("{arch}-{os}-{env}"))
    }
}

/// One JSONL line of `commits.json`.
#[derive(Debug, Clone, Deserialize)]
pub struct V2Commit {
    pub id: String,
    #[serde(default)]
    pub timestamp: Option<String>,
    #[serde(default)]
    pub message: Option<String>,
    #[serde(default)]
    pub author: Option<V2Person>,
    #[serde(default)]
    pub committer: Option<V2Person>,
    #[serde(default)]
    pub tree_id: Option<String>,
    #[serde(default)]
    pub url: Option<String>,
}

#[derive(Debug, Clone, Deserialize)]
pub struct V2Person {
    #[serde(default)]
    pub name: Option<String>,
    #[serde(default)]
    pub email: Option<String>,
}

/// One JSONL line of `file-sizes-*.json.gz` produced by
/// `scripts/capture-file-sizes.py`.
#[derive(Debug, Clone, Deserialize)]
pub struct V2FileSize {
    pub commit_id: String,
    pub benchmark: String,
    #[serde(default)]
    pub scale_factor: Option<String>,
    pub format: String,
    pub file: String,
    pub size_bytes: i64,
}

/// Build a sha-keyed map of commits.
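/// If two inputs share an `id`, the later one wins: collecting into a
/// `BTreeMap` overwrites on duplicate keys.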
pub fn index_commits(commits: Vec<V2Commit>) -> BTreeMap<String, V2Commit> {
    commits.into_iter().map(|c| (c.id.clone(), c)).collect()
}
diff --git a/benchmarks-website/migrate/src/verify.rs b/benchmarks-website/migrate/src/verify.rs
new file mode 100644
index 00000000000..eb4caef6df7
--- /dev/null
+++ b/benchmarks-website/migrate/src/verify.rs
@@ -0,0 +1,350 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! Structural diff between a migrated v3 DuckDB and the live v2
//! `/api/metadata` endpoint.
//!
//! Compares group / chart structure only; values aren't compared
//! because v2 converts ns → ms and bytes → MiB on read while v3
//! stores raw values and the chart query does the division.
//! Group/chart structural equivalence is enough to spot classifier
//! regressions before cutover.

use std::collections::BTreeMap;
use std::collections::BTreeSet;
use std::path::Path;

use anyhow::Context as _;
use anyhow::Result;
use duckdb::Connection;
use serde::Deserialize;

use crate::classifier::QUERY_SUITES;

/// Result of one `verify` run.
#[derive(Debug, Default)]
pub struct VerifyReport {
    pub matched_groups: Vec<String>,
    pub only_in_v3: Vec<String>,
    pub only_in_v2: Vec<String>,
    pub chart_diffs: Vec<ChartDiff>,
}

#[derive(Debug, Clone)]
pub struct ChartDiff {
    pub group: String,
    pub v2_count: usize,
    pub v3_count: usize,
}

impl VerifyReport {
    /// True if every v2 group is represented in v3. The CLI's exit
    /// code reflects this.
    pub fn v2_groups_covered(&self) -> bool {
        self.only_in_v2.is_empty()
    }
}

impl std::fmt::Display for VerifyReport {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        writeln!(f, "Groups in both v2 and v3:")?;
        for g in &self.matched_groups {
            writeln!(f, " + {g}")?;
        }
        if !self.only_in_v2.is_empty() {
            writeln!(f, "Groups only in v2 (regression candidates):")?;
            for g in &self.only_in_v2 {
                writeln!(f, " - {g}")?;
            }
        }
        if !self.only_in_v3.is_empty() {
            writeln!(f, "Groups only in v3:")?;
            for g in &self.only_in_v3 {
                writeln!(f, " + {g}")?;
            }
        }
        if !self.chart_diffs.is_empty() {
            writeln!(f, "Chart count diffs:")?;
            for d in &self.chart_diffs {
                writeln!(
                    f,
                    " {} : v2={} v3={} (delta={})",
                    d.group,
                    d.v2_count,
                    d.v3_count,
                    d.v3_count as i64 - d.v2_count as i64,
                )?;
            }
        }
        Ok(())
    }
}

/// v2's `/api/metadata` reply — only the fields we need.
#[derive(Debug, Deserialize)]
struct V2Metadata {
    groups: BTreeMap<String, V2GroupMeta>,
}

#[derive(Debug, Deserialize)]
struct V2GroupMeta {
    #[serde(default)]
    charts: Vec<V2ChartMeta>,
}

#[derive(Debug, Deserialize)]
struct V2ChartMeta {
    #[serde(default)]
    name: String,
}

/// Open the migrated DuckDB at `duckdb_path`, fetch `/api/metadata`,
/// and produce a structural diff.
pub fn run(v2_server: &str, duckdb_path: &Path) -> Result<VerifyReport> {
    let v3 = collect_v3_groups(duckdb_path)?;
    let v2 = fetch_v2_metadata(v2_server)?;
    Ok(diff(&v2, &v3))
}

fn collect_v3_groups(duckdb_path: &Path) -> Result<BTreeMap<String, BTreeSet<String>>> {
    let conn = Connection::open(duckdb_path)
        .with_context(|| format!("opening DuckDB at {}", duckdb_path.display()))?;
    let mut groups: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();

    // query_measurements: chart per (dataset, query_idx); group per
    // (dataset, dataset_variant, scale_factor, storage). We want v2's
    // group display names so the verifier compares apples to apples;
    // re-derive them here using the same suite table.
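    // (For example, a v3 tuple (tpch, SF=10, s3, q1) should come out
    // as group "TPC-H (S3) (SF=10)" with chart "TPC-H Q1", matching
    // the v2 display names pinned in the tests below.)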
    let mut stmt = conn.prepare(
        r#"
        SELECT dataset, dataset_variant, scale_factor, storage, query_idx
        FROM query_measurements
        GROUP BY dataset, dataset_variant, scale_factor, storage, query_idx
        "#,
    )?;
    let rows = stmt.query_map([], |row| {
        Ok((
            row.get::<_, String>(0)?,
            row.get::<_, Option<String>>(1)?,
            row.get::<_, Option<String>>(2)?,
            row.get::<_, String>(3)?,
            row.get::<_, i32>(4)?,
        ))
    })?;
    for row in rows {
        let (dataset, _variant, sf, storage, query_idx) = row?;
        let group_name = display_query_group(&dataset, sf.as_deref(), &storage);
        let chart_name = chart_name_query(&dataset, query_idx);
        groups
            .entry(group_name)
            .or_default()
            .insert(normalize_chart(&chart_name));
    }

    // compression_times: group "Compression", charts per dataset.
    let mut stmt = conn.prepare(
        r#"
        SELECT dataset, format, op
        FROM compression_times
        GROUP BY dataset, format, op
        "#,
    )?;
    let rows = stmt.query_map([], |row| {
        Ok((
            row.get::<_, String>(0)?,
            row.get::<_, String>(1)?,
            row.get::<_, String>(2)?,
        ))
    })?;
    for row in rows {
        let (dataset, format, op) = row?;
        let chart = chart_name_compression_time(&format, &op, &dataset);
        groups
            .entry("Compression".to_string())
            .or_default()
            .insert(normalize_chart(&chart));
    }

    let mut stmt = conn.prepare(
        r#"
        SELECT dataset, format
        FROM compression_sizes
        GROUP BY dataset, format
        "#,
    )?;
    let rows = stmt.query_map([], |row| {
        Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
    })?;
    for row in rows {
        let (_dataset, format) = row?;
        let chart = chart_name_compression_size(&format);
        groups
            .entry("Compression Size".to_string())
            .or_default()
            .insert(normalize_chart(&chart));
    }

    let mut stmt = conn.prepare(
        r#"
        SELECT DISTINCT dataset
        FROM random_access_times
        "#,
    )?;
    let rows = stmt.query_map([], |row| row.get::<_, String>(0))?;
    for row in rows {
        let dataset = row?;
        groups
            .entry("Random Access".to_string())
            .or_default()
            .insert(normalize_chart(&dataset));
    }

    Ok(groups)
}

fn fetch_v2_metadata(server: &str) -> Result<BTreeMap<String, BTreeSet<String>>> {
    let url = format!("{}/api/metadata", server.trim_end_matches('/'));
    let body = reqwest::blocking::get(&url)
        .with_context(|| format!("GET {url}"))?
        .error_for_status()
        .with_context(|| format!("non-2xx from {url}"))?
        .json::<V2Metadata>()
        .with_context(|| format!("parsing {url} as v2 /api/metadata"))?;
    let mut out: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
    for (name, group) in body.groups {
        let charts = group
            .charts
            .into_iter()
            .map(|c| normalize_chart(&c.name))
            .collect();
        out.insert(name, charts);
    }
    Ok(out)
}

fn diff(
    v2: &BTreeMap<String, BTreeSet<String>>,
    v3: &BTreeMap<String, BTreeSet<String>>,
) -> VerifyReport {
    let mut report = VerifyReport::default();
    let v2_keys: BTreeSet<&String> = v2.keys().collect();
    let v3_keys: BTreeSet<&String> = v3.keys().collect();
    for g in v2_keys.intersection(&v3_keys) {
        report.matched_groups.push((**g).clone());
        let v2_charts = &v2[*g];
        let v3_charts = &v3[*g];
        if v2_charts.len() != v3_charts.len() {
            report.chart_diffs.push(ChartDiff {
                group: (**g).clone(),
                v2_count: v2_charts.len(),
                v3_count: v3_charts.len(),
            });
        }
    }
    for g in v3_keys.difference(&v2_keys) {
        report.only_in_v3.push((**g).clone());
    }
    for g in v2_keys.difference(&v3_keys) {
        report.only_in_v2.push((**g).clone());
    }
    report.matched_groups.sort();
    report.only_in_v3.sort();
    report.only_in_v2.sort();
    report
}

fn display_query_group(dataset: &str, scale_factor: Option<&str>, storage: &str) -> String {
    let suite = QUERY_SUITES
        .iter()
        .find(|s| s.prefix.eq_ignore_ascii_case(dataset))
        .copied();
    match suite {
        Some(suite) if suite.fan_out => {
            let storage_disp = match storage {
                "s3" | "S3" => "S3",
                _ => "NVMe",
            };
            let sf = scale_factor.unwrap_or("1");
            format!("{} ({}) (SF={})", suite.display_name, storage_disp, sf)
        }
        Some(suite) => suite.display_name.to_string(),
        None => format!("{dataset} ({storage})"),
    }
}

fn chart_name_query(dataset: &str, query_idx: i32) -> String {
    let suite = QUERY_SUITES
        .iter()
        .find(|s| s.prefix.eq_ignore_ascii_case(dataset))
        .copied();
    match suite {
        Some(suite) => format!("{} Q{}", suite.query_prefix, query_idx),
        None => format!("{} Q{}", dataset.to_uppercase(), query_idx),
    }
}

fn chart_name_compression_time(format: &str, op: &str, _dataset: &str) -> String {
    // Re-derive the v2 chart name (the metric, not the dataset) so we
    // can compare. v2's chart axis is the metric; series is the
    // dataset. v3 inverts that. For structural comparison, we project
    // back to v2's per-chart key.
    match (format, op) {
        ("vortex-file-compressed", "encode") => "COMPRESS TIME".into(),
        ("vortex-file-compressed", "decode") => "DECOMPRESS TIME".into(),
        ("parquet", "encode") => "PARQUET RS ZSTD COMPRESS TIME".into(),
        ("parquet", "decode") => "PARQUET RS ZSTD DECOMPRESS TIME".into(),
        ("lance", "encode") => "LANCE COMPRESS TIME".into(),
        ("lance", "decode") => "LANCE DECOMPRESS TIME".into(),
        _ => format!("{} {} TIME", format.to_uppercase(), op.to_uppercase()),
    }
}

fn chart_name_compression_size(format: &str) -> String {
    match format {
        "vortex-file-compressed" => "VORTEX SIZE".into(),
        "parquet" => "PARQUET SIZE".into(),
        "lance" => "LANCE SIZE".into(),
        _ => format!("{} SIZE", format.to_uppercase()),
    }
}

/// Strip casing and `_`/`-` differences between v2 and v3 chart names.
/// v2 displays uppercase; v3 stores raw values. Comparing in this
/// canonical form is enough for structural verification.
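/// For instance (illustrative, mirroring the unit tests below):
///
/// ```ignore
/// assert_eq!(normalize_chart("tpc-h q1"), "TPC H Q1");
/// assert_eq!(normalize_chart("TPC_H  Q1"), "TPC H Q1");
/// ```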
fn normalize_chart(s: &str) -> String {
    s.trim()
        .to_uppercase()
        .replace(['_', '-'], " ")
        .split_whitespace()
        .collect::<Vec<_>>()
        .join(" ")
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn normalize_chart_canonicalizes() {
        assert_eq!(normalize_chart("taxi/take"), "TAXI/TAKE");
        assert_eq!(normalize_chart("TAXI/TAKE"), "TAXI/TAKE");
        assert_eq!(normalize_chart("tpc-h q1"), "TPC H Q1");
        assert_eq!(normalize_chart("tpc h q1"), "TPC H Q1");
    }

    #[test]
    fn display_query_group_handles_fan_out() {
        assert_eq!(
            display_query_group("tpch", Some("10"), "s3"),
            "TPC-H (S3) (SF=10)"
        );
        assert_eq!(
            display_query_group("tpch", Some("100"), "nvme"),
            "TPC-H (NVMe) (SF=100)"
        );
        assert_eq!(
            display_query_group("clickbench", None, "nvme"),
            "Clickbench"
        );
    }
}
diff --git a/benchmarks-website/migrate/tests/classifier.rs b/benchmarks-website/migrate/tests/classifier.rs
new file mode 100644
index 00000000000..cddca0c517c
--- /dev/null
+++ b/benchmarks-website/migrate/tests/classifier.rs
@@ -0,0 +1,439 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! Classifier behavior pinned by representative v2 names from each
//! group in `benchmarks-website/server.js`'s `getGroup`.

use rstest::rstest;
use serde_json::json;
use vortex_bench_migrate::classifier::Outcome;
use vortex_bench_migrate::classifier::Skip;
use vortex_bench_migrate::classifier::V3Bin;
use vortex_bench_migrate::classifier::classify;
use vortex_bench_migrate::classifier::classify_outcome;
use vortex_bench_migrate::classifier::format_query;
use vortex_bench_migrate::classifier::rename_engine;
use vortex_bench_migrate::v2::V2Record;

fn record(name: &str) -> V2Record {
    V2Record {
        name: name.to_string(),
        commit_id: Some("deadbeef".into()),
        unit: Some("ns".into()),
        value: Some(json!(123)),
        storage: None,
        dataset: None,
        all_runtimes: None,
        env_triple: None,
    }
}

fn record_with_storage_and_sf(name: &str, storage: &str, suite: &str, sf: &str) -> V2Record {
    let mut r = record(name);
    r.storage = Some(storage.into());
    r.dataset = Some(json!({ suite: { "scale_factor": sf } }));
    r
}

#[rstest]
#[case::clickbench(
    "clickbench_q07/datafusion:parquet",
    V3Bin::Query {
        dataset: "clickbench".into(),
        dataset_variant: None,
        scale_factor: None,
        query_idx: 7,
        storage: "nvme".into(),
        engine: "datafusion".into(),
        format: "parquet".into(),
    },
)]
#[case::clickbench_vortex_renamed(
    "clickbench_q12/datafusion:vortex-file-compressed",
    V3Bin::Query {
        dataset: "clickbench".into(),
        dataset_variant: None,
        scale_factor: None,
        query_idx: 12,
        storage: "nvme".into(),
        engine: "datafusion".into(),
        format: "vortex-file-compressed".into(),
    },
)]
#[case::statpopgen(
    "statpopgen_q3/datafusion:parquet",
    V3Bin::Query {
        dataset: "statpopgen".into(),
        dataset_variant: None,
        scale_factor: None,
        query_idx: 3,
        storage: "nvme".into(),
        engine: "datafusion".into(),
        format: "parquet".into(),
    },
)]
#[case::polarsignals(
    "polarsignals_q1/duckdb:parquet",
    V3Bin::Query {
        dataset: "polarsignals".into(),
        dataset_variant: None,
        scale_factor: None,
        query_idx: 1,
        storage: "nvme".into(),
        engine: "duckdb".into(),
        format: "parquet".into(),
    },
)]
fn non_fan_out_query_records(#[case] name: &str, #[case] expected: V3Bin) {
    let r = record(name);
    assert_eq!(classify(&r), Some(expected));
}

#[rstest]
#[case::tpch_s3_sf100(
    "tpch_q01/datafusion:parquet",
"S3", + "tpch", + "100", + V3Bin::Query { + dataset: "tpch".into(), + dataset_variant: None, + scale_factor: Some("100".into()), + query_idx: 1, + storage: "s3".into(), + engine: "datafusion".into(), + format: "parquet".into(), + }, +)] +#[case::tpch_nvme_sf1( + "tpch_q22/duckdb:vortex-file-compressed", + "NVMe", + "tpch", + "1", + V3Bin::Query { + dataset: "tpch".into(), + dataset_variant: None, + scale_factor: Some("1".into()), + query_idx: 22, + storage: "nvme".into(), + engine: "duckdb".into(), + format: "vortex-file-compressed".into(), + }, +)] +#[case::tpcds_nvme_sf10( + "tpcds_q05/datafusion:vortex-file-compressed", + "NVMe", + "tpcds", + "10", + V3Bin::Query { + dataset: "tpcds".into(), + dataset_variant: None, + scale_factor: Some("10".into()), + query_idx: 5, + storage: "nvme".into(), + engine: "datafusion".into(), + format: "vortex-file-compressed".into(), + }, +)] +fn fan_out_query_records( + #[case] name: &str, + #[case] storage: &str, + #[case] suite: &str, + #[case] sf: &str, + #[case] expected: V3Bin, +) { + let r = record_with_storage_and_sf(name, storage, suite, sf); + assert_eq!(classify(&r), Some(expected)); +} + +#[rstest] +#[case::random_access_4_part( + "random-access/taxi/take/parquet-tokio-local-disk", + V3Bin::RandomAccess { + dataset: "taxi/take".into(), + format: "parquet".into(), + }, +)] +#[case::random_access_4_part_vortex( + "random-access/chimp/take/vortex-tokio-local-disk", + V3Bin::RandomAccess { + dataset: "chimp/take".into(), + format: "vortex-file-compressed".into(), + }, +)] +#[case::random_access_2_part_legacy( + "random-access/parquet-tokio-local-disk", + V3Bin::RandomAccess { + dataset: "random access".into(), + format: "parquet".into(), + }, +)] +#[case::random_access_4_part_lance( + "random-access/taxi/take/lance-tokio-local-disk", + V3Bin::RandomAccess { + dataset: "taxi/take".into(), + format: "lance".into(), + }, +)] +fn random_access_records(#[case] name: &str, #[case] expected: V3Bin) { + let r = record(name); + assert_eq!(classify(&r), Some(expected)); +} + +#[rstest] +#[case::compress_time_vortex( + "compress time/clickbench", + V3Bin::CompressionTime { + dataset: "clickbench".into(), + dataset_variant: None, + format: "vortex-file-compressed".into(), + op: "encode".into(), + }, +)] +#[case::decompress_time_vortex( + "decompress time/tpch_lineitem", + V3Bin::CompressionTime { + dataset: "tpch_lineitem".into(), + dataset_variant: None, + format: "vortex-file-compressed".into(), + op: "decode".into(), + }, +)] +#[case::parquet_compress( + "parquet_rs-zstd compress time/clickbench", + V3Bin::CompressionTime { + dataset: "clickbench".into(), + dataset_variant: None, + format: "parquet".into(), + op: "encode".into(), + }, +)] +#[case::lance_decompress( + "lance decompress time/clickbench", + V3Bin::CompressionTime { + dataset: "clickbench".into(), + dataset_variant: None, + format: "lance".into(), + op: "decode".into(), + }, +)] +fn compression_time_records(#[case] name: &str, #[case] expected: V3Bin) { + let r = record(name); + assert_eq!(classify(&r), Some(expected)); +} + +#[rstest] +#[case::vortex_size( + "vortex size/clickbench", + V3Bin::CompressionSize { + dataset: "clickbench".into(), + dataset_variant: None, + format: "vortex-file-compressed".into(), + }, +)] +#[case::vortex_file_compressed_size_normalizes( + "vortex-file-compressed size/clickbench", + V3Bin::CompressionSize { + dataset: "clickbench".into(), + dataset_variant: None, + format: "vortex-file-compressed".into(), + }, +)] +#[case::parquet_size( + "parquet size/clickbench", 
+ V3Bin::CompressionSize { + dataset: "clickbench".into(), + dataset_variant: None, + format: "parquet".into(), + }, +)] +#[case::lance_size( + "lance size/tpch_lineitem", + V3Bin::CompressionSize { + dataset: "tpch_lineitem".into(), + dataset_variant: None, + format: "lance".into(), + }, +)] +fn compression_size_records(#[case] name: &str, #[case] expected: V3Bin) { + let r = record(name); + assert_eq!(classify(&r), Some(expected)); +} + +#[rstest] +#[case::ratio_vortex_parquet("vortex:parquet-zstd ratio compress time/clickbench")] +#[case::ratio_vortex_lance("vortex:lance ratio decompress time/clickbench")] +#[case::ratio_size_vortex_parquet("vortex:parquet-zstd size/clickbench")] +#[case::ratio_size_vortex_raw("vortex:raw size/clickbench")] +#[case::throughput("compress throughput/clickbench")] +#[case::nonsense_prefix("not-a-known-bench/series")] +fn unmapped_records_yield_none(#[case] name: &str) { + let r = record(name); + assert_eq!( + classify(&r), + None, + "expected {name:?} to classify as None (drop)", + ); +} + +#[test] +fn parquet_zstd_size_is_deprecated() { + // `parquet-zstd` is not on the v3 emitter's format allowlist, so + // historical `parquet-zstd size/...` records bucket under + // Skip::Deprecated and don't render as orphan charts in v3. + let r = record("parquet-zstd size/clickbench"); + assert!(matches!( + classify_outcome(&r), + Outcome::Skip(Skip::Deprecated) + )); +} + +#[test] +fn vortex_parquet_zstd_ratio_is_intentional_skip() { + let r = record("vortex:parquet-zstd ratio compress time/clickbench"); + assert!(matches!( + classify_outcome(&r), + Outcome::Skip(Skip::DerivedRatio) + )); +} + +#[test] +fn vortex_parquet_zst_typo_ratio_is_intentional_skip() { + // `parquet-zst` (no trailing `d`) was emitted by some v2 runs. + // Both spellings should classify as derived ratios. + for name in [ + "vortex:parquet-zst ratio compress time/clickbench", + "vortex:parquet-zst ratio decompress time/clickbench", + ] { + let r = record(name); + assert!( + matches!(classify_outcome(&r), Outcome::Skip(Skip::DerivedRatio)), + "{name:?} should be DerivedRatio", + ); + } +} + +#[test] +fn throughput_is_intentional_skip() { + let r = record("compress throughput/clickbench"); + assert!(matches!( + classify_outcome(&r), + Outcome::Skip(Skip::Throughput) + )); +} + +#[test] +fn unknown_prefix_is_unknown() { + let r = record("not-a-known-bench/series"); + assert!(matches!(classify_outcome(&r), Outcome::Unknown)); +} + +#[test] +fn gharchive_q00_is_deprecated() { + // gharchive isn't on the v3 query-suite allowlist, so historical + // gharchive query records bucket as Skip::Deprecated. + let r = record("gharchive_q00/datafusion:parquet"); + assert!(matches!( + classify_outcome(&r), + Outcome::Skip(Skip::Deprecated) + )); +} + +#[test] +fn fineweb_q00_classifies() { + // fineweb is on V3_QUERY_SUITES (still emitted by v3 CI per + // .github/workflows/sql-benchmarks.yml's `fineweb` matrix entry), + // so historical fineweb records ingest like any other suite. + let r = record("fineweb_q00/datafusion:parquet"); + assert!(matches!( + classify_outcome(&r), + Outcome::Bin(V3Bin::Query { .. }) + )); +} + +#[test] +fn memory_record_is_historical_memory_skip() { + // v2 emitted `_q_memory/:` records that + // carry top-level memory fields V2Record doesn't deserialize. + // Skip them with a known variant so they don't trip the 5% gate. 
+ let r = record("clickbench_q07_memory/datafusion:parquet"); + assert!(matches!( + classify_outcome(&r), + Outcome::Skip(Skip::HistoricalMemory) + )); +} + +#[test] +fn tpch_compression_size_carries_scale_factor() { + // The data.json.gz "vortex size/tpch" path needs to derive + // dataset_variant from the v2 record's `dataset` object, the same + // way the file-sizes path does. Otherwise SF=10 rows from the two + // sources never collide on `mid` and produce duplicate rows. + let mut r = record("vortex size/tpch"); + r.dataset = Some(serde_json::json!({ "tpch": { "scale_factor": "10" } })); + let outcome = classify_outcome(&r); + let Outcome::Bin(V3Bin::CompressionSize { + dataset, + dataset_variant, + format, + }) = outcome + else { + panic!("expected Bin(CompressionSize), got {outcome:?}"); + }; + assert_eq!(dataset, "tpch"); + assert_eq!(dataset_variant, Some("10".into())); + assert_eq!(format, "vortex-file-compressed"); +} + +#[test] +fn tpch_compression_size_drops_default_scale_factor() { + // SF "1.0" matches the file-sizes path's filter and collapses to + // dataset_variant: None. + let mut r = record("vortex size/tpch"); + r.dataset = Some(serde_json::json!({ "tpch": { "scale_factor": "1.0" } })); + let outcome = classify_outcome(&r); + let Outcome::Bin(V3Bin::CompressionSize { + dataset_variant, .. + }) = outcome + else { + panic!("expected Bin(CompressionSize), got {outcome:?}"); + }; + assert_eq!(dataset_variant, None); +} + +#[test] +fn engine_casing_lowercased() { + // Older v2 records emitted display-case engines like `DataFusion` + // and `DuckDB`. The classifier lowercases at push time so dedup + // collapses display-case rows into the canonical lowercase ones. + let r = record("clickbench_q07/DataFusion:parquet"); + let outcome = classify_outcome(&r); + let Outcome::Bin(V3Bin::Query { engine, format, .. }) = outcome else { + panic!("expected Bin(Query), got {outcome:?}"); + }; + assert_eq!(engine, "datafusion"); + assert_eq!(format, "parquet"); +} + +#[test] +fn rename_engine_pins_canonical_outputs() { + assert_eq!(rename_engine("vortex-tokio-local-disk"), "vortex-nvme"); + assert_eq!( + rename_engine("datafusion:vortex-file-compressed"), + "datafusion:vortex" + ); + assert_eq!(rename_engine("LANCE"), "lance"); +} + +#[test] +fn format_query_pins_v2_display() { + assert_eq!(format_query("clickbench_q00"), "CLICKBENCH Q0"); + assert_eq!(format_query("tpch_q22"), "TPC-H Q22"); + assert_eq!(format_query("tpcds_q42"), "TPC-DS Q42"); + assert_eq!(format_query("polarsignals_q1"), "POLARSIGNALS Q1"); + // Names that don't match a suite fall back to upper + " " replace. + assert_eq!( + format_query("vortex-file-compressed size"), + "VORTEX FILE COMPRESSED SIZE" + ); +} diff --git a/benchmarks-website/migrate/tests/end_to_end.rs b/benchmarks-website/migrate/tests/end_to_end.rs new file mode 100644 index 00000000000..210092a4058 --- /dev/null +++ b/benchmarks-website/migrate/tests/end_to_end.rs @@ -0,0 +1,263 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Inline JSONL fixtures driven through the full migration into a +//! tempdir DuckDB. No live S3. 
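+//!
+//! Illustrative fixture layout (the names are whatever each test hands
+//! to `build_fixture` below; only `commits.json`, `data.json.gz`, and
+//! the `file-sizes-*.json.gz` pattern are fixed):
+//!
+//!   <tempdir>/commits.json            - one commit JSON object per line
+//!   <tempdir>/data.json.gz            - gzipped v2 benchmark-record JSONL
+//!   <tempdir>/file-sizes-<id>.json.gz - optional, any number of them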
+ +use std::fs::File; +use std::io::Write; +use std::path::Path; + +use duckdb::Connection; +use flate2::Compression; +use flate2::write::GzEncoder; +use tempfile::TempDir; +use vortex_bench_migrate::migrate; +use vortex_bench_migrate::source::Source; + +const COMMITS_JSONL: &str = r#"{"id":"deadbeef","timestamp":"2026-04-25T00:00:00Z","message":"fixture commit","author":{"name":"A","email":"a@example.com"},"committer":{"name":"C","email":"c@example.com"},"tree_id":"abcd0001","url":"https://example.com/commit/deadbeef"} +"#; + +const DATA_JSONL: &str = r#"{"name":"clickbench_q07/datafusion:parquet","commit_id":"deadbeef","unit":"ns","value":42000,"all_runtimes":[41000,42000,43000]} +{"name":"compress time/clickbench","commit_id":"deadbeef","unit":"ns","value":99} +{"name":"vortex size/clickbench","commit_id":"deadbeef","unit":"bytes","value":1024} +{"name":"random-access/taxi/take/parquet-tokio-local-disk","commit_id":"deadbeef","unit":"ns","value":777,"all_runtimes":[700,777,800]} +"#; + +/// Build a local-source fixture directory. Caller supplies the contents +/// of `commits.json`, `data.json.gz`, and any number of +/// `file-sizes-*.json.gz` files (name → contents). +fn build_fixture(commits: &str, data: &str, file_sizes: &[(&str, &str)]) -> TempDir { + let dir = TempDir::new().expect("tempdir"); + write_text(&dir.path().join("commits.json"), commits); + write_gz(&dir.path().join("data.json.gz"), data); + for (name, body) in file_sizes { + write_gz(&dir.path().join(name), body); + } + dir +} + +fn write_text(path: &Path, body: &str) { + let mut f = File::create(path).unwrap(); + f.write_all(body.as_bytes()).unwrap(); +} + +fn write_gz(path: &Path, body: &str) { + let f = File::create(path).unwrap(); + let mut gz = GzEncoder::new(f, Compression::default()); + gz.write_all(body.as_bytes()).unwrap(); + gz.finish().unwrap(); +} + +#[test] +fn migrate_inline_fixture_populates_each_table() { + let src_dir = build_fixture(COMMITS_JSONL, DATA_JSONL, &[]); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + let summary = migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + assert_eq!(summary.records_read, 4, "summary={summary}"); + assert_eq!(summary.uncategorized, 0, "summary={summary}"); + assert_eq!(summary.commits_inserted, 1); + assert_eq!(summary.query_inserted, 1); + assert_eq!(summary.compression_time_inserted, 1); + assert_eq!(summary.compression_size_inserted, 1); + assert_eq!(summary.random_access_inserted, 1); + + let conn = Connection::open(&target).unwrap(); + let count = |table: &str| -> i64 { + conn.query_row(&format!("SELECT COUNT(*) FROM {table}"), [], |r| r.get(0)) + .unwrap() + }; + assert_eq!(count("commits"), 1); + assert_eq!(count("query_measurements"), 1); + assert_eq!(count("compression_times"), 1); + assert_eq!(count("compression_sizes"), 1); + assert_eq!(count("random_access_times"), 1); + + // Spot-check the v3 column values for each kind. 
+ let (engine, format, query_idx, value_ns): (String, String, i32, i64) = conn + .query_row( + "SELECT engine, format, query_idx, value_ns FROM query_measurements", + [], + |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?, r.get(3)?)), + ) + .unwrap(); + assert_eq!(engine, "datafusion"); + assert_eq!(format, "parquet"); + assert_eq!(query_idx, 7); + assert_eq!(value_ns, 42000); + + let (dataset, format, op): (String, String, String) = conn + .query_row( + "SELECT dataset, format, op FROM compression_times", + [], + |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)), + ) + .unwrap(); + assert_eq!(dataset, "clickbench"); + assert_eq!(format, "vortex-file-compressed"); + assert_eq!(op, "encode"); + + let (dataset, format, value_bytes): (String, String, i64) = conn + .query_row( + "SELECT dataset, format, value_bytes FROM compression_sizes", + [], + |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)), + ) + .unwrap(); + assert_eq!(dataset, "clickbench"); + assert_eq!(format, "vortex-file-compressed"); + assert_eq!(value_bytes, 1024); + + let (dataset, format): (String, String) = conn + .query_row("SELECT dataset, format FROM random_access_times", [], |r| { + Ok((r.get(0)?, r.get(1)?)) + }) + .unwrap(); + assert_eq!(dataset, "taxi/take"); + assert_eq!(format, "parquet"); +} + +#[test] +fn dedup_collision_keeps_one_row() { + // Two data.json.gz lines whose query-measurement dim columns are + // identical (same commit / dataset / engine / format / query_idx, + // and `storage` collapses to "nvme" since `storage` is unset). + // Different `value`s. The accumulator's HashSet + // should drop the second one and bump `summary.deduped`. + const DATA: &str = r#"{"name":"clickbench_q07/datafusion:parquet","commit_id":"deadbeef","unit":"ns","value":111} +{"name":"clickbench_q07/datafusion:parquet","commit_id":"deadbeef","unit":"ns","value":222} +"#; + + let src_dir = build_fixture(COMMITS_JSONL, DATA, &[]); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + let summary = migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + assert_eq!(summary.records_read, 2, "summary={summary}"); + assert_eq!(summary.query_inserted, 1, "summary={summary}"); + assert_eq!(summary.deduped, 1, "summary={summary}"); + + let conn = Connection::open(&target).unwrap(); + let n: i64 = conn + .query_row("SELECT COUNT(*) FROM query_measurements", [], |r| r.get(0)) + .unwrap(); + assert_eq!(n, 1); +} + +#[test] +fn dedup_with_conflicting_value_ns_is_counted() { + // Same dim columns, different `value`s. Dedup keeps the first + // and bumps `deduped_with_conflict` because the dropped row's + // value_ns differed from the kept row's. This is the signal we + // care about when watching for silent value-corruption across + // duplicated v2 emissions. + const DATA: &str = r#"{"name":"clickbench_q07/datafusion:parquet","commit_id":"deadbeef","unit":"ns","value":111} +{"name":"clickbench_q07/datafusion:parquet","commit_id":"deadbeef","unit":"ns","value":222} +"#; + + let src_dir = build_fixture(COMMITS_JSONL, DATA, &[]); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + let summary = migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + assert_eq!(summary.deduped, 1, "summary={summary}"); + assert_eq!(summary.deduped_with_conflict, 1, "summary={summary}"); +} + +#[test] +fn dedup_with_matching_value_ns_does_not_count_conflict() { + // Same dim columns AND identical `value`s. 
Dedup still drops the + // duplicate, but `deduped_with_conflict` stays 0. + const DATA: &str = r#"{"name":"clickbench_q07/datafusion:parquet","commit_id":"deadbeef","unit":"ns","value":111} +{"name":"clickbench_q07/datafusion:parquet","commit_id":"deadbeef","unit":"ns","value":111} +"#; + + let src_dir = build_fixture(COMMITS_JSONL, DATA, &[]); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + let summary = migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + assert_eq!(summary.deduped, 1, "summary={summary}"); + assert_eq!(summary.deduped_with_conflict, 0, "summary={summary}"); +} + +#[test] +fn compression_size_data_and_file_sizes_merge() { + // A `vortex size/tpch` record from data.json.gz and a + // file-sizes-tpch-nvme.json.gz row covering the same (commit, + // dataset, format, SF) tuple should produce the *same* + // measurement_id so the in-memory accumulator merges them into + // one row instead of two. + // + // Both sources use scale_factor "1.0", which both code paths + // filter out → dataset_variant: None on both sides → matching mid. + const DATA: &str = r#"{"name":"vortex size/tpch","commit_id":"deadbeef","unit":"bytes","value":200,"dataset":{"tpch":{"scale_factor":"1.0"}}} +"#; + const FILE_SIZES: &str = r#"{"commit_id":"deadbeef","benchmark":"tpch","scale_factor":"1.0","format":"vortex-file-compressed","file":"part-0.vortex","size_bytes":100} +"#; + + let src_dir = build_fixture( + COMMITS_JSONL, + DATA, + &[("file-sizes-tpch-nvme.json.gz", FILE_SIZES)], + ); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + let summary = migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + assert_eq!(summary.compression_size_inserted, 1, "summary={summary}"); + + let conn = Connection::open(&target).unwrap(); + let (n, value_bytes): (i64, i64) = conn + .query_row( + "SELECT COUNT(*), SUM(value_bytes) FROM compression_sizes", + [], + |r| Ok((r.get(0)?, r.get(1)?)), + ) + .unwrap(); + assert_eq!(n, 1); + // data.json.gz seeds value_bytes=200, file-sizes adds 100. + assert_eq!(value_bytes, 300); +} + +#[test] +fn file_sizes_sum_into_one_row() { + // Two file-sizes rows sharing (commit, benchmark, format, + // scale_factor) and value_bytes 100 + 200 must collapse to a + // single compression_sizes row with 300. 
+    const FILE_SIZES: &str = r#"{"commit_id":"deadbeef","benchmark":"clickbench","scale_factor":"1.0","format":"vortex-file-compressed","file":"part-0.vortex","size_bytes":100}
+{"commit_id":"deadbeef","benchmark":"clickbench","scale_factor":"1.0","format":"vortex-file-compressed","file":"part-1.vortex","size_bytes":200}
+"#;
+
+    let src_dir = build_fixture(
+        COMMITS_JSONL,
+        "",
+        &[("file-sizes-clickbench.json.gz", FILE_SIZES)],
+    );
+    let target_dir = TempDir::new().unwrap();
+    let target = target_dir.path().join("v3.duckdb");
+
+    let summary = migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap();
+
+    assert_eq!(summary.file_size_inserted, 2, "summary={summary}");
+    assert_eq!(summary.compression_size_inserted, 1, "summary={summary}");
+
+    let conn = Connection::open(&target).unwrap();
+    let n: i64 = conn
+        .query_row("SELECT COUNT(*) FROM compression_sizes", [], |r| r.get(0))
+        .unwrap();
+    assert_eq!(n, 1);
+    let value_bytes: i64 = conn
+        .query_row("SELECT value_bytes FROM compression_sizes", [], |r| {
+            r.get(0)
+        })
+        .unwrap();
+    assert_eq!(value_bytes, 300);
+}

From ccc591947f9abbb6cb34fc3186e0513d08b82568 Mon Sep 17 00:00:00 2001
From: Connor Tsui
Date: Mon, 27 Apr 2026 09:53:51 -0400
Subject: [PATCH 11/26] add .bench-env to gitignore

Signed-off-by: Connor Tsui
---
 .gitignore | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 6a996cf96cc..bcc8ef746ee 100644
--- a/.gitignore
+++ b/.gitignore
@@ -244,4 +244,4 @@ trace*.pb
 vortex-python/.benchmarks/
 # For local benchmarks website server and things like the WAL
 **.duckdb*
-
+.bench-env

From d21f57f850c382c3b1c3daf2ce67a95610d9fdd7 Mon Sep 17 00:00:00 2001
From: Connor Tsui <87130162+connortsui20@users.noreply.github.com>
Date: Mon, 27 Apr 2026 13:39:04 -0400
Subject: [PATCH 12/26] [claude] chore(benchmarks-migrate): post-alpha cleanup
 nits (#7671)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Six small fixes left over from the v3 migration alpha. All paths are
relative to `benchmarks-website/migrate/` unless noted.

## Fixes

- **Scale-factor canonicalization** (`src/classifier.rs::bin_compression_size`,
  `src/migrate.rs::migrate_file_sizes`, helper in `src/v2.rs`): both
  paths now route the v2 SF string through `canonical_scale_factor`,
  which parses to `f64` and formats with no trailing zeros. Without
  this, `"1"` vs `"1.0"` and `"10"` vs `"10.0"` would produce different
  `dataset_variant` strings and prevent the data.json.gz and
  file-sizes-*.json.gz rows from sharing a `measurement_id`.
- **Summary counter timing** (`src/migrate.rs::run`): per-fact counters
  used to be set from accumulator length *before* the flush, so a flush
  failure would print a summary that lied. Refactored into a
  `flush_all` helper that bumps `summary.<fact>_inserted` from the
  flushed `RecordBatch::num_rows()` only after each
  `Appender::append_record_batch` succeeds.
- **Empty-string normalization in commits** (`src/commits.rs`,
  `benchmarks-website/server/src/schema.rs`,
  `benchmarks-website/server/src/api.rs`): `message`,
  `author_name`/`email`, and `committer_name`/`email` now bind as
  `Option<String>` and store SQL `NULL` when v2 supplied an empty or
  whitespace-only string. Schema columns were made nullable; server
  reads use `COALESCE(c.message, '')` so the existing `String` decoder
  still works.
- **Orphan WAL cleanup** (`src/migrate.rs::open_target_db`): the
  existing code already attempts `remove_if_exists` on the `.wal`
  regardless of whether the main file was present; pinned the behavior
  with a regression test that stages an orphan `.wal` (no main file)
  and asserts the orphan bytes don't survive `open_target_db`.
- **Random-access dataset extraction**
  (`src/classifier.rs::bin_random_access`): 4-part records
  `random-access/<dataset>/<pattern>/<ext>-tokio-local-disk` continue
  to extract `dataset/pattern` from the raw name. 2-part legacy records
  carry no dataset and used to render under the placeholder
  `"random access"`; they're now dropped to keep the v3 dataset column
  meaningful.
- **`migrate_file_sizes` dataset fallback**
  (`src/migrate.rs::migrate_file_sizes`): when the matrix id stripped
  from `file-sizes-<id>.json.gz` isn't on the
  `KNOWN_FILE_SIZES_SUITES` allowlist, the fallback now emits
  `unknown:<id>` so the UI clearly flags it instead of presenting it as
  a real dataset.

## Tests

Each fix has a focused regression test (`rstest` parametrization where
useful):

- `tests/classifier.rs::compression_size_scale_factor_canonicalizes`
  covering `"1"`, `"1.0"`, `"10"`, `"10.0"`, `"0.1"`, whitespace, and
  `""`.
- `tests/classifier.rs::unmapped_records_yield_none` extended with
  `random_access_2_part_legacy` and `random_access_3_part`.
- `migrate::tests::flush_all_does_not_overcount_on_failure` (private
  unit test that drops `compression_times` to force the second flush to
  fail and asserts only the queries counter is set).
- `tests/end_to_end.rs::summary_counts_match_actual_rows_on_success`
  (sister invariant for the success path).
- `tests/end_to_end.rs::empty_author_email_stored_as_null`.
- `tests/end_to_end.rs::open_target_db_removes_orphan_wal`.
- `tests/end_to_end.rs::file_sizes_unknown_id_falls_back_to_unknown_prefix`
  and `file_sizes_known_id_uses_id_directly`.
- `tests/end_to_end.rs::compression_size_data_and_file_sizes_merge_with_canonical_sf`
  (cross-path SF canonicalization end to end).

## Verification

- `cargo build -p vortex-bench-migrate` — clean.
- `cargo test -p vortex-bench-migrate` — 7 unit + 46 classifier + 12
  end-to-end tests all pass.
- `cargo test -p vortex-bench-server` — 6 unit + 10 ingest + 6 web_ui
  tests pass; the schema and `COALESCE` changes are server-safe.
- `cargo clippy -p vortex-bench-migrate --all-targets` — clean.
- `cargo fmt` on changed files (nightly fmt is unavailable in this
  sandbox; ran with stable instead, which is a no-op for the
  imports-granularity options the repo's `rustfmt.toml` gates on
  nightly).
- Skipped `./scripts/public-api.sh`: migrate is a leaf binary outside
  the public-api lockfile set, and the only newly `pub` item is the
  internal `canonical_scale_factor` helper.
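For reference, the canonicalization rule in miniature (a condensed,
behavior-equivalent sketch of the `src/v2.rs` helper in the diff below,
not the literal code):

```rust
/// "1" / "1.0" -> None; "10" / "10.0" -> Some("10"); " 0.1 " -> Some("0.1").
fn canonical_scale_factor(raw: Option<&str>) -> Option<String> {
    // Empty or unparseable strings fall out via the `?`s; the default
    // SF of 1 collapses to None (no dataset_variant).
    let value: f64 = raw?.trim().parse().ok()?;
    (value != 1.0).then(|| format!("{value}"))
}
```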
Signed-off-by: Claude
---
_Generated by [Claude Code](https://claude.ai/code/session_012XyYJRpcGFxmJXdTJuW8Ff)_
---------
Signed-off-by: Claude
Co-authored-by: Claude
Signed-off-by: Connor Tsui
---
 benchmarks-website/migrate/src/classifier.rs  |  80 ++++----
 benchmarks-website/migrate/src/commits.rs     |  53 +++--
 benchmarks-website/migrate/src/migrate.rs     | 153 +++++++++++---
 benchmarks-website/migrate/src/source.rs      |   2 +-
 benchmarks-website/migrate/src/v2.rs          |  22 +++
 .../migrate/tests/classifier.rs               |  53 ++++-
 .../migrate/tests/end_to_end.rs               | 187 ++++++++++++++++++
 benchmarks-website/server/src/api.rs          |  10 +-
 benchmarks-website/server/src/schema.rs       |  10 +-
 9 files changed, 463 insertions(+), 107 deletions(-)

diff --git a/benchmarks-website/migrate/src/classifier.rs b/benchmarks-website/migrate/src/classifier.rs
index 8a17b31fcd2..ebc14dea39b 100644
--- a/benchmarks-website/migrate/src/classifier.rs
+++ b/benchmarks-website/migrate/src/classifier.rs
@@ -398,7 +398,7 @@ pub enum V3Bin {
 pub fn classify(record: &V2Record) -> Option<V3Bin> {
     let cls = classify_v2(record)?;
     match &cls.group {
-        V2Group::RandomAccess => bin_random_access(&cls, record),
+        V2Group::RandomAccess => bin_random_access(record),
         V2Group::Compression => bin_compression_time(&cls, record),
         V2Group::CompressionSize => bin_compression_size(&cls, record),
         V2Group::Query { .. } => bin_query(&cls, record),
@@ -537,7 +537,16 @@ pub fn classify_outcome(record: &V2Record) -> Outcome {
         return Outcome::Skip(Skip::DerivedRatio);
     }
     let bin = match &cls.group {
-        V2Group::RandomAccess => bin_random_access(&cls, record),
+        V2Group::RandomAccess => match bin_random_access(record) {
+            Some(b) => Some(b),
+            // Legacy 2-part `random-access/<ext>-…` records carry
+            // no dataset and are intentionally dropped by
+            // `bin_random_access`. Route them to Skip so the
+            // `Outcome::Unknown` arm below — and the 5%
+            // uncategorized gate in `migrate::run` — don't trip on
+            // them.
+            None => return Outcome::Skip(Skip::UnsupportedShape),
+        },
         V2Group::Compression => bin_compression_time(&cls, record),
         V2Group::CompressionSize => bin_compression_size(&cls, record),
         V2Group::Query { .. } => bin_query(&cls, record),
@@ -556,34 +565,34 @@ pub fn classify_outcome(record: &V2Record) -> Outcome {
     Outcome::Bin(bin)
 }
 
-fn bin_random_access(cls: &V2Classification, record: &V2Record) -> Option<V3Bin> {
-    // v2 chart name shape: "RANDOM ACCESS" or "DATASET/PATTERN" (uppercase).
-    // We store it as the v3 dataset value verbatim, lowercased so
-    // `/api/groups` returns canonical lowercase names.
-    let dataset = cls.chart.to_lowercase();
-    if dataset.is_empty() {
-        return None;
-    }
-    // Pull format from the raw, pre-rename v2 name so v3 stores the
-    // canonical `Format::name()` string (matching what the v3 live
-    // emitter writes). Raw shape is
+fn bin_random_access(record: &V2Record) -> Option<V3Bin> {
+    // Pull dataset and format from the raw, pre-rename v2 name so v3
+    // stores meaningful values. Raw shape is
     // `random-access/<dataset>/<pattern>/<ext>-tokio-local-disk`
-    // (4-part) or `random-access/<ext>-tokio-local-disk` (2-part
-    // legacy). After stripping the `-tokio-local-disk` suffix, map the
-    // v2 random-access ext label (`vortex`, from `Format::ext()`) to
-    // the canonical name (`vortex-file-compressed`, from
-    // `Format::name()`). `parquet` and `lance` match between ext and
-    // name. The `vortex` ext is shared by both `OnDiskVortex` (name
+    // (4-part). 2-part legacy records (`random-access/<ext>-…`)
+    // carry no dataset and historically rendered as the placeholder
+    // string "RANDOM ACCESS"; drop them rather than emit a fake
+    // dataset. Deriving from the raw name (rather than `cls.chart`)
+    // also keeps this independent of v2's `normalizeChartName`.
+    //
+    // After stripping the `-tokio-local-disk` suffix, map the v2
+    // random-access ext label (`vortex`, from `Format::ext()`) to the
+    // canonical name (`vortex-file-compressed`, from `Format::name()`).
+    // `parquet` and `lance` match between ext and name. The `vortex`
+    // ext is shared by both `OnDiskVortex` (name
     // `vortex-file-compressed`) and `VortexCompact` (name
     // `vortex-compact`), but v2's random-access bench only emitted
     // `OnDiskVortex`, so mapping to `vortex-file-compressed` is
     // correct for all historical data.
     let parts: Vec<&str> = record.name.split('/').collect();
-    let raw = match parts.len() {
-        4 => parts[3],
-        2 => parts[1],
-        _ => return None,
-    };
+    if parts.len() != 4 {
+        return None;
+    }
+    if parts[1].is_empty() || parts[2].is_empty() {
+        return None;
+    }
+    let dataset = format!("{}/{}", parts[1], parts[2]).to_lowercase();
+    let raw = parts[3];
     if raw.is_empty() || raw == "default" {
         return None;
     }
@@ -668,15 +677,20 @@ fn bin_compression_size(cls: &V2Classification, record: &V2Record) -> Option<V3Bin> {
diff --git a/benchmarks-website/migrate/src/commits.rs b/benchmarks-website/migrate/src/commits.rs
--- a/benchmarks-website/migrate/src/commits.rs
+++ b/benchmarks-website/migrate/src/commits.rs
@@ -24,36 +24,18 @@ pub fn upsert_commit(conn: &Connection, commit: &V2Commit) -> Result<UpsertOutcome> {
     let mut warnings = Vec::new();
     let timestamp = require_field(&commit.timestamp, "timestamp", &commit.id, &mut warnings);
-    let message = require_field(&commit.message, "message", &commit.id, &mut warnings);
-    let author_name = require_field(
-        &commit.author.as_ref().and_then(|p| p.name.clone()),
-        "author.name",
-        &commit.id,
-        &mut warnings,
-    );
-    let author_email = require_field(
-        &commit.author.as_ref().and_then(|p| p.email.clone()),
-        "author.email",
-        &commit.id,
-        &mut warnings,
-    );
-    let committer_name = require_field(
-        &commit.committer.as_ref().and_then(|p| p.name.clone()),
-        "committer.name",
-        &commit.id,
-        &mut warnings,
-    );
-    let committer_email = require_field(
-        &commit.committer.as_ref().and_then(|p| p.email.clone()),
-        "committer.email",
-        &commit.id,
-        &mut warnings,
-    );
+    let message = optional_field(&commit.message);
+    let author_name = optional_field(&commit.author.as_ref().and_then(|p| p.name.clone()));
+    let author_email = optional_field(&commit.author.as_ref().and_then(|p| p.email.clone()));
+    let committer_name = optional_field(&commit.committer.as_ref().and_then(|p| p.name.clone()));
+    let committer_email = optional_field(&commit.committer.as_ref().and_then(|p| p.email.clone()));
     let tree_sha = require_field(&commit.tree_id, "tree_id", &commit.id, &mut warnings);
     let url = require_field(&commit.url, "url", &commit.id, &mut warnings);
@@ -93,6 +75,19 @@ fn require_field(
     }
 }
 
+/// Coerce a v2-supplied `Option<String>` into a SQL-bindable
+/// `Option<String>`, treating an empty / whitespace-only value as
+/// missing. v2 sometimes wrote `""` for blank author / committer /
+/// message fields; storing those as actual `NULL` lets the UI
+/// distinguish "missing metadata" from "deliberately blank".
+fn optional_field(field: &Option<String>) -> Option<String> {
+    field
+        .as_deref()
+        .map(str::trim)
+        .filter(|s| !s.is_empty())
+        .map(str::to_string)
+}
+
 /// Per-call warning bag returned to the caller for logging.
 #[derive(Debug, Default)]
 pub struct UpsertOutcome {
diff --git a/benchmarks-website/migrate/src/migrate.rs b/benchmarks-website/migrate/src/migrate.rs
index 7b3b32bb51c..93885e5d431 100644
--- a/benchmarks-website/migrate/src/migrate.rs
+++ b/benchmarks-website/migrate/src/migrate.rs
@@ -49,10 +49,12 @@ use vortex_utils::aliases::hash_map::HashMap;
 use crate::classifier;
 use crate::classifier::V3Bin;
 use crate::commits::upsert_commit;
+use crate::source::KNOWN_FILE_SIZES_SUITES;
 use crate::source::Source;
 use crate::v2::V2Commit;
 use crate::v2::V2FileSize;
 use crate::v2::V2Record;
+use crate::v2::canonical_scale_factor;
 use crate::v2::index_commits;
 use crate::v2::runtime_as_i64;
 use crate::v2::value_as_f64;
@@ -171,27 +173,46 @@ pub fn run(source: &Source, target: &Path) -> Result<MigrationSummary> {
     }
 
     info!("Flushing accumulators to DuckDB");
-    summary.query_inserted = q.measurement_id.len() as u64;
-    summary.compression_time_inserted = ct.measurement_id.len() as u64;
-    summary.random_access_inserted = ra.measurement_id.len() as u64;
-    summary.compression_size_inserted = cs.rows.len() as u64;
-
-    flush(&conn, "query_measurements", build_query_batch(q)?)?;
-    flush(
-        &conn,
-        "compression_times",
-        build_compression_time_batch(ct)?,
-    )?;
-    flush(&conn, "random_access_times", build_random_access_batch(ra)?)?;
-    flush(
-        &conn,
-        "compression_sizes",
-        build_compression_size_batch(cs)?,
-    )?;
+    flush_all(&conn, q, ct, ra, cs, &mut summary)?;
 
     Ok(summary)
 }
 
+/// Flush each accumulator's batch and bump the matching per-fact
+/// summary counter only AFTER the flush succeeds. This way a flush
+/// failure leaves the counter at zero (or its previous value) rather
+/// than reporting rows that never landed in DuckDB.
+fn flush_all(
+    conn: &Connection,
+    q: QueryAccum,
+    ct: CompressionTimeAccum,
+    ra: RandomAccessAccum,
+    cs: CompressionSizeAccum,
+    summary: &mut MigrationSummary,
+) -> Result<()> {
+    let batch = build_query_batch(q)?;
+    let n = batch.num_rows() as u64;
+    flush(conn, "query_measurements", batch)?;
+    summary.query_inserted = n;
+
+    let batch = build_compression_time_batch(ct)?;
+    let n = batch.num_rows() as u64;
+    flush(conn, "compression_times", batch)?;
+    summary.compression_time_inserted = n;
+
+    let batch = build_random_access_batch(ra)?;
+    let n = batch.num_rows() as u64;
+    flush(conn, "random_access_times", batch)?;
+    summary.random_access_inserted = n;
+
+    let batch = build_compression_size_batch(cs)?;
+    let n = batch.num_rows() as u64;
+    flush(conn, "compression_sizes", batch)?;
+    summary.compression_size_inserted = n;
+
+    Ok(())
+}
+
 fn read_commits(source: &Source) -> Result<Vec<V2Commit>> {
     let reader = source.open_commits_jsonl()?;
     let mut commits: Vec<V2Commit> = Vec::new();
@@ -409,11 +430,19 @@ fn migrate_file_sizes(
     cs: &mut CompressionSizeAccum,
 ) -> Result<()> {
     let reader = source.open_file_sizes(name)?;
-    let dataset_fallback = name
-        .strip_prefix("file-sizes-")
-        .and_then(|s| s.strip_suffix(".json.gz"))
-        .unwrap_or(name)
-        .to_string();
+    // Prefix unknown-id fallbacks with `unknown:` so they're clearly
+    // labeled in the UI rather than masquerading as a dataset name.
+ let dataset_fallback = { + let stripped = name + .strip_prefix("file-sizes-") + .and_then(|s| s.strip_suffix(".json.gz")) + .unwrap_or(name); + if KNOWN_FILE_SIZES_SUITES.contains(&stripped) { + stripped.to_string() + } else { + format!("unknown:{stripped}") + } + }; let started = Instant::now(); let mut last_log = Instant::now(); for line in reader.lines() { @@ -438,11 +467,10 @@ fn migrate_file_sizes( } else { sz.benchmark.clone() }; - let dataset_variant = sz - .scale_factor - .as_ref() - .filter(|s| !s.is_empty() && s.as_str() != "1.0") - .cloned(); + // Run SF through canonical_scale_factor so `"1"`, `"1.0"`, `"10"` + // and `"10.0"` collapse to one form, matching what + // `bin_compression_size` writes for the data.json.gz path. + let dataset_variant = canonical_scale_factor(sz.scale_factor.as_deref()); let csr = CompressionSize { commit_sha: sz.commit_id.clone(), dataset, @@ -834,3 +862,74 @@ impl std::fmt::Display for MigrationSummary { Ok(()) } } + +#[cfg(test)] +mod tests { + use vortex_bench_server::records::QueryMeasurement; + + use super::*; + + fn open_db_without(table: &str) -> (tempfile::TempDir, Connection) { + let dir = tempfile::TempDir::new().unwrap(); + let path = dir.path().join("v3.duckdb"); + let conn = open_target_db(&path).unwrap(); + conn.execute_batch(&format!("DROP TABLE {table}")).unwrap(); + (dir, conn) + } + + fn one_query_row() -> QueryMeasurement { + QueryMeasurement { + commit_sha: "deadbeef".into(), + dataset: "clickbench".into(), + dataset_variant: None, + scale_factor: None, + query_idx: 7, + storage: "nvme".into(), + engine: "datafusion".into(), + format: "parquet".into(), + value_ns: 100, + all_runtimes_ns: vec![100], + peak_physical: None, + peak_virtual: None, + physical_delta: None, + virtual_delta: None, + env_triple: None, + } + } + + #[test] + fn flush_all_does_not_overcount_on_failure() { + // Drop `compression_times` before flushing so the second + // flush in `flush_all` fails. The first (queries) succeeded, + // so its counter must be set; the failed table's counter and + // every later table's counter must stay at zero. + let (_dir, conn) = open_db_without("compression_times"); + + let mut summary = MigrationSummary::default(); + let mut q = QueryAccum::default(); + let qm = one_query_row(); + let mid = vortex_bench_server::db::measurement_id_query(&qm); + q.push(mid, qm, &mut summary); + + let ct = CompressionTimeAccum::default(); + let ra = RandomAccessAccum::default(); + let cs = CompressionSizeAccum::default(); + + let result = flush_all(&conn, q, ct, ra, cs, &mut summary); + assert!(result.is_err(), "expected flush to fail on missing table"); + + assert_eq!( + summary.query_inserted, 1, + "query flushed before the failure must be counted" + ); + assert_eq!( + summary.compression_time_inserted, 0, + "failed flush must not bump the counter" + ); + assert_eq!(summary.random_access_inserted, 0, "later flushes never ran"); + assert_eq!( + summary.compression_size_inserted, 0, + "later flushes never ran" + ); + } +} diff --git a/benchmarks-website/migrate/src/source.rs b/benchmarks-website/migrate/src/source.rs index c18e86a63ca..d0c059569dd 100644 --- a/benchmarks-website/migrate/src/source.rs +++ b/benchmarks-website/migrate/src/source.rs @@ -126,7 +126,7 @@ fn open_s3(name: &str) -> Result> { /// The post-bench `file-sizes` step uploads `file-sizes-${{ matrix.id /// }}.json.gz`, so this list must match those IDs verbatim. Adding a /// new matrix entry to that workflow means adding the same ID here. 
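 /// (Illustrative: the workflow's `tpch-s3` matrix entry uploads
 /// `file-sizes-tpch-s3.json.gz`, whose stripped id `tpch-s3` is what
 /// must be listed here.)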
-const KNOWN_FILE_SIZES_SUITES: &[&str] = &[
+pub(crate) const KNOWN_FILE_SIZES_SUITES: &[&str] = &[
     "clickbench-nvme",
     "tpch-nvme",
     "tpch-s3",
diff --git a/benchmarks-website/migrate/src/v2.rs b/benchmarks-website/migrate/src/v2.rs
index 2a9d3bdf5d0..de79837cfbd 100644
--- a/benchmarks-website/migrate/src/v2.rs
+++ b/benchmarks-website/migrate/src/v2.rs
@@ -51,6 +51,28 @@ pub fn dataset_scale_factor(dataset: &serde_json::Value, key: &str) -> Option<String>
 
+/// Canonicalize a v2 scale-factor string: parse to `f64` and format
+/// with no trailing zeros, so `"1"`/`"1.0"` and `"10"`/`"10.0"`
+/// collapse to a single spelling. The default SF of 1 maps to `None`
+/// (no `dataset_variant`).
+pub fn canonical_scale_factor(raw: Option<&str>) -> Option<String> {
+    let s = raw?.trim();
+    if s.is_empty() {
+        return None;
+    }
+    let value: f64 = s.parse().ok()?;
+    if value == 1.0 {
+        return None;
+    }
+    Some(format!("{value}"))
+}
+
 /// Best-effort numeric coercion for the polymorphic `value` field.
 pub fn value_as_f64(value: &serde_json::Value) -> Option<f64> {
     match value {
diff --git a/benchmarks-website/migrate/tests/classifier.rs b/benchmarks-website/migrate/tests/classifier.rs
index cddca0c517c..300c610bd24 100644
--- a/benchmarks-website/migrate/tests/classifier.rs
+++ b/benchmarks-website/migrate/tests/classifier.rs
@@ -161,13 +161,6 @@ fn fan_out_query_records(
     format: "vortex-file-compressed".into(),
     },
 )]
-#[case::random_access_2_part_legacy(
-    "random-access/parquet-tokio-local-disk",
-    V3Bin::RandomAccess {
-        dataset: "random access".into(),
-        format: "parquet".into(),
-    },
-)]
 #[case::random_access_4_part_lance(
     "random-access/taxi/take/lance-tokio-local-disk",
     V3Bin::RandomAccess {
@@ -267,6 +260,8 @@ fn compression_size_records(#[case] name: &str, #[case] expected: V3Bin) {
 #[case::ratio_size_vortex_raw("vortex:raw size/clickbench")]
 #[case::throughput("compress throughput/clickbench")]
 #[case::nonsense_prefix("not-a-known-bench/series")]
+#[case::random_access_2_part_legacy("random-access/parquet-tokio-local-disk")]
+#[case::random_access_3_part("random-access/taxi/parquet-tokio-local-disk")]
 fn unmapped_records_yield_none(#[case] name: &str) {
     let r = record(name);
     assert_eq!(
@@ -276,6 +271,23 @@ fn unmapped_records_yield_none(#[case] name: &str) {
     );
 }
 
+#[test]
+fn random_access_2_part_legacy_is_skip_not_unknown() {
+    // The 2-part legacy shape `random-access/<ext>-tokio-local-disk`
+    // carries no dataset, so `bin_random_access` returns None. That
+    // None must route through `Outcome::Skip` (an intentional drop),
+    // NOT `Outcome::Unknown`, otherwise these records count against
+    // the 5% uncategorized gate in `migrate::run`. Top-level
+    // `classify()` returns None for both Skip and Unknown, so this
+    // assertion has to go through `classify_outcome`.
+    let r = record("random-access/parquet-tokio-local-disk");
+    let outcome = classify_outcome(&r);
+    assert!(
+        matches!(outcome, Outcome::Skip(_)),
+        "2-part legacy random-access must Skip, not Unknown; got {outcome:?}"
+    );
+}
+
 #[test]
 fn parquet_zstd_size_is_deprecated() {
     // `parquet-zstd` is not on the v3 emitter's format allowlist, so
@@ -401,6 +413,33 @@ fn tpch_compression_size_drops_default_scale_factor() {
     assert_eq!(dataset_variant, None);
 }
 
+#[rstest]
+// SF=1 is the implicit default; both spellings must drop to None so
+// `bin_compression_size` and `migrate_file_sizes` agree.
+#[case::int_one("1", None)]
+#[case::float_one("1.0", None)]
+// SF=10 must produce the same canonical string regardless of spelling.
+#[case::int_ten("10", Some("10".into()))]
+#[case::float_ten("10.0", Some("10".into()))]
+#[case::float_fractional("0.1", Some("0.1".into()))]
+#[case::whitespace(" 10 ", Some("10".into()))]
+#[case::empty("", None)]
+fn compression_size_scale_factor_canonicalizes(
+    #[case] raw_sf: &str,
+    #[case] expected: Option<String>,
+) {
+    let mut r = record("vortex size/tpch");
+    r.dataset = Some(serde_json::json!({ "tpch": { "scale_factor": raw_sf } }));
+    let outcome = classify_outcome(&r);
+    let Outcome::Bin(V3Bin::CompressionSize {
+        dataset_variant, ..
+    }) = outcome
+    else {
+        panic!("expected Bin(CompressionSize) for sf={raw_sf:?}, got {outcome:?}");
+    };
+    assert_eq!(dataset_variant, expected, "sf={raw_sf:?}");
+}
+
 #[test]
 fn engine_casing_lowercased() {
     // Older v2 records emitted display-case engines like `DataFusion`
diff --git a/benchmarks-website/migrate/tests/end_to_end.rs b/benchmarks-website/migrate/tests/end_to_end.rs
index 210092a4058..25bf5c0ad55 100644
--- a/benchmarks-website/migrate/tests/end_to_end.rs
+++ b/benchmarks-website/migrate/tests/end_to_end.rs
@@ -227,6 +227,193 @@ fn compression_size_data_and_file_sizes_merge() {
     assert_eq!(value_bytes, 300);
 }
 
+#[test]
+fn empty_author_email_stored_as_null() {
+    // v2 sometimes wrote `""` for blank author/email/message. The
+    // migrator normalizes those to None so DuckDB stores SQL NULL,
+    // letting the UI distinguish "missing metadata" from "empty
+    // string". Here author.email is "" — verify the column is NULL,
+    // not the empty string.
+    const COMMITS: &str = r#"{"id":"deadbeef","timestamp":"2026-04-25T00:00:00Z","message":"fixture","author":{"name":"A","email":""},"committer":{"name":"C","email":"c@example.com"},"tree_id":"abcd0001","url":"https://example.com/commit/deadbeef"}
+"#;
+
+    let src_dir = build_fixture(COMMITS, "", &[]);
+    let target_dir = TempDir::new().unwrap();
+    let target = target_dir.path().join("v3.duckdb");
+
+    migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap();
+
+    let conn = Connection::open(&target).unwrap();
+    let is_null: bool = conn
+        .query_row(
+            "SELECT author_email IS NULL FROM commits WHERE commit_sha = 'deadbeef'",
+            [],
+            |r| r.get(0),
+        )
+        .unwrap();
+    assert!(is_null, "empty author.email must store as SQL NULL");
+
+    // Non-empty fields still round-trip as strings.
+    let committer_email: String = conn
+        .query_row(
+            "SELECT committer_email FROM commits WHERE commit_sha = 'deadbeef'",
+            [],
+            |r| r.get(0),
+        )
+        .unwrap();
+    assert_eq!(committer_email, "c@example.com");
+}
+
+#[test]
+fn open_target_db_removes_orphan_wal() {
+    // A `.wal` left from a previous crash with no main file present
+    // must still be removed so the next run starts from a known-empty
+    // state. Otherwise DuckDB can replay stale WAL into the fresh DB
+    // and corrupt subsequent inserts.
+    let target_dir = TempDir::new().unwrap();
+    let target = target_dir.path().join("v3.duckdb");
+    let wal = target_dir.path().join("v3.duckdb.wal");
+    std::fs::write(&wal, b"orphan-wal-bytes").unwrap();
+    assert!(wal.exists(), "precondition: orphan wal staged");
+    assert!(!target.exists(), "precondition: no main db file");
+
+    let _conn = migrate::open_target_db(&target).unwrap();
+
+    // The migrator opens the DB after sweeping the WAL; DuckDB may
+    // recreate its own wal under load, but our pre-existing orphan
+    // bytes must not survive the sweep. We assert by content: either
+    // the path is missing, or its contents differ from the orphan we
+    // staged.
+ if wal.exists() { + let now = std::fs::read(&wal).unwrap(); + assert_ne!( + now, b"orphan-wal-bytes", + "orphan wal bytes must not survive open_target_db" + ); + } +} + +#[test] +fn file_sizes_unknown_id_falls_back_to_unknown_prefix() { + // A file-sizes-*.json.gz whose id isn't in + // `KNOWN_FILE_SIZES_SUITES`, with an empty `benchmark` field, used + // to surface as a bare id like `mystery-suite` and render as a + // dataset name. The migrator now prefixes those with `unknown:` + // so the UI can flag them. + const FILE_SIZES: &str = r#"{"commit_id":"deadbeef","benchmark":"","scale_factor":"","format":"vortex-file-compressed","file":"part-0.vortex","size_bytes":1000} +"#; + + let src_dir = build_fixture( + COMMITS_JSONL, + "", + &[("file-sizes-mystery-suite.json.gz", FILE_SIZES)], + ); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + let conn = Connection::open(&target).unwrap(); + let dataset: String = conn + .query_row("SELECT dataset FROM compression_sizes", [], |r| r.get(0)) + .unwrap(); + assert_eq!(dataset, "unknown:mystery-suite"); +} + +#[test] +fn file_sizes_known_id_uses_id_directly() { + // For a KNOWN_FILE_SIZES_SUITES id, the fallback path keeps the + // raw id (no `unknown:` prefix). `clickbench-nvme` is on the list. + const FILE_SIZES: &str = r#"{"commit_id":"deadbeef","benchmark":"","scale_factor":"","format":"vortex-file-compressed","file":"part-0.vortex","size_bytes":1000} +"#; + + let src_dir = build_fixture( + COMMITS_JSONL, + "", + &[("file-sizes-clickbench-nvme.json.gz", FILE_SIZES)], + ); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + let conn = Connection::open(&target).unwrap(); + let dataset: String = conn + .query_row("SELECT dataset FROM compression_sizes", [], |r| r.get(0)) + .unwrap(); + assert_eq!(dataset, "clickbench-nvme"); +} + +#[test] +fn compression_size_data_and_file_sizes_merge_with_canonical_sf() { + // Same logical SF written as `"10"` on the data.json.gz side and + // `"10.0"` on the file-sizes side. Both paths must canonicalize + // to `"10"` so the rows share a `measurement_id` and merge into + // one compression_sizes row. + const DATA: &str = r#"{"name":"vortex size/tpch","commit_id":"deadbeef","unit":"bytes","value":200,"dataset":{"tpch":{"scale_factor":"10"}}} +"#; + const FILE_SIZES: &str = r#"{"commit_id":"deadbeef","benchmark":"tpch","scale_factor":"10.0","format":"vortex-file-compressed","file":"part-0.vortex","size_bytes":100} +"#; + + let src_dir = build_fixture( + COMMITS_JSONL, + DATA, + &[("file-sizes-tpch-nvme-10.json.gz", FILE_SIZES)], + ); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + let summary = migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + assert_eq!(summary.compression_size_inserted, 1, "summary={summary}"); + let conn = Connection::open(&target).unwrap(); + let (n, value_bytes, dataset_variant): (i64, i64, String) = conn + .query_row( + "SELECT COUNT(*), SUM(value_bytes), MAX(dataset_variant) FROM compression_sizes", + [], + |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)), + ) + .unwrap(); + assert_eq!(n, 1); + // data.json.gz seeds 200, file-sizes adds 100. 
+    assert_eq!(value_bytes, 300);
+    assert_eq!(dataset_variant, "10");
+}
+
+#[test]
+fn summary_counts_match_actual_rows_on_success() {
+    // Sister test to migrate::tests::flush_all_does_not_overcount_on_failure.
+    // On a fully successful run, the post-flush summary counters must
+    // equal `SELECT COUNT(*)` from each fact table. This is the
+    // invariant the flush-after-count refactor preserves.
+    let src_dir = build_fixture(COMMITS_JSONL, DATA_JSONL, &[]);
+    let target_dir = TempDir::new().unwrap();
+    let target = target_dir.path().join("v3.duckdb");
+
+    let summary = migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap();
+
+    let conn = Connection::open(&target).unwrap();
+    let actual = |table: &str| -> u64 {
+        let n: i64 = conn
+            .query_row(&format!("SELECT COUNT(*) FROM {table}"), [], |r| r.get(0))
+            .unwrap();
+        n as u64
+    };
+    assert_eq!(summary.query_inserted, actual("query_measurements"));
+    assert_eq!(
+        summary.compression_time_inserted,
+        actual("compression_times")
+    );
+    assert_eq!(
+        summary.compression_size_inserted,
+        actual("compression_sizes")
+    );
+    assert_eq!(
+        summary.random_access_inserted,
+        actual("random_access_times")
+    );
+}
+
 #[test]
 fn file_sizes_sum_into_one_row() {
     // Two file-sizes rows sharing (commit, benchmark, format,
diff --git a/benchmarks-website/server/src/api.rs b/benchmarks-website/server/src/api.rs
index e6581c7c4f0..f482e57d030 100644
--- a/benchmarks-website/server/src/api.rs
+++ b/benchmarks-website/server/src/api.rs
@@ -502,7 +502,7 @@ fn collect_query_chart(
         r#"
         SELECT q.commit_sha, CAST(c.timestamp AS VARCHAR),
-            c.message, c.url,
+            COALESCE(c.message, '') AS message, c.url,
             q.engine, q.format, q.value_ns
         FROM query_measurements q JOIN commits c USING (commit_sha)
@@ -567,7 +567,7 @@ fn collect_compression_time_chart(
         r#"
         SELECT t.commit_sha, CAST(c.timestamp AS VARCHAR),
-            c.message, c.url,
+            COALESCE(c.message, '') AS message, c.url,
             t.format, t.op, t.value_ns
         FROM compression_times t JOIN commits c USING (commit_sha)
@@ -615,7 +615,7 @@ fn collect_compression_size_chart(
         r#"
         SELECT s.commit_sha, CAST(c.timestamp AS VARCHAR),
-            c.message, c.url,
+            COALESCE(c.message, '') AS message, c.url,
             s.format, s.value_bytes
         FROM compression_sizes s JOIN commits c USING (commit_sha)
@@ -658,7 +658,7 @@ fn collect_random_access_chart(conn: &Connection, dataset: &str) -> Result<ChartData> {
         r#"
         SELECT r.commit_sha, CAST(c.timestamp AS VARCHAR),
-            c.message, c.url,
+            COALESCE(c.message, '') AS message, c.url,
             r.format, r.value_ns
         FROM random_access_times r JOIN commits c USING (commit_sha)

From: Connor Tsui <87130162+connortsui20@users.noreply.github.com>
Date: Mon, 27 Apr 2026 16:44:25 -0400
Subject: [PATCH 13/26] [claude] feat(benchmarks-website): historical
 comparison UX + mobile (#7681)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Summary

Brings the v3 benchmarks website to a demo-ready state focused on the
historical-comparison use case (Vortex vs other engines on the same
commit, HEAD vs N commits ago, latest vs first as % delta). Single
process, single binary; SSR `maud` + inline JSON `<script>` payloads.
---
diff --git a/benchmarks-website/server/static/chart.js b/benchmarks-website/server/static/chart.js
--- a/benchmarks-website/server/static/chart.js
+++ b/benchmarks-website/server/static/chart.js
+// Charts are lazily constructed: each .chart-card holds a
+// <script type="application/json"> payload paired with a <canvas>
+// via the index attribute.
+// Construction is deferred until the canvas crosses an IntersectionObserver
+// threshold so a 22-chart group doesn't pay for offscreen charts up front.
+//
+// URL state (n, y, mode, hidden) is the source of truth. Server emits
+// scope/Y/mode toolbar links that navigate via plain <a>; client-side
+// legend toggles rewrite ?hidden=... via history.replaceState so a
+// permalink reproduces the view.
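+//
+// Illustrative permalink query string (the parameter names are real;
+// the values below are made up for the example):
+//
+//   ?n=100&y=log&mode=rel&hidden=duckdb:parquet|datafusion:parquet
+//
+// n bounds the commit window, y picks the axis scale, mode=rel
+// re-bases each series to its first non-null point, and hidden lists
+// the legend-toggled-off series labels joined with `|`.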
 (function () {
   "use strict";
 
+  // -----------------------------------------------------------------------
+  // Palette + helpers
+  // -----------------------------------------------------------------------
   var palette = [
     "#2563eb", "#dc2626", "#16a34a", "#ea580c", "#7c3aed",
     "#0891b2", "#ca8a04", "#db2777", "#65a30d", "#475569",
@@ -22,70 +33,351 @@
     return typeof sha === "string" ? sha.slice(0, 7) : String(sha);
   }
 
-  function readPayload() {
-    var node = document.getElementById("chart-data");
-    if (!node) {
-      throw new Error("missing #chart-data element");
+  function shortDate(ts) {
+    if (typeof ts !== "string") return "";
+    // commits.timestamp arrives as either ISO 8601 or DuckDB's `YYYY-MM-DD HH:MM:SS`.
+    return ts.slice(0, 10);
+  }
+
+  function truncate(s, max) {
+    if (typeof s !== "string") return "";
+    return s.length > max ? s.slice(0, max - 1) + "…" : s;
+  }
+
+  function escapeHtml(s) {
+    return String(s)
+      .replace(/&/g, "&amp;")
+      .replace(/</g, "&lt;")
+      .replace(/>/g, "&gt;")
+      .replace(/"/g, "&quot;")
+      .replace(/'/g, "&#39;");
+  }
+
+  function formatNumber(v, unit) {
+    if (v === null || v === undefined || Number.isNaN(v)) return "—";
+    if (unit === "ns") {
+      // Pick a friendlier unit when the magnitude warrants it.
+      var abs = Math.abs(v);
+      if (abs >= 1e9) return (v / 1e9).toFixed(2) + " s";
+      if (abs >= 1e6) return (v / 1e6).toFixed(2) + " ms";
+      if (abs >= 1e3) return (v / 1e3).toFixed(2) + " µs";
+      return v.toFixed(0) + " ns";
+    }
+    if (unit === "bytes") {
+      var a = Math.abs(v);
+      if (a >= 1024 * 1024 * 1024) return (v / (1024 * 1024 * 1024)).toFixed(2) + " GiB";
+      if (a >= 1024 * 1024) return (v / (1024 * 1024)).toFixed(2) + " MiB";
+      if (a >= 1024) return (v / 1024).toFixed(2) + " KiB";
+      return v.toFixed(0) + " B";
+    }
+    return v.toString();
+  }
+
+  // -----------------------------------------------------------------------
+  // URL state
+  // -----------------------------------------------------------------------
+  function parseUrl() {
+    var p = new URLSearchParams(window.location.search);
+    return {
+      n: p.get("n") || "",
+      y: p.get("y") === "log" ? "log" : "linear",
+      mode: p.get("mode") === "rel" ? "rel" : "abs",
+      hidden: parseHiddenParam(p.get("hidden")),
+    };
+  }
+
+  // `|` cannot appear in our series labels (which are
+  // "engine:format"-shaped today), unlike `,`/`/` which could plausibly
+  // sneak in via dataset variants. URLSearchParams handles `|` as-is.
+  var HIDDEN_DELIM = "|";
+
+  function parseHiddenParam(s) {
+    if (!s) return Object.create(null);
+    var out = Object.create(null);
+    s.split(HIDDEN_DELIM).forEach(function (k) {
+      if (k) out[k] = true;
+    });
+    return out;
+  }
+
+  function serializeHidden(set) {
+    var keys = Object.keys(set).filter(function (k) { return set[k]; });
+    keys.sort();
+    return keys.join(HIDDEN_DELIM);
+  }
+
+  function rewriteHiddenInUrl(set) {
+    var p = new URLSearchParams(window.location.search);
+    var v = serializeHidden(set);
+    if (v) {
+      p.set("hidden", v);
+    } else {
+      p.delete("hidden");
     }
-    return JSON.parse(node.textContent);
+    var qs = p.toString();
+    var url = window.location.pathname + (qs ? "?" + qs : "") + window.location.hash;
+    window.history.replaceState(null, "", url);
   }
 
-  function buildDatasets(series) {
-    var names = Object.keys(series).sort();
+  // -----------------------------------------------------------------------
+  // Payload + dataset construction
+  // -----------------------------------------------------------------------
+  function readPayload(scriptNode) {
+    return JSON.parse(scriptNode.textContent);
+  }
+
+  function buildDatasets(payload, urlState) {
+    var raw = payload.series || {};
+    var names = Object.keys(raw).sort();
+    var values = names.map(function (name) {
+      return Array.isArray(raw[name]) ? raw[name].slice() : [];
+    });
+
+    if (urlState.mode === "rel") {
+      values = values.map(function (arr) {
+        var baseline = null;
+        for (var i = 0; i < arr.length; i++) {
+          if (arr[i] !== null && arr[i] !== undefined && !Number.isNaN(arr[i])) {
+            baseline = arr[i];
+            break;
+          }
+        }
+        if (!baseline) return arr.map(function () { return null; });
+        return arr.map(function (v) {
+          if (v === null || v === undefined || Number.isNaN(v)) return null;
+          return (v / baseline) * 100;
+        });
+      });
+    }
+
     return names.map(function (name, i) {
       return {
         label: name,
-        data: series[name],
+        data: values[i],
+        rawData: raw[name],
         borderColor: colorFor(i),
         backgroundColor: colorFor(i),
         spanGaps: true,
         tension: 0.1,
-        pointRadius: 3,
+        pointRadius: 2,
         pointHoverRadius: 5,
+        hidden: !!urlState.hidden[name],
       };
     });
   }
 
+  // -----------------------------------------------------------------------
+  // Tooltip
+  // -----------------------------------------------------------------------
+  function externalTooltipHandler(payload, host) {
+    return function (context) {
+      var tooltipModel = context.tooltip;
+      if (!host) return;
+      if (tooltipModel.opacity === 0) {
+        host.style.opacity = "0";
+        host.style.pointerEvents = "none";
+        return;
+      }
+
+      var idx = tooltipModel.dataPoints && tooltipModel.dataPoints[0]
+        ? tooltipModel.dataPoints[0].dataIndex
+        : -1;
+      var commit = (payload.commits || [])[idx] || {};
+      var unit = payload.unit || "";
+
+      var rows = (tooltipModel.dataPoints || []).map(function (dp) {
+        var ds = dp.dataset || {};
+        var raw = (ds.rawData || [])[idx];
+        var prevIdx = idx - 1;
+        var prevRaw = null;
+        while (prevIdx >= 0) {
+          var pv = (ds.rawData || [])[prevIdx];
+          if (pv !== null && pv !== undefined && !Number.isNaN(pv)) { prevRaw = pv; break; }
+          prevIdx--;
+        }
+        var deltaHtml = "";
+        if (prevRaw !== null && raw !== null && raw !== undefined && prevRaw !== 0) {
+          var pct = ((raw - prevRaw) / prevRaw) * 100;
+          var cls = pct > 0 ? "tt-delta tt-delta--worse"
+            : pct < 0 ? "tt-delta tt-delta--better" : "tt-delta";
+          var sign = pct > 0 ? "+" : "";
+          deltaHtml = '<span class="' + cls + '">' + sign + pct.toFixed(1) + "%</span>";
+        }
+        return '<div class="tt-row">' +
+          '<span class="tt-swatch" style="background:' + ds.borderColor + '"></span>' +
+          '<span class="tt-label">' + escapeHtml(ds.label) + '</span>' +
+          '<span class="tt-value">' + escapeHtml(formatNumber(raw, unit)) + '</span>' +
+          deltaHtml +
+          "</div>";
+      }).join("");
+
+      var titleHtml = '<div class="tt-title">' +
+        escapeHtml(shortSha(commit.sha)) + ' · ' +
+        escapeHtml(shortDate(commit.timestamp)) +
+        "</div>";
+
+      var msg = commit.message ? truncate(commit.message, 120) : "";
+      var footerHtml = "";
+      if (msg || commit.url) {
+        footerHtml = '<div class="tt-footer">' +
+          (msg ? '<div class="tt-msg">' + escapeHtml(msg) + '</div>' : '') +
+          (commit.url
+            ? '<a class="tt-link" href="' + escapeHtml(commit.url) +
+              '" target="_blank" rel="noopener">view commit</a>'
+            : '') +
+          "</div>";
+      }
+
+      host.innerHTML = titleHtml + '<div class="tt-rows">' + rows + "</div>" + footerHtml;
+
+      var canvasRect = context.chart.canvas.getBoundingClientRect();
+      var hostRect = host.parentNode.getBoundingClientRect();
+      var x = canvasRect.left - hostRect.left + tooltipModel.caretX;
+      var y = canvasRect.top - hostRect.top + tooltipModel.caretY;
+      host.style.opacity = "1";
+      host.style.pointerEvents = "auto";
+      host.style.left = x + "px";
+      host.style.top = y + "px";
+    };
+  }
+
+  // -----------------------------------------------------------------------
+  // Single-chart construction
+  // -----------------------------------------------------------------------
+  function constructChart(card, urlState) {
+    var idx = card.getAttribute("data-chart-index");
+    var script = document.getElementById("chart-data-" + idx);
+    var canvas = card.querySelector('canvas[data-chart-index="' + idx + '"]');
+    if (!script || !canvas || typeof Chart === "undefined") return null;
+    if (canvas.__bench_chart) return canvas.__bench_chart;
+
+    var payload;
+    try {
+      payload = readPayload(script);
+    } catch (e) {
+      return null;
+    }
+
+    var labels = (payload.commits || []).map(function (c) { return shortSha(c.sha); });
+    var datasets = buildDatasets(payload, urlState);
+    var unit = payload.unit || "";
+    var host = card.querySelector(".chart-tooltip-host");
+
+    var yTitle = unit;
+    if (urlState.mode === "rel") yTitle = "% of baseline";
+    // Mobile gets the legend above the chart so the chart doesn't get pushed
+    // off-screen by a tall legend on narrow viewports.
+    var legendPosition = (window.matchMedia
+      && window.matchMedia("(max-width: 768px)").matches) ? "top" : "bottom";
+
+    var chart = new Chart(canvas, {
       type: "line",
       data: { labels: labels, datasets: datasets },
       options: {
         responsive: true,
         maintainAspectRatio: false,
+        animation: false,
         interaction: { mode: "index", intersect: false },
         scales: {
           y: {
-            beginAtZero: true,
-            title: { display: true, text: payload.unit || "" },
+            type: urlState.y === "log" ? "logarithmic" : "linear",
+            beginAtZero: urlState.y !== "log" && urlState.mode !== "rel",
+            title: { display: !!yTitle, text: yTitle },
           },
-          x: { title: { display: true, text: "commit" } },
+          x: { title: { display: false } },
         },
         plugins: {
-          legend: { position: "bottom" },
-          tooltip: {
-            callbacks: {
-              title: function (items) {
-                if (!items.length) return "";
-                var idx = items[0].dataIndex;
-                var c = (payload.commits || [])[idx] || {};
-                var msg = c.message ? " — " + c.message : "";
-                return shortSha(c.sha) + msg;
-              },
+          legend: {
+            position: legendPosition,
+            onClick: function (e, item, legend) {
+              // Default toggle behaviour, then mirror into URL.
+              var ci = legend.chart;
+              var meta = ci.getDatasetMeta(item.datasetIndex);
+              meta.hidden = meta.hidden === null ?
!ci.data.datasets[item.datasetIndex].hidden : null; + ci.update(); + var hiddenSet = parseHiddenParam(new URLSearchParams(window.location.search).get("hidden")); + var label = item.text; + if (meta.hidden) hiddenSet[label] = true; else delete hiddenSet[label]; + rewriteHiddenInUrl(hiddenSet); }, }, + tooltip: { + enabled: false, + external: externalTooltipHandler(payload, host), + }, }, }, }); + canvas.__bench_chart = chart; + return chart; + } + + // ----------------------------------------------------------------------- + // Page wiring + // ----------------------------------------------------------------------- + function initCharts() { + var urlState = parseUrl(); + var cards = document.querySelectorAll(".chart-card[data-chart-index]"); + if (!cards.length) return; + + if (typeof IntersectionObserver === "undefined") { + cards.forEach(function (card) { constructChart(card, urlState); }); + } else { + var io = new IntersectionObserver(function (entries) { + entries.forEach(function (entry) { + if (entry.isIntersecting) { + constructChart(entry.target, urlState); + io.unobserve(entry.target); + } + }); + }, { rootMargin: "150px 0px" }); + cards.forEach(function (card) { io.observe(card); }); + } + + // Tap-elsewhere closes any open external tooltip. + document.addEventListener("click", function (e) { + var hosts = document.querySelectorAll(".chart-tooltip-host"); + hosts.forEach(function (host) { + if (!host.contains(e.target)) { + host.style.opacity = "0"; + host.style.pointerEvents = "none"; + } + }); + }); + + initSlider(); + } + + function initSlider() { + var slider = document.getElementById("scope-slider"); + var label = document.getElementById("scope-slider-label"); + if (!slider) return; + slider.addEventListener("input", function () { + if (label) label.textContent = slider.value; + }); + slider.addEventListener("change", function () { + var p = new URLSearchParams(window.location.search); + p.set("n", slider.value); + window.location.search = p.toString(); + }); + } + + function initLandingFilter() { + var input = document.getElementById("group-search"); + if (!input) return; + var groups = document.querySelectorAll("section.group[data-group-name]"); + input.addEventListener("input", function () { + var q = input.value.toLowerCase(); + groups.forEach(function (g) { + var name = (g.getAttribute("data-group-name") || "").toLowerCase(); + g.style.display = !q || name.indexOf(q) !== -1 ? 
"" : "none"; + }); + }); + } + + function init() { + initLandingFilter(); + initCharts(); } if (document.readyState === "loading") { diff --git a/benchmarks-website/server/static/style.css b/benchmarks-website/server/static/style.css index 0c13f8788b4..bac0c505ab8 100644 --- a/benchmarks-website/server/static/style.css +++ b/benchmarks-website/server/static/style.css @@ -6,9 +6,14 @@ --fg: #1a1a1a; --muted: #6b6b6b; --accent: #2563eb; + --accent-fg: #ffffff; --border: #e4e4e7; --card-bg: #fafafa; --code-bg: #f3f4f6; + --tooltip-bg: #1a1a1a; + --tooltip-fg: #f5f5f5; + --good: #16a34a; + --bad: #dc2626; } @media (prefers-color-scheme: dark) { @@ -17,15 +22,18 @@ --fg: #e6e6e6; --muted: #a1a1aa; --accent: #60a5fa; + --accent-fg: #0f1115; --border: #27272a; --card-bg: #16181d; --code-bg: #1f2229; + --tooltip-bg: #1a1c22; + --tooltip-fg: #f5f5f5; + --good: #4ade80; + --bad: #f87171; } } -* { - box-sizing: border-box; -} +* { box-sizing: border-box; } html, body { margin: 0; @@ -38,30 +46,15 @@ html, body { line-height: 1.5; } -a { - color: var(--accent); - text-decoration: none; -} - -a:hover { - text-decoration: underline; -} +a { color: var(--accent); text-decoration: none; } +a:hover { text-decoration: underline; } .page-header { border-bottom: 1px solid var(--border); padding: 1rem 1.5rem; } - -.page-header h1 { - margin: 0; - font-size: 1.25rem; - font-weight: 600; -} - -.page-header h1 a { - color: var(--fg); -} - +.page-header h1 { margin: 0; font-size: 1.25rem; font-weight: 600; } +.page-header h1 a { color: var(--fg); } .page-header .subtitle { margin: 0.25rem 0 0; color: var(--muted); @@ -69,7 +62,7 @@ a:hover { } main { - max-width: 1100px; + max-width: 1200px; margin: 0 auto; padding: 1.5rem; } @@ -79,20 +72,42 @@ main { font-style: italic; } +/* ---------------------------------------------------------------------- + * Landing page + * ---------------------------------------------------------------------- */ +.landing-search { + margin-bottom: 1rem; +} +.landing-search input { + width: 100%; + max-width: 32rem; + padding: 0.5rem 0.75rem; + border: 1px solid var(--border); + border-radius: 6px; + background: var(--bg); + color: var(--fg); + font: inherit; +} + .group { - margin-bottom: 2rem; + margin-bottom: 1.5rem; border: 1px solid var(--border); border-radius: 6px; background: var(--card-bg); padding: 1rem 1.25rem; } - .group h2 { margin: 0 0 0.75rem; font-size: 1rem; font-weight: 600; color: var(--fg); } +.group .group-link { + color: var(--fg); +} +.group .group-link:hover { + color: var(--accent); +} .charts { list-style: none; @@ -100,13 +115,9 @@ main { padding: 0; display: flex; flex-wrap: wrap; - gap: 0.5rem; -} - -.charts li { - margin: 0; + gap: 0.4rem; } - +.charts li { margin: 0; } .charts a { display: inline-block; padding: 0.25rem 0.6rem; @@ -116,12 +127,77 @@ main { font-size: 0.875rem; } +/* ---------------------------------------------------------------------- + * Toolbar + * ---------------------------------------------------------------------- */ +.toolbar { + display: flex; + flex-wrap: wrap; + gap: 0.75rem 1.25rem; + align-items: center; + padding: 0.75rem 1rem; + margin-bottom: 1rem; + border: 1px solid var(--border); + border-radius: 6px; + background: var(--card-bg); + position: sticky; + top: 0; + z-index: 10; + -webkit-backdrop-filter: saturate(140%) blur(6px); + backdrop-filter: saturate(140%) blur(6px); +} +.toolbar-group { + display: flex; + flex-wrap: wrap; + align-items: center; + gap: 0.35rem; +} +.toolbar-label { + color: var(--muted); + font-size: 
0.8rem; + text-transform: uppercase; + letter-spacing: 0.04em; + margin-right: 0.25rem; +} +.toolbar-btn { + display: inline-flex; + align-items: center; + justify-content: center; + min-height: 32px; + padding: 0.25rem 0.7rem; + border: 1px solid var(--border); + border-radius: 4px; + background: var(--bg); + color: var(--fg); + font-size: 0.85rem; + text-decoration: none; +} +.toolbar-btn:hover { border-color: var(--accent); } +.toolbar-btn--active { + background: var(--accent); + border-color: var(--accent); + color: var(--accent-fg); +} +.toolbar-btn--active:hover { color: var(--accent-fg); } +.toolbar-slider-label { + font-variant-numeric: tabular-nums; + font-size: 0.85rem; + color: var(--muted); + min-width: 2.5rem; + text-align: right; +} +#scope-slider { + width: 9rem; +} + +/* ---------------------------------------------------------------------- + * Single-chart + group-page chart card + * ---------------------------------------------------------------------- */ .chart-meta { color: var(--muted); font-size: 0.875rem; - margin: 0 0 1rem; + margin: 0 0 0.75rem; } - .chart-meta code { background: var(--code-bg); padding: 0.1rem 0.35rem; @@ -129,20 +205,43 @@ main { font-size: 0.85em; } -.chart-wrap { +.chart-card { position: relative; - height: 480px; border: 1px solid var(--border); border-radius: 6px; background: var(--card-bg); - padding: 1rem; + padding: 0.75rem 1rem 1rem; + margin-bottom: 1.25rem; +} +.chart-card-title { + margin: 0 0 0.5rem; + font-size: 0.95rem; + font-weight: 600; } +.chart-card-title a { color: var(--fg); } +.chart-card-title a:hover { color: var(--accent); } +.chart-grid { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(420px, 1fr)); + gap: 1rem; +} + +.chart-wrap { + position: relative; + height: 320px; +} +.chart-card .chart-wrap canvas, .chart-wrap canvas { width: 100% !important; height: 100% !important; } +/* Single-chart pages get a taller area; group cards keep the compact 320px. 
*/ +main > .chart-card:not(.chart-grid .chart-card) .chart-wrap { + height: 480px; +} + .no-script { margin-top: 1rem; padding: 0.75rem 1rem; @@ -152,3 +251,104 @@ main { color: var(--muted); font-size: 0.875rem; } + +/* ---------------------------------------------------------------------- + * Custom external tooltip + * ---------------------------------------------------------------------- */ +.chart-tooltip-host { + position: absolute; + top: 0; + left: 0; + pointer-events: none; + opacity: 0; + z-index: 20; + background: var(--tooltip-bg); + color: var(--tooltip-fg); + border-radius: 6px; + padding: 0.5rem 0.65rem; + font-size: 0.8rem; + line-height: 1.35; + max-width: 22rem; + box-shadow: 0 6px 20px rgba(0, 0, 0, 0.25); + transform: translate(-50%, calc(-100% - 12px)); + transition: opacity 100ms; +} +.tt-title { + font-weight: 600; + margin-bottom: 0.25rem; + font-variant-numeric: tabular-nums; +} +.tt-rows { display: flex; flex-direction: column; gap: 0.15rem; } +.tt-row { + display: flex; + align-items: center; + gap: 0.35rem; + white-space: nowrap; +} +.tt-swatch { + display: inline-block; + width: 0.6rem; + height: 0.6rem; + border-radius: 50%; + flex: 0 0 auto; +} +.tt-label { + flex: 1 1 auto; + overflow: hidden; + text-overflow: ellipsis; + max-width: 11rem; +} +.tt-value { + font-variant-numeric: tabular-nums; + font-weight: 500; +} +.tt-delta { + font-variant-numeric: tabular-nums; + font-size: 0.75rem; + opacity: 0.85; +} +.tt-delta--better { color: var(--good); } +.tt-delta--worse { color: var(--bad); } +.tt-footer { + margin-top: 0.4rem; + padding-top: 0.4rem; + border-top: 1px solid rgba(255, 255, 255, 0.15); + display: flex; + flex-direction: column; + gap: 0.2rem; +} +.tt-msg { + color: var(--tooltip-fg); + opacity: 0.85; + white-space: normal; + font-style: italic; +} +.tt-link { + color: #93c5fd; + font-size: 0.78rem; +} +.tt-link:hover { color: #c7e0ff; } + +/* ---------------------------------------------------------------------- + * Mobile + * ---------------------------------------------------------------------- */ +@media (max-width: 768px) { + main { padding: 1rem; } + .toolbar { + padding: 0.5rem 0.65rem; + gap: 0.5rem 0.85rem; + } + .toolbar-btn { + min-height: 40px; + padding: 0.4rem 0.85rem; + font-size: 0.95rem; + } + #scope-slider { width: 100%; flex: 1 1 auto; } + .chart-grid { grid-template-columns: 1fr; gap: 0.75rem; } + .chart-wrap { height: 260px; } + main > .chart-card:not(.chart-grid .chart-card) .chart-wrap { height: 360px; } + .group { + padding: 0.75rem 0.85rem; + } + .charts a { padding: 0.35rem 0.7rem; min-height: 32px; } +} diff --git a/benchmarks-website/server/tests/snapshots/chart_page_query.snap b/benchmarks-website/server/tests/snapshots/chart_page_query.snap index f7244f57f82..579d7067bef 100644 --- a/benchmarks-website/server/tests/snapshots/chart_page_query.snap +++ b/benchmarks-website/server/tests/snapshots/chart_page_query.snap @@ -1,6 +1,5 @@ --- source: benchmarks-website/server/tests/web_ui.rs -assertion_line: 267 expression: body --- -tpch sf=1 Q1 [nvme] — bench.vortex.dev

unit: ns · 2 series · 3 commits

+tpch sf=1 Q1 [nvme] — bench.vortex.dev

unit: ns · 2 series · 3 commits

diff --git a/benchmarks-website/server/tests/snapshots/group_page_query.snap b/benchmarks-website/server/tests/snapshots/group_page_query.snap new file mode 100644 index 00000000000..045ceb8bc34 --- /dev/null +++ b/benchmarks-website/server/tests/snapshots/group_page_query.snap @@ -0,0 +1,5 @@ +--- +source: benchmarks-website/server/tests/web_ui.rs +expression: body +--- +tpch sf=1 [nvme] — bench.vortex.dev

1 chart

Q1

diff --git a/benchmarks-website/server/tests/snapshots/landing_page.snap b/benchmarks-website/server/tests/snapshots/landing_page.snap index 34873e5eb89..607d01be36d 100644 --- a/benchmarks-website/server/tests/snapshots/landing_page.snap +++ b/benchmarks-website/server/tests/snapshots/landing_page.snap @@ -2,4 +2,4 @@ source: benchmarks-website/server/tests/web_ui.rs expression: body --- -bench.vortex.dev

tpch sf=1 [nvme]

Compression

Compression Size

Random Access

cohere-large-10m / partitioned

+bench.vortex.dev

tpch sf=1 [nvme]

Compression

Compression Size

Random Access

cohere-large-10m / partitioned

diff --git a/benchmarks-website/server/tests/web_ui.rs b/benchmarks-website/server/tests/web_ui.rs index 4bed7a35271..201ef2f9008 100644 --- a/benchmarks-website/server/tests/web_ui.rs +++ b/benchmarks-website/server/tests/web_ui.rs @@ -249,19 +249,23 @@ async fn chart_page_snapshot() -> Result<()> { .to_string(); let resp = client - .get(server.url(&format!("/chart/{slug}"))) + .get(server.url(&format!("/chart/{slug}?n=100"))) .send() .await?; assert_eq!(resp.status(), 200); let body = resp.text().await?; assert!( - body.contains(r#" -// paired with a via the index attribute. -// Construction is deferred until the canvas crosses an IntersectionObserver -// threshold so a 22-chart group doesn't pay for offscreen charts up front. +// paired with a via the index attribute. The +// chart-card carries `data-chart-slug` so the toolbar can refetch a single +// card from `/api/chart/{slug}?n=...` without a page reload. // -// URL state (n, y, mode, hidden) is the source of truth. Server emits -// scope/Y/mode toolbar links that navigate via plain ; client-side -// legend toggles rewrite ?hidden=... via history.replaceState so a -// permalink reproduces the view. +// URL state (n, y, mode, hidden) is the source of truth and the URL stays in +// sync via `history.replaceState`. Toolbar clicks are handled in JS: +// - `n` → refetch every chart on the page, swap data, chart.update("none"). +// - `y` → swap `chart.options.scales.y` in place; no fetch. +// - `mode` → recompute datasets client-side; no fetch. +// - legend toggle → mirror into `?hidden=...` like before. (function () { "use strict"; @@ -106,24 +108,38 @@ return keys.join(HIDDEN_DELIM); } - function rewriteHiddenInUrl(set) { + // Default value the server treats as "use the route's default scope". When + // the URL has no `n` we want to leave the param off so the server can + // re-pick its own default (50 on `/`, 100 on `/chart` and `/group`). + function applyUrlState(state) { var p = new URLSearchParams(window.location.search); - var v = serializeHidden(set); - if (v) { - p.set("hidden", v); - } else { - p.delete("hidden"); - } + if (state.n) p.set("n", state.n); else p.delete("n"); + if (state.y && state.y !== "linear") p.set("y", state.y); else p.delete("y"); + if (state.mode && state.mode !== "abs") p.set("mode", state.mode); else p.delete("mode"); + var h = serializeHidden(state.hidden || {}); + if (h) p.set("hidden", h); else p.delete("hidden"); var qs = p.toString(); var url = window.location.pathname + (qs ? "?" + qs : "") + window.location.hash; window.history.replaceState(null, "", url); } + function rewriteHiddenInUrl(set) { + var state = parseUrl(); + state.hidden = set; + applyUrlState(state); + } + // ----------------------------------------------------------------------- // Payload + dataset construction // ----------------------------------------------------------------------- - function readPayload(scriptNode) { - return JSON.parse(scriptNode.textContent); + function readInlinePayload(idx) { + var script = document.getElementById("chart-data-" + idx); + if (!script) return null; + try { + return JSON.parse(script.textContent); + } catch (e) { + return null; + } } function buildDatasets(payload, urlState) { @@ -166,10 +182,14 @@ }); } + function yAxisTitle(payload, urlState) { + return urlState.mode === "rel" ? 
"% of baseline" : (payload.unit || ""); + } + // ----------------------------------------------------------------------- // Tooltip // ----------------------------------------------------------------------- - function externalTooltipHandler(payload, host) { + function externalTooltipHandler(canvas, host) { return function (context) { var tooltipModel = context.tooltip; if (!host) return; @@ -179,6 +199,10 @@ return; } + // Always read the current payload from the canvas: a refetch may have + // replaced it under us since this handler was installed. + var payload = canvas.__bench_payload || { commits: [], unit: "" }; + var idx = tooltipModel.dataPoints && tooltipModel.dataPoints[0] ? tooltipModel.dataPoints[0].dataIndex : -1; @@ -242,34 +266,30 @@ } // ----------------------------------------------------------------------- - // Single-chart construction + // Single-chart construction + in-place rebuild // ----------------------------------------------------------------------- function constructChart(card, urlState) { var idx = card.getAttribute("data-chart-index"); - var script = document.getElementById("chart-data-" + idx); var canvas = card.querySelector('canvas[data-chart-index="' + idx + '"]'); - if (!script || !canvas || typeof Chart === "undefined") return null; + if (!canvas || typeof Chart === "undefined") return null; if (canvas.__bench_chart) return canvas.__bench_chart; - var payload; - try { - payload = readPayload(script); - } catch (e) { - return null; - } + // Prefer a payload that arrived via fetch (refetch landed before the + // canvas scrolled into view); else fall back to the inline JSON. + var payload = canvas.__bench_payload || readInlinePayload(idx); + if (!payload) return null; + canvas.__bench_payload = payload; var labels = (payload.commits || []).map(function (c) { return shortSha(c.sha); }); var datasets = buildDatasets(payload, urlState); - var unit = payload.unit || ""; var host = card.querySelector(".chart-tooltip-host"); - var yTitle = unit; - if (urlState.mode === "rel") yTitle = "% of baseline"; // Mobile gets the legend above the chart so the chart doesn't get pushed // off-screen by a tall legend on narrow viewports. var legendPosition = (window.matchMedia && window.matchMedia("(max-width: 768px)").matches) ? "top" : "bottom"; + var yTitle = yAxisTitle(payload, urlState); var chart = new Chart(canvas, { type: "line", data: { labels: labels, datasets: datasets }, @@ -303,7 +323,7 @@ }, tooltip: { enabled: false, - external: externalTooltipHandler(payload, host), + external: externalTooltipHandler(canvas, host), }, }, }, @@ -312,6 +332,202 @@ return chart; } + // Re-skin a chart from its current payload + url state. No fetch. + function rebuildChart(card, urlState) { + var idx = card.getAttribute("data-chart-index"); + var canvas = card.querySelector('canvas[data-chart-index="' + idx + '"]'); + if (!canvas) return; + var chart = canvas.__bench_chart; + var payload = canvas.__bench_payload; + if (!chart || !payload) return; + + chart.data.labels = (payload.commits || []).map(function (c) { return shortSha(c.sha); }); + chart.data.datasets = buildDatasets(payload, urlState); + chart.options.scales.y.type = urlState.y === "log" ? 
"logarithmic" : "linear"; + chart.options.scales.y.beginAtZero = urlState.y !== "log" && urlState.mode !== "rel"; + var t = yAxisTitle(payload, urlState); + chart.options.scales.y.title.display = !!t; + chart.options.scales.y.title.text = t; + chart.update("none"); + } + + // ----------------------------------------------------------------------- + // Loading + error overlays per card + // ----------------------------------------------------------------------- + function setCardLoading(card, on) { + var existing = card.querySelector(".chart-loading"); + if (on) { + if (existing) return; + var el = document.createElement("div"); + el.className = "chart-loading"; + el.textContent = "loading…"; + card.appendChild(el); + } else if (existing) { + existing.remove(); + } + } + + function showCardError(card, msg) { + var existing = card.querySelector(".chart-error"); + if (existing) existing.remove(); + var el = document.createElement("div"); + el.className = "chart-error"; + el.textContent = msg; + card.appendChild(el); + setTimeout(function () { if (el.parentNode) el.remove(); }, 4000); + } + + // ----------------------------------------------------------------------- + // Refetching when the commit window changes + // ----------------------------------------------------------------------- + function refetchAll(urlState) { + var cards = document.querySelectorAll(".chart-card[data-chart-slug]"); + if (!cards.length) return Promise.resolve(); + var n = urlState.n || ""; + var qs = n ? "?n=" + encodeURIComponent(n) : ""; + + var jobs = []; + cards.forEach(function (card) { + var slug = card.getAttribute("data-chart-slug"); + var canvas = card.querySelector("canvas"); + if (!slug || !canvas) return; + var prevPayload = canvas.__bench_payload; + setCardLoading(card, true); + var p = fetch("/api/chart/" + encodeURIComponent(slug) + qs, { + headers: { "accept": "application/json" }, + }) + .then(function (r) { + if (!r.ok) throw new Error("HTTP " + r.status); + return r.json(); + }) + .then(function (payload) { + canvas.__bench_payload = payload; + if (canvas.__bench_chart) { + rebuildChart(card, urlState); + } + // Else: chart not constructed yet; the IntersectionObserver path + // will read the new payload when the canvas eventually scrolls in. + }) + .catch(function (err) { + if (prevPayload) canvas.__bench_payload = prevPayload; + showCardError(card, "failed to load: " + (err && err.message ? 
err.message : err)); + }) + .then(function () { setCardLoading(card, false); }); + jobs.push(p); + }); + return Promise.all(jobs); + } + + // ----------------------------------------------------------------------- + // Toolbar wiring + // ----------------------------------------------------------------------- + function updateToolbarActive(group, value) { + var attr = "data-" + group; + var btns = document.querySelectorAll(".toolbar-btn[" + attr + "]"); + btns.forEach(function (b) { + var match = b.getAttribute(attr) === value; + b.classList.toggle("toolbar-btn--active", match); + }); + } + + function updateSubtitle(urlState, defaultN) { + var sub = document.querySelector(".page-header .subtitle"); + if (!sub) return; + var base = sub.getAttribute("data-base") || sub.textContent.split(" · ")[0]; + sub.setAttribute("data-base", base); + var bits = [base]; + var n = urlState.n || String(defaultN || ""); + if (n === "all") bits.push("all commits"); + else if (n) bits.push("last " + n + " commits"); + if (urlState.y === "log") bits.push("log"); + if (urlState.mode === "rel") bits.push("rel"); + sub.textContent = bits.join(" · "); + } + + function updateSliderUi(value) { + var slider = document.getElementById("scope-slider"); + var label = document.getElementById("scope-slider-label"); + if (slider && /^\d+$/.test(value)) slider.value = value; + if (label) label.textContent = value; + } + + function applyScope(value, defaultN) { + var state = parseUrl(); + state.n = value; + applyUrlState(state); + updateToolbarActive("scope", value); + updateSliderUi(value); + updateSubtitle(state, defaultN); + rewriteCardLinks(); + refetchAll(state); + } + + function applyY(value, defaultN) { + var state = parseUrl(); + state.y = value; + applyUrlState(state); + updateToolbarActive("y", value); + updateSubtitle(state, defaultN); + rewriteCardLinks(); + document.querySelectorAll(".chart-card[data-chart-index]").forEach(function (card) { + rebuildChart(card, state); + }); + } + + function applyMode(value, defaultN) { + var state = parseUrl(); + state.mode = value; + applyUrlState(state); + updateToolbarActive("mode", value); + updateSubtitle(state, defaultN); + rewriteCardLinks(); + document.querySelectorAll(".chart-card[data-chart-index]").forEach(function (card) { + rebuildChart(card, state); + }); + } + + // The chart-card title links carry the toolbar state in their query string + // so a click out to a permalink preserves the current view. After every + // toolbar change we rewrite them. + function rewriteCardLinks() { + var p = new URLSearchParams(window.location.search); + var qs = p.toString(); + var suffix = qs ? "?" + qs : ""; + document.querySelectorAll(".chart-card-title a[data-permalink]").forEach(function (a) { + a.setAttribute("href", a.getAttribute("data-permalink") + suffix); + }); + } + + function initToolbar(defaultN) { + var toolbar = document.querySelector(".toolbar"); + if (!toolbar) return; + + toolbar.addEventListener("click", function (e) { + var btn = e.target.closest(".toolbar-btn"); + if (!btn || !toolbar.contains(btn)) return; + // Hijack the link; we update state in place. 
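+      // (The buttons stay real links, e.g. `href="?n=50"`, so with JS
+      // disabled the click falls through to a normal navigation and the
+      // server renders the requested state.)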
+ e.preventDefault(); + if (btn.hasAttribute("data-scope")) { + applyScope(btn.getAttribute("data-scope"), defaultN); + } else if (btn.hasAttribute("data-y")) { + applyY(btn.getAttribute("data-y"), defaultN); + } else if (btn.hasAttribute("data-mode")) { + applyMode(btn.getAttribute("data-mode"), defaultN); + } + }); + + var slider = document.getElementById("scope-slider"); + var label = document.getElementById("scope-slider-label"); + if (slider) { + slider.addEventListener("input", function () { + if (label) label.textContent = slider.value; + }); + slider.addEventListener("change", function () { + applyScope(slider.value, defaultN); + }); + } + } + // ----------------------------------------------------------------------- // Page wiring // ----------------------------------------------------------------------- @@ -326,7 +542,7 @@ var io = new IntersectionObserver(function (entries) { entries.forEach(function (entry) { if (entry.isIntersecting) { - constructChart(entry.target, urlState); + constructChart(entry.target, parseUrl()); io.unobserve(entry.target); } }); @@ -344,22 +560,6 @@ } }); }); - - initSlider(); - } - - function initSlider() { - var slider = document.getElementById("scope-slider"); - var label = document.getElementById("scope-slider-label"); - if (!slider) return; - slider.addEventListener("input", function () { - if (label) label.textContent = slider.value; - }); - slider.addEventListener("change", function () { - var p = new URLSearchParams(window.location.search); - p.set("n", slider.value); - window.location.search = p.toString(); - }); } function initLandingFilter() { @@ -376,8 +576,11 @@ } function init() { + var main = document.querySelector("main"); + var defaultN = main && main.getAttribute("data-default-n"); initLandingFilter(); initCharts(); + initToolbar(defaultN); } if (document.readyState === "loading") { diff --git a/benchmarks-website/server/static/style.css b/benchmarks-website/server/static/style.css index bac0c505ab8..d96a62bc7d6 100644 --- a/benchmarks-website/server/static/style.css +++ b/benchmarks-website/server/static/style.css @@ -252,6 +252,31 @@ main > .chart-card:not(.chart-grid .chart-card) .chart-wrap { font-size: 0.875rem; } +/* Per-card loading + error indicators for in-place toolbar refetches. */ +.chart-loading, +.chart-error { + position: absolute; + top: 0.5rem; + right: 0.75rem; + font-size: 0.75rem; + padding: 0.15rem 0.5rem; + border-radius: 4px; + pointer-events: none; + z-index: 5; +} +.chart-loading { + background: var(--code-bg); + color: var(--muted); + border: 1px solid var(--border); +} +.chart-error { + background: var(--bad); + color: var(--accent-fg); + pointer-events: auto; + max-width: 18rem; + white-space: normal; +} + /* ---------------------------------------------------------------------- * Custom external tooltip * ---------------------------------------------------------------------- */ diff --git a/benchmarks-website/server/tests/snapshots/chart_page_query.snap b/benchmarks-website/server/tests/snapshots/chart_page_query.snap index 579d7067bef..ef9dea72bc4 100644 --- a/benchmarks-website/server/tests/snapshots/chart_page_query.snap +++ b/benchmarks-website/server/tests/snapshots/chart_page_query.snap @@ -2,4 +2,4 @@ source: benchmarks-website/server/tests/web_ui.rs expression: body --- -tpch sf=1 Q1 [nvme] — bench.vortex.dev

unit: ns · 2 series · 3 commits

+tpch sf=1 Q1 [nvme] — bench.vortex.dev

unit: ns · 2 series · 3 commits

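A minimal round-trip sketch of the `?hidden=` serialization in the charts.js hunk above (standalone JS; the series names here are invented, real labels being "engine:format"-shaped):

    var hidden = { "duckdb:parquet": true, "vortex:vortex": true };
    var p = new URLSearchParams();
    p.set("hidden", Object.keys(hidden).sort().join("|"));  // serializeHidden
    p.toString();                 // "hidden=duckdb%3Aparquet%7Cvortex%3Avortex"
    p.get("hidden");              // "duckdb:parquet|vortex:vortex", decoded as-is
    p.get("hidden").split("|");   // the input parseHiddenParam splits on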
diff --git a/benchmarks-website/server/tests/snapshots/group_page_query.snap b/benchmarks-website/server/tests/snapshots/group_page_query.snap index 045ceb8bc34..b5037adfe32 100644 --- a/benchmarks-website/server/tests/snapshots/group_page_query.snap +++ b/benchmarks-website/server/tests/snapshots/group_page_query.snap @@ -2,4 +2,4 @@ source: benchmarks-website/server/tests/web_ui.rs expression: body --- -tpch sf=1 [nvme] — bench.vortex.dev

1 chart

Q1

+tpch sf=1 [nvme] — bench.vortex.dev

1 chart

Q1

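How `applyUrlState` above maps view state to the address bar once defaults are elided; a sketch, assuming the page is `/` with an empty initial query:

    applyUrlState({ n: "",    y: "linear", mode: "abs", hidden: {} }); // -> "/"
    applyUrlState({ n: "200", y: "log",    mode: "abs", hidden: {} }); // -> "/?n=200&y=log"
    applyUrlState({ n: "all", y: "linear", mode: "rel", hidden: {} }); // -> "/?n=all&mode=rel"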
diff --git a/benchmarks-website/server/tests/snapshots/landing_page.snap b/benchmarks-website/server/tests/snapshots/landing_page.snap index 607d01be36d..27e2653f83d 100644 --- a/benchmarks-website/server/tests/snapshots/landing_page.snap +++ b/benchmarks-website/server/tests/snapshots/landing_page.snap @@ -2,4 +2,4 @@ source: benchmarks-website/server/tests/web_ui.rs expression: body --- -bench.vortex.dev

tpch sf=1 [nvme]

Compression

Compression Size

Random Access

cohere-large-10m / partitioned

+bench.vortex.dev

tpch sf=1 [nvme]

Q1

Compression

tpch-lineitem

Compression Size

tpch-lineitem

Random Access

taxi

cohere-large-10m / partitioned

threshold=0.75

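For orientation before the next round of changes: the payload that `/api/chart/{slug}` returns (and that the pages inline as JSON) is consumed by charts.js as roughly this shape. Field names are taken from usage in the hunks above; the values are invented:

    var examplePayload = {
      unit: "ns",
      commits: [{ sha: "abc1234def0", timestamp: "2026-04-25 18:15:05",
                  message: "an invented commit message", url: "https://github.com/…" }],
      // one array per series, index-aligned with `commits`; nulls mark gaps
      series: { "duckdb:parquet": [123456, null, 130012] }
    };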
diff --git a/benchmarks-website/server/tests/web_ui.rs b/benchmarks-website/server/tests/web_ui.rs index 201ef2f9008..8618cdf7bb3 100644 --- a/benchmarks-website/server/tests/web_ui.rs +++ b/benchmarks-website/server/tests/web_ui.rs @@ -199,7 +199,10 @@ async fn landing_page_snapshot() -> Result<()> { seed(&server).await?; let client = reqwest::Client::new(); - let resp = client.get(server.url("/")).send().await?; + // Pin ?n=100 so the snapshot doesn't change when the landing default + // (50) is tweaked. The `landing_page_default_window` test below covers + // the default explicitly. + let resp = client.get(server.url("/?n=100")).send().await?; assert_eq!(resp.status(), 200); let content_type = resp .headers() @@ -212,12 +215,52 @@ async fn landing_page_snapshot() -> Result<()> { ); let body = resp.text().await?; + // Phase 2: every chart is rendered inline on the landing page, so the + // page must contain at least one `` plus a matching JSON payload. + assert!( + body.contains("" + ); + assert!( + body.contains(r#"id="chart-data-0""#), + "landing page must inline at least one chart payload" + ); + assert!( + body.contains(r#"data-chart-slug="#), + "landing page chart cards must carry data-chart-slug for in-place refetch" + ); + insta_settings().bind(|| { insta::assert_snapshot!("landing_page", body); }); Ok(()) } +/// Without `?n=` the landing page defaults to last-50 commits (cheap by +/// default), distinct from the 100-commit default of `/chart` and `/group`. +#[tokio::test] +async fn landing_page_default_window() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let client = reqwest::Client::new(); + let resp = client.get(server.url("/")).send().await?; + assert_eq!(resp.status(), 200); + let body = resp.text().await?; + + assert!( + body.contains("last 50 commits"), + "landing page subtitle should reflect the n=50 default" + ); + // The toolbar should highlight `50` (data-scope) as active. + assert!( + body.contains(r#"toolbar-btn--active" href="?n=50""#) + || body.contains(r#"toolbar-btn--active" href="?n=50&"#), + "landing toolbar should mark scope=50 active by default" + ); + Ok(()) +} + #[tokio::test] async fn chart_page_snapshot() -> Result<()> { let server = Server::start().await?; @@ -368,6 +411,104 @@ async fn group_api_returns_charts() -> Result<()> { Ok(()) } +/// `GET /api/chart/{slug}` returns the same JSON shape that the HTML pages +/// inline as ` -// paired with a via the index attribute. The -// chart-card carries `data-chart-slug` so the toolbar can refetch a single -// card from `/api/chart/{slug}?n=...` without a page reload. -// -// URL state (n, y, mode, hidden) is the source of truth and the URL stays in -// sync via `history.replaceState`. Toolbar clicks are handled in JS: -// - `n` → refetch every chart on the page, swap data, chart.update("none"). -// - `y` → swap `chart.options.scales.y` in place; no fetch. -// - `mode` → recompute datasets client-side; no fetch. -// - legend toggle → mirror into `?hidden=...` like before. +// Per-chart UX: +// - Each `.chart-card` carries `data-chart-slug`. The card *owns* its own +// toolbar (`.toolbar--card`) — there is no page-level toolbar. +// - Each chart fetches up to 1000 commits once. The toolbar's "Show" buttons +// and slider set `chart.options.scales.x.min/max` to reveal a window of +// that fetched slice; we never refetch on a scope change. 
+// - The slider is throttled to ~16ms (one frame at 60fps) per v2's +// `CONFIG.ZOOM_THROTTLE_DELAY` so dragging the slider feels continuous. +// - Mouse wheel pans horizontally (chartjs-plugin-zoom does not expose +// pan-on-wheel, so a manual `wheel` listener calls `chart.pan(...)`). +// - Drag-pan + drag-rectangle-zoom are wired through the plugin. +// - A custom inline plugin draws a vertical crosshair at the hovered +// commit; the external tooltip is offset and `pointer-events: none` +// to fix the flicker described in the per-chart UX rebuild brief. (function () { "use strict"; + // ----------------------------------------------------------------------- + // Constants — match v2 (`origin/ct/vfvb:benchmarks-website/config.js`). + // ----------------------------------------------------------------------- + var ZOOM_THROTTLE_MS = 16; // one frame at ~60fps for slider drag + var FETCH_N = 1000; // matches `PER_CHART_FETCH_N` server-side + var DEFAULT_VISIBLE = 100; // initial visible window (last 100 of fetched) + // ----------------------------------------------------------------------- // Palette + helpers // ----------------------------------------------------------------------- @@ -27,9 +37,7 @@ "#0891b2", "#ca8a04", "#db2777", "#65a30d", "#475569", ]; - function colorFor(i) { - return palette[i % palette.length]; - } + function colorFor(i) { return palette[i % palette.length]; } function shortSha(sha) { return typeof sha === "string" ? sha.slice(0, 7) : String(sha); @@ -37,7 +45,6 @@ function shortDate(ts) { if (typeof ts !== "string") return ""; - // commits.timestamp arrives as either ISO 8601 or DuckDB's `YYYY-MM-DD HH:MM:SS`. return ts.slice(0, 10); } @@ -58,7 +65,6 @@ function formatNumber(v, unit) { if (v === null || v === undefined || Number.isNaN(v)) return "—"; if (unit === "ns") { - // Pick a friendlier unit when the magnitude warrants it. var abs = Math.abs(v); if (abs >= 1e9) return (v / 1e9).toFixed(2) + " s"; if (abs >= 1e6) return (v / 1e6).toFixed(2) + " ms"; @@ -75,141 +81,91 @@ return v.toString(); } - // ----------------------------------------------------------------------- - // URL state - // ----------------------------------------------------------------------- - function parseUrl() { - var p = new URLSearchParams(window.location.search); - return { - n: p.get("n") || "", - y: p.get("y") === "log" ? "log" : "linear", - mode: p.get("mode") === "rel" ? "rel" : "abs", - hidden: parseHiddenParam(p.get("hidden")), + // Throttle to a max call rate; trailing call is preserved so the final + // slider position is honoured. (`requestAnimationFrame` is conceptually + // similar but we want a hard ceiling regardless of when the browser + // schedules a frame.) + function throttle(fn, ms) { + var lastRan = 0; + var pending = null; + var pendingArgs = null; + return function () { + var now = Date.now(); + pendingArgs = arguments; + if (now - lastRan >= ms) { + lastRan = now; + fn.apply(null, pendingArgs); + } else if (!pending) { + var wait = ms - (now - lastRan); + pending = setTimeout(function () { + lastRan = Date.now(); + pending = null; + fn.apply(null, pendingArgs); + }, wait); + } }; } - // `|` cannot appear in our series labels (which are - // "engine:format"-shaped today), unlike `,`/`/` which could plausibly - // sneak in via dataset variants. URLSearchParams handles `|` as-is. 
- var HIDDEN_DELIM = "|"; - - function parseHiddenParam(s) { - if (!s) return Object.create(null); - var out = Object.create(null); - s.split(HIDDEN_DELIM).forEach(function (k) { - if (k) out[k] = true; - }); - return out; - } - - function serializeHidden(set) { - var keys = Object.keys(set).filter(function (k) { return set[k]; }); - keys.sort(); - return keys.join(HIDDEN_DELIM); - } - - // Default value the server treats as "use the route's default scope". When - // the URL has no `n` we want to leave the param off so the server can - // re-pick its own default (50 on `/`, 100 on `/chart` and `/group`). - function applyUrlState(state) { - var p = new URLSearchParams(window.location.search); - if (state.n) p.set("n", state.n); else p.delete("n"); - if (state.y && state.y !== "linear") p.set("y", state.y); else p.delete("y"); - if (state.mode && state.mode !== "abs") p.set("mode", state.mode); else p.delete("mode"); - var h = serializeHidden(state.hidden || {}); - if (h) p.set("hidden", h); else p.delete("hidden"); - var qs = p.toString(); - var url = window.location.pathname + (qs ? "?" + qs : "") + window.location.hash; - window.history.replaceState(null, "", url); - } - - function rewriteHiddenInUrl(set) { - var state = parseUrl(); - state.hidden = set; - applyUrlState(state); - } - // ----------------------------------------------------------------------- - // Payload + dataset construction + // Crosshair plugin: draws a vertical line at the chart's active hover + // index. Using an inline plugin is cheaper than pulling in + // chartjs-plugin-crosshair, which is overkill for this one feature. // ----------------------------------------------------------------------- - function readInlinePayload(idx) { - var script = document.getElementById("chart-data-" + idx); - if (!script) return null; - try { - return JSON.parse(script.textContent); - } catch (e) { - return null; - } - } - - function buildDatasets(payload, urlState) { - var raw = payload.series || {}; - var names = Object.keys(raw).sort(); - var values = names.map(function (name) { - return Array.isArray(raw[name]) ? raw[name].slice() : []; - }); - - if (urlState.mode === "rel") { - values = values.map(function (arr) { - var baseline = null; - for (var i = 0; i < arr.length; i++) { - if (arr[i] !== null && arr[i] !== undefined && !Number.isNaN(arr[i])) { - baseline = arr[i]; - break; - } - } - if (!baseline) return arr.map(function () { return null; }); - return arr.map(function (v) { - if (v === null || v === undefined || Number.isNaN(v)) return null; - return (v / baseline) * 100; - }); - }); - } - - return names.map(function (name, i) { - return { - label: name, - data: values[i], - rawData: raw[name], - borderColor: colorFor(i), - backgroundColor: colorFor(i), - spanGaps: true, - tension: 0.1, - pointRadius: 2, - pointHoverRadius: 5, - hidden: !!urlState.hidden[name], - }; - }); - } - - function yAxisTitle(payload, urlState) { - return urlState.mode === "rel" ? "% of baseline" : (payload.unit || ""); - } + var crosshairPlugin = { + id: "benchCrosshair", + afterDatasetsDraw: function (chart) { + var active = chart.tooltip && chart.tooltip.getActiveElements + ? chart.tooltip.getActiveElements() + : []; + if (!active || !active.length) return; + var x = active[0].element.x; + var ya = chart.scales && chart.scales.y; + if (!ya || !Number.isFinite(x)) return; + var ctx = chart.ctx; + ctx.save(); + // `--muted` from the page theme — read it lazily so dark mode picks + // up the right colour. 
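+      // (In this patch's style.css: #6b6b6b in light mode, #a1a1aa in dark.)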
+ var muted = getComputedStyle(document.documentElement) + .getPropertyValue("--muted").trim() || "#9ca3af"; + ctx.strokeStyle = muted; + ctx.lineWidth = 1; + ctx.setLineDash([4, 4]); + ctx.beginPath(); + ctx.moveTo(x, ya.top); + ctx.lineTo(x, ya.bottom); + ctx.stroke(); + ctx.restore(); + }, + }; // ----------------------------------------------------------------------- - // Tooltip + // External tooltip with offset + flip-on-overflow. + // + // Flicker fix: the tooltip host is **always** `pointer-events: none`. The + // previous implementation flipped it to `auto` when visible; the cursor + // would land on the tooltip, fire mouseout on the canvas, the tooltip + // would hide, the cursor would re-enter the canvas, and the cycle would + // repeat at event-loop frequency. The cost of `pointer-events: none` is + // that the github-link in the tooltip footer is no longer clickable, but + // the chart-card title already links to the permalink. // ----------------------------------------------------------------------- function externalTooltipHandler(canvas, host) { return function (context) { - var tooltipModel = context.tooltip; + var tt = context.tooltip; if (!host) return; - if (tooltipModel.opacity === 0) { + if (tt.opacity === 0) { host.style.opacity = "0"; - host.style.pointerEvents = "none"; return; } - // Always read the current payload from the canvas: a refetch may have - // replaced it under us since this handler was installed. var payload = canvas.__bench_payload || { commits: [], unit: "" }; - - var idx = tooltipModel.dataPoints && tooltipModel.dataPoints[0] - ? tooltipModel.dataPoints[0].dataIndex + var idx = tt.dataPoints && tt.dataPoints[0] + ? tt.dataPoints[0].dataIndex : -1; var commit = (payload.commits || [])[idx] || {}; var unit = payload.unit || ""; - var rows = (tooltipModel.dataPoints || []).map(function (dp) { + var rows = (tt.dataPoints || []).map(function (dp) { var ds = dp.dataset || {}; var raw = (ds.rawData || [])[idx]; var prevIdx = idx - 1; @@ -241,120 +197,322 @@ + ""; var msg = commit.message ? truncate(commit.message, 120) : ""; - var footerHtml = ""; - if (msg || commit.url) { - footerHtml = '" + : ""; host.innerHTML = titleHtml + '
<div class="tt-rows">' + rows + "</div>
" + footerHtml; + // Position the tooltip relative to its container, offset 12px from + // the cursor. Flip horizontally if it would overflow. var canvasRect = context.chart.canvas.getBoundingClientRect(); var hostRect = host.parentNode.getBoundingClientRect(); - var x = canvasRect.left - hostRect.left + tooltipModel.caretX; - var y = canvasRect.top - hostRect.top + tooltipModel.caretY; + var x = canvasRect.left - hostRect.left + tt.caretX; + var y = canvasRect.top - hostRect.top + tt.caretY; host.style.opacity = "1"; - host.style.pointerEvents = "auto"; host.style.left = x + "px"; host.style.top = y + "px"; + // Measure after content swap so flipping is correct. + var ttWidth = host.offsetWidth || 0; + var containerWidth = host.parentNode.clientWidth || 0; + var flip = (x + ttWidth + 24) > containerWidth; + host.style.transform = flip + ? "translate(calc(-100% - 12px), 12px)" + : "translate(12px, 12px)"; }; } // ----------------------------------------------------------------------- - // Single-chart construction + in-place rebuild + // Payload + datasets // ----------------------------------------------------------------------- - function constructChart(card, urlState) { + function readInlinePayload(idx) { + var s = document.getElementById("chart-data-" + idx); + if (!s) return null; + try { return JSON.parse(s.textContent); } catch (e) { return null; } + } + + function buildDatasets(payload, mode) { + var raw = payload.series || {}; + var names = Object.keys(raw).sort(); + var values = names.map(function (name) { + return Array.isArray(raw[name]) ? raw[name].slice() : []; + }); + + if (mode === "rel") { + values = values.map(function (arr) { + var baseline = null; + for (var i = 0; i < arr.length; i++) { + if (arr[i] !== null && arr[i] !== undefined && !Number.isNaN(arr[i])) { + baseline = arr[i]; + break; + } + } + if (!baseline) return arr.map(function () { return null; }); + return arr.map(function (v) { + if (v === null || v === undefined || Number.isNaN(v)) return null; + return (v / baseline) * 100; + }); + }); + } + + return names.map(function (name, i) { + return { + label: name, + data: values[i], + rawData: raw[name], + borderColor: colorFor(i), + backgroundColor: colorFor(i), + spanGaps: true, + tension: 0.1, + pointRadius: 2, + pointHoverRadius: 5, + }; + }); + } + + function yAxisTitle(payload, mode) { + return mode === "rel" ? "% of baseline" : (payload.unit || ""); + } + + // ----------------------------------------------------------------------- + // Per-card construction. State lives on the canvas: + // canvas.__bench_chart — Chart.js instance + // canvas.__bench_payload — last-fetched ChartResponse + // canvas.__bench_state — { y, mode, scope } (per-chart toolbar state) + // ----------------------------------------------------------------------- + function constructChart(card) { var idx = card.getAttribute("data-chart-index"); var canvas = card.querySelector('canvas[data-chart-index="' + idx + '"]'); if (!canvas || typeof Chart === "undefined") return null; if (canvas.__bench_chart) return canvas.__bench_chart; - // Prefer a payload that arrived via fetch (refetch landed before the - // canvas scrolled into view); else fall back to the inline JSON. 
var payload = canvas.__bench_payload || readInlinePayload(idx); if (!payload) return null; canvas.__bench_payload = payload; + var state = canvas.__bench_state || { y: "linear", mode: "abs", scope: DEFAULT_VISIBLE }; + canvas.__bench_state = state; + var labels = (payload.commits || []).map(function (c) { return shortSha(c.sha); }); - var datasets = buildDatasets(payload, urlState); + var datasets = buildDatasets(payload, state.mode); var host = card.querySelector(".chart-tooltip-host"); - - // Mobile gets the legend above the chart so the chart doesn't get pushed - // off-screen by a tall legend on narrow viewports. + var range = visibleRange(labels.length, state.scope); var legendPosition = (window.matchMedia && window.matchMedia("(max-width: 768px)").matches) ? "top" : "bottom"; - var yTitle = yAxisTitle(payload, urlState); var chart = new Chart(canvas, { type: "line", data: { labels: labels, datasets: datasets }, + plugins: [crosshairPlugin], options: { responsive: true, maintainAspectRatio: false, animation: false, - interaction: { mode: "index", intersect: false }, + // Snap-to-x-index, no vertical-intersection requirement: a stable + // hover anywhere over the chart, with the crosshair plugin painting + // the column. Combined with `pointer-events: none` on the tooltip + // host, this is the flicker fix. + interaction: { mode: "index", intersect: false, axis: "x" }, scales: { y: { - type: urlState.y === "log" ? "logarithmic" : "linear", - beginAtZero: urlState.y !== "log" && urlState.mode !== "rel", - title: { display: !!yTitle, text: yTitle }, + type: state.y === "log" ? "logarithmic" : "linear", + beginAtZero: state.y !== "log" && state.mode !== "rel", + title: { display: true, text: yAxisTitle(payload, state.mode) }, + }, + x: { + min: range.min, + max: range.max, + title: { display: false }, }, - x: { title: { display: false } }, }, plugins: { - legend: { - position: legendPosition, - onClick: function (e, item, legend) { - // Default toggle behaviour, then mirror into URL. - var ci = legend.chart; - var meta = ci.getDatasetMeta(item.datasetIndex); - meta.hidden = meta.hidden === null ? !ci.data.datasets[item.datasetIndex].hidden : null; - ci.update(); - var hiddenSet = parseHiddenParam(new URLSearchParams(window.location.search).get("hidden")); - var label = item.text; - if (meta.hidden) hiddenSet[label] = true; else delete hiddenSet[label]; - rewriteHiddenInUrl(hiddenSet); - }, - }, + legend: { position: legendPosition }, tooltip: { enabled: false, external: externalTooltipHandler(canvas, host), }, + // chartjs-plugin-zoom config — wheel-zoom is disabled because we + // want wheel-pan instead (handled by the canvas wheel listener + // below). Drag-pan and drag-rectangle-zoom are free. + zoom: { + zoom: { + wheel: { enabled: false }, + drag: { + enabled: true, + backgroundColor: "rgba(37, 99, 235, 0.10)", + }, + mode: "x", + }, + pan: { + enabled: true, + mode: "x", + modifierKey: null, + }, + limits: { + x: { min: 0, max: Math.max(0, labels.length - 1), minRange: 4 }, + }, + }, }, }, }); + canvas.__bench_chart = chart; + attachWheelPan(canvas, chart); return chart; } - // Re-skin a chart from its current payload + url state. No fetch. - function rebuildChart(card, urlState) { - var idx = card.getAttribute("data-chart-index"); - var canvas = card.querySelector('canvas[data-chart-index="' + idx + '"]'); - if (!canvas) return; - var chart = canvas.__bench_chart; + // Wheel = horizontal pan. 
Chart.js zoom plugin doesn't support wheel-pan + // out of the box (wheel is always zoom in its config), so we attach a + // `wheel` listener that translates `deltaY`/`deltaX` into `chart.pan`. + function attachWheelPan(canvas, chart) { + if (canvas.__bench_wheel_attached) return; + canvas.__bench_wheel_attached = true; + canvas.addEventListener("wheel", function (e) { + // Treat horizontal-wheel-or-shift+wheel as horizontal pan; otherwise + // also pan on plain vertical wheel so trackpad scroll-up/down moves + // through commit history without needing modifier keys. + var dx = (Math.abs(e.deltaX) > Math.abs(e.deltaY)) ? e.deltaX : e.deltaY; + if (!dx) return; + e.preventDefault(); + // Pan negative deltaX → forward in time. Multiplier tuned for trackpad + // feel; fast wheels still travel quickly because we accumulate deltas + // through the plugin's pan handler. + chart.pan({ x: -dx * 0.5 }, undefined, "none"); + }, { passive: false }); + } + + // ----------------------------------------------------------------------- + // Recompute helpers driven by the per-chart toolbar. + // ----------------------------------------------------------------------- + function visibleRange(commitCount, scope) { + if (commitCount <= 0) return { min: undefined, max: undefined }; + var maxIdx = commitCount - 1; + if (scope === "all" || !Number.isFinite(scope) || scope <= 0 || scope >= commitCount) { + return { min: 0, max: maxIdx }; + } + return { min: Math.max(0, maxIdx - (scope - 1)), max: maxIdx }; + } + + function applyScope(card, scopeValue) { + var canvas = card.querySelector("canvas"); + var chart = canvas && canvas.__bench_chart; + if (!chart) return; + var commits = chart.data.labels.length; + var scope = scopeValue === "all" ? "all" : parseInt(scopeValue, 10); + canvas.__bench_state.scope = scope; + var range = visibleRange(commits, scope); + chart.options.scales.x.min = range.min; + chart.options.scales.x.max = range.max; + chart.update("none"); + syncToolbarUi(card, "scope", String(scopeValue)); + } + + function applyY(card, yValue) { + var canvas = card.querySelector("canvas"); + var chart = canvas && canvas.__bench_chart; + if (!chart) return; + canvas.__bench_state.y = yValue; + chart.options.scales.y.type = yValue === "log" ? "logarithmic" : "linear"; + chart.options.scales.y.beginAtZero = yValue !== "log" + && canvas.__bench_state.mode !== "rel"; + chart.update("none"); + syncToolbarUi(card, "y", yValue); + } + + function applyMode(card, modeValue) { + var canvas = card.querySelector("canvas"); + var chart = canvas && canvas.__bench_chart; + if (!chart) return; + canvas.__bench_state.mode = modeValue; var payload = canvas.__bench_payload; - if (!chart || !payload) return; - - chart.data.labels = (payload.commits || []).map(function (c) { return shortSha(c.sha); }); - chart.data.datasets = buildDatasets(payload, urlState); - chart.options.scales.y.type = urlState.y === "log" ? 
"logarithmic" : "linear"; - chart.options.scales.y.beginAtZero = urlState.y !== "log" && urlState.mode !== "rel"; - var t = yAxisTitle(payload, urlState); - chart.options.scales.y.title.display = !!t; - chart.options.scales.y.title.text = t; + chart.data.datasets = buildDatasets(payload, modeValue); + chart.options.scales.y.beginAtZero = canvas.__bench_state.y !== "log" + && modeValue !== "rel"; + chart.options.scales.y.title.text = yAxisTitle(payload, modeValue); chart.update("none"); + syncToolbarUi(card, "mode", modeValue); + } + + function syncToolbarUi(card, group, value) { + var attr = "data-" + group; + card.querySelectorAll(".toolbar-btn[" + attr + "]").forEach(function (b) { + b.classList.toggle("toolbar-btn--active", b.getAttribute(attr) === value); + }); + if (group === "scope") { + var slider = card.querySelector('[data-role="scope-slider"]'); + var label = card.querySelector('[data-role="scope-slider-label"]'); + if (label) label.textContent = value; + if (slider && /^\d+$/.test(value)) slider.value = value; + } + } + + function bindToolbar(card) { + var toolbar = card.querySelector(".toolbar--card"); + if (!toolbar || toolbar.__bench_bound) return; + toolbar.__bench_bound = true; + + toolbar.addEventListener("click", function (e) { + var btn = e.target.closest(".toolbar-btn"); + if (!btn || !toolbar.contains(btn)) return; + if (btn.hasAttribute("data-scope")) applyScope(card, btn.getAttribute("data-scope")); + else if (btn.hasAttribute("data-y")) applyY(card, btn.getAttribute("data-y")); + else if (btn.hasAttribute("data-mode")) applyMode(card, btn.getAttribute("data-mode")); + }); + + var slider = toolbar.querySelector('[data-role="scope-slider"]'); + var label = toolbar.querySelector('[data-role="scope-slider-label"]'); + if (slider) { + // `input` (continuous), throttled so dragging stays at ~60fps even on + // pages with dozens of charts. Last value still lands because + // `throttle` preserves the trailing call. + var throttled = throttle(function () { + if (label) label.textContent = slider.value; + applyScope(card, slider.value); + }, ZOOM_THROTTLE_MS); + slider.addEventListener("input", throttled); + } } // ----------------------------------------------------------------------- - // Loading + error overlays per card + // Lazy fetch on `
` toggle for closed-by-default groups. // ----------------------------------------------------------------------- - function setCardLoading(card, on) { + function fetchAndConstruct(card) { + var canvas = card.querySelector("canvas"); + if (!canvas) return Promise.resolve(); + if (canvas.__bench_chart) return Promise.resolve(); + if (canvas.__bench_payload) { + constructChart(card); + bindToolbar(card); + return Promise.resolve(); + } + var slug = card.getAttribute("data-chart-slug"); + if (!slug) return Promise.resolve(); + showCardLoading(card, true); + return fetch("/api/chart/" + encodeURIComponent(slug) + "?n=" + FETCH_N, { + headers: { "accept": "application/json" }, + }) + .then(function (r) { + if (r.status === 404) return null; // empty chart, leave the shell + if (!r.ok) throw new Error("HTTP " + r.status); + return r.json(); + }) + .then(function (payload) { + if (!payload) return; + canvas.__bench_payload = payload; + constructChart(card); + bindToolbar(card); + }) + .catch(function (err) { + showCardError(card, "failed to load: " + (err && err.message ? err.message : err)); + }) + .then(function () { showCardLoading(card, false); }); + } + + function showCardLoading(card, on) { var existing = card.querySelector(".chart-loading"); if (on) { if (existing) return; @@ -377,187 +535,47 @@ setTimeout(function () { if (el.parentNode) el.remove(); }, 4000); } - // ----------------------------------------------------------------------- - // Refetching when the commit window changes - // ----------------------------------------------------------------------- - function refetchAll(urlState) { - var cards = document.querySelectorAll(".chart-card[data-chart-slug]"); - if (!cards.length) return Promise.resolve(); - var n = urlState.n || ""; - var qs = n ? "?n=" + encodeURIComponent(n) : ""; - - var jobs = []; - cards.forEach(function (card) { - var slug = card.getAttribute("data-chart-slug"); - var canvas = card.querySelector("canvas"); - if (!slug || !canvas) return; - var prevPayload = canvas.__bench_payload; - setCardLoading(card, true); - var p = fetch("/api/chart/" + encodeURIComponent(slug) + qs, { - headers: { "accept": "application/json" }, - }) - .then(function (r) { - if (!r.ok) throw new Error("HTTP " + r.status); - return r.json(); - }) - .then(function (payload) { - canvas.__bench_payload = payload; - if (canvas.__bench_chart) { - rebuildChart(card, urlState); - } - // Else: chart not constructed yet; the IntersectionObserver path - // will read the new payload when the canvas eventually scrolls in. - }) - .catch(function (err) { - if (prevPayload) canvas.__bench_payload = prevPayload; - showCardError(card, "failed to load: " + (err && err.message ? 
err.message : err)); - }) - .then(function () { setCardLoading(card, false); }); - jobs.push(p); - }); - return Promise.all(jobs); - } - - // ----------------------------------------------------------------------- - // Toolbar wiring - // ----------------------------------------------------------------------- - function updateToolbarActive(group, value) { - var attr = "data-" + group; - var btns = document.querySelectorAll(".toolbar-btn[" + attr + "]"); - btns.forEach(function (b) { - var match = b.getAttribute(attr) === value; - b.classList.toggle("toolbar-btn--active", match); - }); - } - - function updateSubtitle(urlState, defaultN) { - var sub = document.querySelector(".page-header .subtitle"); - if (!sub) return; - var base = sub.getAttribute("data-base") || sub.textContent.split(" · ")[0]; - sub.setAttribute("data-base", base); - var bits = [base]; - var n = urlState.n || String(defaultN || ""); - if (n === "all") bits.push("all commits"); - else if (n) bits.push("last " + n + " commits"); - if (urlState.y === "log") bits.push("log"); - if (urlState.mode === "rel") bits.push("rel"); - sub.textContent = bits.join(" · "); - } - - function updateSliderUi(value) { - var slider = document.getElementById("scope-slider"); - var label = document.getElementById("scope-slider-label"); - if (slider && /^\d+$/.test(value)) slider.value = value; - if (label) label.textContent = value; - } - - function applyScope(value, defaultN) { - var state = parseUrl(); - state.n = value; - applyUrlState(state); - updateToolbarActive("scope", value); - updateSliderUi(value); - updateSubtitle(state, defaultN); - rewriteCardLinks(); - refetchAll(state); - } - - function applyY(value, defaultN) { - var state = parseUrl(); - state.y = value; - applyUrlState(state); - updateToolbarActive("y", value); - updateSubtitle(state, defaultN); - rewriteCardLinks(); - document.querySelectorAll(".chart-card[data-chart-index]").forEach(function (card) { - rebuildChart(card, state); - }); - } - - function applyMode(value, defaultN) { - var state = parseUrl(); - state.mode = value; - applyUrlState(state); - updateToolbarActive("mode", value); - updateSubtitle(state, defaultN); - rewriteCardLinks(); - document.querySelectorAll(".chart-card[data-chart-index]").forEach(function (card) { - rebuildChart(card, state); - }); - } - - // The chart-card title links carry the toolbar state in their query string - // so a click out to a permalink preserves the current view. After every - // toolbar change we rewrite them. - function rewriteCardLinks() { - var p = new URLSearchParams(window.location.search); - var qs = p.toString(); - var suffix = qs ? "?" + qs : ""; - document.querySelectorAll(".chart-card-title a[data-permalink]").forEach(function (a) { - a.setAttribute("href", a.getAttribute("data-permalink") + suffix); - }); - } - - function initToolbar(defaultN) { - var toolbar = document.querySelector(".toolbar"); - if (!toolbar) return; - - toolbar.addEventListener("click", function (e) { - var btn = e.target.closest(".toolbar-btn"); - if (!btn || !toolbar.contains(btn)) return; - // Hijack the link; we update state in place. 
- e.preventDefault(); - if (btn.hasAttribute("data-scope")) { - applyScope(btn.getAttribute("data-scope"), defaultN); - } else if (btn.hasAttribute("data-y")) { - applyY(btn.getAttribute("data-y"), defaultN); - } else if (btn.hasAttribute("data-mode")) { - applyMode(btn.getAttribute("data-mode"), defaultN); - } - }); - - var slider = document.getElementById("scope-slider"); - var label = document.getElementById("scope-slider-label"); - if (slider) { - slider.addEventListener("input", function () { - if (label) label.textContent = slider.value; - }); - slider.addEventListener("change", function () { - applyScope(slider.value, defaultN); - }); - } - } - // ----------------------------------------------------------------------- // Page wiring // ----------------------------------------------------------------------- - function initCharts() { - var urlState = parseUrl(); + function initOpenCharts() { + // Charts that arrive with inline JSON (` +tpch sf=1 Q1 [nvme] — bench.vortex.dev

unit: ns · 2 series · 3 commits

Show100
Y
Mode
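The per-card "Show" buttons and slider drive `visibleRange` from the charts.js hunk above: they only move `scales.x.min/max` over the already-fetched slice. A worked example, assuming the defaults `FETCH_N = 1000` and `DEFAULT_VISIBLE = 100`:

    visibleRange(1000, 100);   // { min: 900, max: 999 } (last 100 points)
    visibleRange(1000, "all"); // { min: 0,   max: 999 }
    visibleRange(40, 100);     // { min: 0,   max: 39 }  (scope >= count shows all)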
diff --git a/benchmarks-website/server/tests/snapshots/group_page_query.snap b/benchmarks-website/server/tests/snapshots/group_page_query.snap index b5037adfe32..135e2603d94 100644 --- a/benchmarks-website/server/tests/snapshots/group_page_query.snap +++ b/benchmarks-website/server/tests/snapshots/group_page_query.snap @@ -2,4 +2,4 @@ source: benchmarks-website/server/tests/web_ui.rs expression: body --- -tpch sf=1 [nvme] — bench.vortex.dev

1 chart

Q1

+TPC-H (NVMe) (SF=1) — bench.vortex.dev

1 chart

Q1

Show100
Y
Mode
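The slider above funnels `input` events through `throttle(fn, 16)`. A standalone sketch of the trailing-call guarantee it provides:

    var log = throttle(function (v) { console.log(v); }, 16);
    log(1); // outside any window: runs immediately, logs 1
    log(2); // within 16 ms: queued
    log(3); // still within: replaces the queued args
    // ~16 ms after the first call, the trailing timeout fires once and logs 3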
diff --git a/benchmarks-website/server/tests/snapshots/landing_page.snap b/benchmarks-website/server/tests/snapshots/landing_page.snap index 27e2653f83d..3446f2c0d84 100644 --- a/benchmarks-website/server/tests/snapshots/landing_page.snap +++ b/benchmarks-website/server/tests/snapshots/landing_page.snap @@ -2,4 +2,4 @@ source: benchmarks-website/server/tests/web_ui.rs expression: body --- -bench.vortex.dev

tpch sf=1 [nvme]

Q1

Compression

tpch-lineitem

Compression Size

tpch-lineitem

Random Access

taxi

cohere-large-10m / partitioned

threshold=0.75

+bench.vortex.dev
Random Access1

taxi

Show100
Y
Mode
Compression1

tpch-lineitem

Show100
Y
Mode
Compression Size1

tpch-lineitem

Show100
Y
Mode
TPC-H (NVMe) (SF=1)1

Q1

Show100
Y
Mode
cohere-large-10m / partitioned1

threshold=0.75

Show100
Y
Mode
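The "Mode" buttons above switch `buildDatasets` into relative mode, which rescales each series against its first non-null point. A tiny worked example:

    // raw series (ns):  [200, null, 150, 100]   baseline = 200 (first non-null)
    // mode === "rel":   [100, null,  75,  50]   y-axis titled "% of baseline"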
diff --git a/benchmarks-website/server/tests/web_ui.rs b/benchmarks-website/server/tests/web_ui.rs index 8618cdf7bb3..45dc68e31d7 100644 --- a/benchmarks-website/server/tests/web_ui.rs +++ b/benchmarks-website/server/tests/web_ui.rs @@ -5,7 +5,7 @@ //! //! Builds a temp DuckDB via the same `/api/ingest` path real callers use, //! seeds it with a multi-commit fixture so chart series have more than one -//! point, then snapshots the rendered HTML for both routes plus a chart slug +//! point, then snapshots the rendered HTML for each route plus a chart slug //! round-trip. use std::net::SocketAddr; @@ -193,16 +193,54 @@ fn insta_settings() -> insta::Settings { s } +/// Lift a single chart slug from `/api/groups`, picking from a group whose +/// name matches `predicate`. Used by tests that need a real slug to drive +/// `/chart/{slug}` and `/api/chart/{slug}` round-trips. +async fn pick_chart_slug(server: &Server, predicate: impl Fn(&str) -> bool) -> Result { + let client = reqwest::Client::new(); + let groups: Value = client + .get(server.url("/api/groups")) + .send() + .await? + .json() + .await?; + groups["groups"] + .as_array() + .context("groups is array")? + .iter() + .find(|g| g["name"].as_str().is_some_and(|s| predicate(s))) + .and_then(|g| g["charts"].as_array()) + .and_then(|c| c.first()) + .and_then(|c| c["slug"].as_str()) + .map(str::to_string) + .context("matching chart slug") +} + +async fn pick_group_slug(server: &Server, predicate: impl Fn(&str) -> bool) -> Result { + let client = reqwest::Client::new(); + let groups: Value = client + .get(server.url("/api/groups")) + .send() + .await? + .json() + .await?; + groups["groups"] + .as_array() + .context("groups is array")? + .iter() + .find(|g| g["name"].as_str().is_some_and(|s| predicate(s))) + .and_then(|g| g["slug"].as_str()) + .map(str::to_string) + .context("matching group slug") +} + #[tokio::test] async fn landing_page_snapshot() -> Result<()> { let server = Server::start().await?; seed(&server).await?; let client = reqwest::Client::new(); - // Pin ?n=100 so the snapshot doesn't change when the landing default - // (50) is tweaked. The `landing_page_default_window` test below covers - // the default explicitly. - let resp = client.get(server.url("/?n=100")).send().await?; + let resp = client.get(server.url("/")).send().await?; assert_eq!(resp.status(), 200); let content_type = resp .headers() @@ -215,19 +253,18 @@ async fn landing_page_snapshot() -> Result<()> { ); let body = resp.text().await?; - // Phase 2: every chart is rendered inline on the landing page, so the - // page must contain at least one `` plus a matching JSON payload. + // Inline canvas + chart-data-0 from the open-by-default first group. assert!( body.contains("" ); assert!( body.contains(r#"id="chart-data-0""#), - "landing page must inline at least one chart payload" + "the open-by-default first group must inline its chart payload" ); assert!( body.contains(r#"data-chart-slug="#), - "landing page chart cards must carry data-chart-slug for in-place refetch" + "every chart card carries data-chart-slug for the lazy-fetch path" ); insta_settings().bind(|| { @@ -236,63 +273,121 @@ async fn landing_page_snapshot() -> Result<()> { Ok(()) } -/// Without `?n=` the landing page defaults to last-50 commits (cheap by -/// default), distinct from the 100-commit default of `/chart` and `/group`. +/// The first `
` group is rendered with the `open` attribute; every +/// other group lacks it, so the user sees only the first group's charts on +/// first paint. #[tokio::test] -async fn landing_page_default_window() -> Result<()> { +async fn details_first_group_open_others_closed() -> Result<()> { let server = Server::start().await?; seed(&server).await?; let client = reqwest::Client::new(); - let resp = client.get(server.url("/")).send().await?; - assert_eq!(resp.status(), 200); - let body = resp.text().await?; + let body = client.get(server.url("/")).send().await?.text().await?; + + let opens: Vec<_> = body + .match_indices("').map(|p| i + p).unwrap_or(i); + body[i..=tag_end].contains(" open") + }) + .collect(); + assert!(!opens.is_empty(), "landing page must render
"); + assert!(opens[0], "first group must be open"); + for (i, is_open) in opens.iter().enumerate().skip(1) { + assert!(!is_open, "group #{i} must be closed by default"); + } + Ok(()) +} + +/// Every `.chart-card` carries a `.toolbar.toolbar--card` so the user has +/// per-chart controls. There is no page-level toolbar. +#[tokio::test] +async fn chart_card_carries_per_chart_toolbar() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let client = reqwest::Client::new(); + let body = client.get(server.url("/")).send().await?.text().await?; + + let card_count = body.matches(r#"
0, "landing page must render chart cards"); + assert_eq!( + toolbar_count, card_count, + "every chart-card must contain a toolbar--card ({card_count} cards / {toolbar_count} toolbars)" + ); + // Same invariant on /chart/{slug}. + let slug = pick_chart_slug(&server, |s| s.starts_with("TPC-H")).await?; + let body = client + .get(server.url(&format!("/chart/{slug}"))) + .send() + .await? + .text() + .await?; assert!( - body.contains("last 50 commits"), - "landing page subtitle should reflect the n=50 default" + body.contains(r#"class="toolbar toolbar--card""#), + "chart page must carry a per-chart toolbar" ); - // The toolbar should highlight `50` (data-scope) as active. + + // Same invariant on /group/{slug}. + let group_slug = pick_group_slug(&server, |s| s.starts_with("TPC-H")).await?; + let body = client + .get(server.url(&format!("/group/{group_slug}"))) + .send() + .await? + .text() + .await?; assert!( - body.contains(r#"toolbar-btn--active" href="?n=50""#) - || body.contains(r#"toolbar-btn--active" href="?n=50&"#), - "landing toolbar should mark scope=50 active by default" + body.contains(r#"class="toolbar toolbar--card""#), + "group page must carry per-chart toolbars" ); Ok(()) } +/// Landing-page `
` summaries appear in the canonical v2 order: the +/// fixture seeds Random Access, Compression, Compression Size, TPC-H, and a +/// vector-search group. The first three are in [`api::GROUP_ORDER`] in the +/// expected positions; TPC-H follows; the unknown vector-search group sorts +/// last (alphabetical fallback after the listed names). +#[tokio::test] +async fn landing_groups_render_in_v2_order() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let client = reqwest::Client::new(); + let body = client.get(server.url("/")).send().await?.text().await?; + + // Extract group names in render order from the `data-group-name=` attrs. + let mut names = Vec::new(); + for window in body.split("data-group-name=\"").skip(1) { + if let Some(end) = window.find('"') { + names.push(window[..end].to_string()); + } + } + let expected = [ + "Random Access", + "Compression", + "Compression Size", + "TPC-H (NVMe) (SF=1)", + "cohere-large-10m / partitioned", + ]; + assert_eq!(names, expected, "v2 ordering"); + Ok(()) +} + #[tokio::test] async fn chart_page_snapshot() -> Result<()> { let server = Server::start().await?; seed(&server).await?; let client = reqwest::Client::new(); - // Pick the query_measurements chart: it has two series (engine:format - // combinations) so the snapshot exercises multi-series rendering. - let groups: Value = client - .get(server.url("/api/groups")) - .send() - .await? - .json() - .await?; - let slug = groups["groups"] - .as_array() - .context("groups is array")? - .iter() - .find(|g| { - g["name"] - .as_str() - .map(|s| s.starts_with("tpch")) - .unwrap_or(false) - }) - .and_then(|g| g["charts"].as_array()) - .and_then(|c| c.first()) - .and_then(|c| c["slug"].as_str()) - .context("tpch chart slug")? - .to_string(); + // The query_measurements chart has two series so the snapshot + // exercises multi-series rendering. + let slug = pick_chart_slug(&server, |s| s.starts_with("TPC-H")).await?; let resp = client - .get(server.url(&format!("/chart/{slug}?n=100"))) + .get(server.url(&format!("/chart/{slug}"))) .send() .await?; assert_eq!(resp.status(), 200); @@ -306,8 +401,12 @@ async fn chart_page_snapshot() -> Result<()> { "Chart.js must be referenced from the static asset route" ); assert!( - body.contains("class=\"toolbar\""), - "toolbar must be rendered on chart page" + body.contains(r#"
+tpch sf=1 Q1 [nvme] — bench.vortex.dev

unit: ns · 2 series · 3 commits

Show
Y
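
The `.snap` bodies in these diffs read as bare visible text because every snapshot assertion is bound through `insta_settings()`. A minimal sketch of how such a helper could be built on insta's filter support (the crate's `filters` feature); the two patterns are illustrative assumptions, not the project's real filter set:

```rust
/// Hypothetical sketch of a snapshot-normalization helper. The real
/// `insta_settings()` lives in `web_ui.rs`; these filters only show how
/// markup could be stripped so snapshots stay stable across cosmetic
/// HTML changes.
fn insta_settings() -> insta::Settings {
    let mut s = insta::Settings::clone_current();
    // Drop anything that looks like an HTML tag...
    s.add_filter(r"<[^>]*>", "");
    // ...then collapse leftover whitespace runs into single spaces.
    s.add_filter(r"\s+", " ");
    s
}
```

Binding these settings around `insta::assert_snapshot!` is what keeps the stored bodies free of markup.
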
diff --git a/benchmarks-website/server/tests/snapshots/group_page_query.snap b/benchmarks-website/server/tests/snapshots/group_page_query.snap
index 135e2603d94..438ac38ce2e 100644
--- a/benchmarks-website/server/tests/snapshots/group_page_query.snap
+++ b/benchmarks-website/server/tests/snapshots/group_page_query.snap
@@ -2,4 +2,4 @@
 source: benchmarks-website/server/tests/web_ui.rs
 expression: body
 ---
-TPC-H (NVMe) (SF=1) — bench.vortex.dev

1 chart

Q1

Show100
Y
Mode
+TPC-H (NVMe) (SF=1) — bench.vortex.dev

2 charts

Performance Summary

#1datafusion:vortex-file-compressed1.11x1.80 ms
#2duckdb:parquet1.60x900.00 us
Geomean of query time ratio to fastest (lower is better)

Q1

Show
Y

Q2

Show
Y
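
The "Performance Summary" block above ranks each `engine:format` series by the geometric mean of its per-query ratio to the fastest series, per the "Geomean of query time ratio to fastest (lower is better)" caption. A self-contained sketch of that aggregation, assuming a query-by-engine runtime matrix rather than the server's real types:

```rust
/// `times[q][e]` is the runtime of engine `e` on query `q` (any unit).
/// Returns one geomean ratio per engine; 1.0 means that engine was the
/// fastest on every query. Sketch only: the server may aggregate
/// differently (e.g. over a commit window).
fn geomean_ratio_to_fastest(times: &[Vec<f64>]) -> Vec<f64> {
    let engines = times.first().map_or(0, Vec::len);
    let mut log_sum = vec![0.0_f64; engines];
    for per_query in times {
        let fastest = per_query.iter().copied().fold(f64::INFINITY, f64::min);
        for (e, t) in per_query.iter().enumerate() {
            // Geomean via mean-of-logs, which avoids overflow on long suites.
            log_sum[e] += (t / fastest).ln();
        }
    }
    log_sum
        .into_iter()
        .map(|s| (s / times.len() as f64).exp())
        .collect()
}
```
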
diff --git a/benchmarks-website/server/tests/snapshots/landing_page.snap b/benchmarks-website/server/tests/snapshots/landing_page.snap
index 3446f2c0d84..787d0730739 100644
--- a/benchmarks-website/server/tests/snapshots/landing_page.snap
+++ b/benchmarks-website/server/tests/snapshots/landing_page.snap
@@ -2,4 +2,4 @@
 source: benchmarks-website/server/tests/web_ui.rs
 expression: body
 ---
-bench.vortex.dev
Random Access1

taxi

Show100
Y
Mode
Compression1

tpch-lineitem

Show100
Y
Mode
Compression Size1

tpch-lineitem

Show100
Y
Mode
TPC-H (NVMe) (SF=1)1

Q1

Show100
Y
Mode
cohere-large-10m / partitioned1

threshold=0.75

Show100
Y
Mode
+bench.vortex.dev
Random Access1 chart

Random Access Performance

#1vortex-file-compressed100.50 us1.00x
#2parquet201.00 us2.00x
Random access time | Ratio to fastest (lower is better)

taxi

Show
Y
Compression1 chart

Compression Throughput vs Parquet

writeWrite Speed (Compression)2.00x
scanScan Speed (Decompression)2.00x
Inverse geomean of Vortex/Parquet ratios (higher is better)

tpch-lineitem

Show
Y
Compression Size1 chart

Compression Size Summary

minMin Size Ratio0.50x
meanMean Size Ratio0.50x
maxMax Size Ratio0.50x
Geomean of Vortex/Parquet size ratios (lower is better)

tpch-lineitem

Show
Y
TPC-H (NVMe) (SF=1)2 charts

Performance Summary

#1datafusion:vortex-file-compressed1.11x1.80 ms
#2duckdb:parquet1.60x900.00 us
Geomean of query time ratio to fastest (lower is better)

Q1

Show
Y

Q2

Show
Y
cohere-large-10m / partitioned1 chart

threshold=0.75

Show
Y
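
The landing snapshot above lists groups in the canonical v2 order that `landing_groups_render_in_v2_order` pins down: names present in `api::GROUP_ORDER` sort by their listed position, and unknown groups fall back to alphabetical order after them. A sketch of that comparator; the `GROUP_ORDER` contents below are an assumption derived from the fixture, not the server's real constant:

```rust
/// Known group names, in render order. Anything not listed here sorts
/// after every listed name, alphabetically (byte-wise).
const GROUP_ORDER: &[&str] = &["Random Access", "Compression", "Compression Size"];

fn group_sort_key(name: &str) -> (usize, &str) {
    let pos = GROUP_ORDER
        .iter()
        .position(|g| *g == name)
        .unwrap_or(GROUP_ORDER.len());
    (pos, name)
}

fn sort_groups(names: &mut [&str]) {
    names.sort_by(|a, b| group_sort_key(a).cmp(&group_sort_key(b)));
}
```

Under this rule the fixture's two unlisted groups keep the expected order, since `TPC-H (NVMe) (SF=1)` sorts before `cohere-large-10m / partitioned` byte-wise.
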
diff --git a/benchmarks-website/server/tests/web_ui.rs b/benchmarks-website/server/tests/web_ui.rs index 8fe20bb9044..832fc9362a3 100644 --- a/benchmarks-website/server/tests/web_ui.rs +++ b/benchmarks-website/server/tests/web_ui.rs @@ -125,6 +125,18 @@ fn envelope_for(sha: &str, ts: &str, msg: &str, value_bias: i64) -> Value { "value_ns": 800_000 + value_bias, "all_runtimes_ns": [800_000 + value_bias] }, + { + "kind": "query_measurement", + "commit_sha": sha, + "dataset": "tpch", + "scale_factor": "1", + "query_idx": 2, + "storage": "nvme", + "engine": "datafusion", + "format": "vortex-file-compressed", + "value_ns": 600_000 + value_bias, + "all_runtimes_ns": [600_000 + value_bias] + }, { "kind": "compression_time", "commit_sha": sha, @@ -134,6 +146,33 @@ fn envelope_for(sha: &str, ts: &str, msg: &str, value_bias: i64) -> Value { "value_ns": 9_000 + value_bias, "all_runtimes_ns": [9_000 + value_bias] }, + { + "kind": "compression_time", + "commit_sha": sha, + "dataset": "tpch-lineitem", + "format": "vortex-file-compressed", + "op": "decode", + "value_ns": 5_000 + value_bias, + "all_runtimes_ns": [5_000 + value_bias] + }, + { + "kind": "compression_time", + "commit_sha": sha, + "dataset": "tpch-lineitem", + "format": "parquet", + "op": "encode", + "value_ns": 18_000 + (2 * value_bias), + "all_runtimes_ns": [18_000 + (2 * value_bias)] + }, + { + "kind": "compression_time", + "commit_sha": sha, + "dataset": "tpch-lineitem", + "format": "parquet", + "op": "decode", + "value_ns": 10_000 + (2 * value_bias), + "all_runtimes_ns": [10_000 + (2 * value_bias)] + }, { "kind": "compression_size", "commit_sha": sha, @@ -141,6 +180,13 @@ fn envelope_for(sha: &str, ts: &str, msg: &str, value_bias: i64) -> Value { "format": "vortex-file-compressed", "value_bytes": 4_000 + value_bias }, + { + "kind": "compression_size", + "commit_sha": sha, + "dataset": "tpch-lineitem", + "format": "parquet", + "value_bytes": 8_000 + (2 * value_bias) + }, { "kind": "random_access_time", "commit_sha": sha, @@ -149,6 +195,14 @@ fn envelope_for(sha: &str, ts: &str, msg: &str, value_bias: i64) -> Value { "value_ns": 500 + value_bias, "all_runtimes_ns": [500 + value_bias] }, + { + "kind": "random_access_time", + "commit_sha": sha, + "dataset": "taxi", + "format": "parquet", + "value_ns": 1_000 + (2 * value_bias), + "all_runtimes_ns": [1_000 + (2 * value_bias)] + }, { "kind": "vector_search_run", "commit_sha": sha, @@ -234,6 +288,23 @@ async fn pick_group_slug(server: &Server, predicate: impl Fn(&str) -> bool) -> R .context("matching group slug") } +fn group_by_name<'a>(groups: &'a Value, name: &str) -> Result<&'a Value> { + groups["groups"] + .as_array() + .context("groups is array")? + .iter() + .find(|g| g["name"].as_str() == Some(name)) + .with_context(|| format!("group {name:?} exists")) +} + +fn assert_close(actual: f64, expected: f64) { + let delta = (actual - expected).abs(); + assert!( + delta < 0.000_001, + "expected {actual} to be close to {expected}" + ); +} + #[tokio::test] async fn landing_page_snapshot() -> Result<()> { let server = Server::start().await?; @@ -266,6 +337,10 @@ async fn landing_page_snapshot() -> Result<()> { body.contains(r#"data-chart-slug="#), "every chart card carries data-chart-slug for the lazy-fetch path" ); + assert!( + !body.contains(r#"id="group-search""#), + "landing page should not render the old group search bar" + ); insta_settings().bind(|| { insta::assert_snapshot!("landing_page", body); @@ -273,7 +348,7 @@ async fn landing_page_snapshot() -> Result<()> { Ok(()) } -/// The first `
` group is rendered with the `open` attribute; every +/// The first group disclosure is rendered with the `open` attribute; every /// other group lacks it, so the user sees only the first group's charts on /// first paint. #[tokio::test] @@ -285,7 +360,7 @@ async fn details_first_group_open_others_closed() -> Result<()> { let body = client.get(server.url("/")).send().await?.text().await?; let opens: Vec<_> = body - .match_indices("').map(|p| i + p).unwrap_or(i); body[i..=tag_end].contains(" open") @@ -299,8 +374,54 @@ async fn details_first_group_open_others_closed() -> Result<()> { Ok(()) } -/// Every `.chart-card` carries a `.toolbar.toolbar--card` so the user has -/// per-chart controls. There is no page-level toolbar. +#[tokio::test] +async fn collapsed_groups_still_show_summaries() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let client = reqwest::Client::new(); + let body = client.get(server.url("/")).send().await?.text().await?; + + let mut found_visible_summary = false; + for (group_start, _) in body.match_indices(r#"
') + .map(|p| details_start + p) + .context("details tag closes")?; + let is_open = body[details_start..=details_tag_end].contains(" open"); + if is_open { + continue; + } + + let summary_end = body[details_start..] + .find("
") + .map(|p| details_start + p) + .context("disclosure closes")?; + let chart_grid_start = body[summary_end..] + .find(r#"
"#) + .map(|p| summary_end + p) + .context("details contains chart grid")?; + let visible_region = &body[summary_end..chart_grid_start]; + if visible_region.contains(r#"class="benchmark-scores-summary""#) { + found_visible_summary = true; + break; + } + } + + assert!( + found_visible_summary, + "at least one closed group should render its score summary before the hidden chart grid" + ); + Ok(()) +} + +/// Every `.chart-card` carries a compact `.toolbar.toolbar--card` so the user +/// has per-chart controls. There is no page-level toolbar, no preset scope +/// button row, and no abs/rel mode toggle. #[tokio::test] async fn chart_card_carries_per_chart_toolbar() -> Result<()> { let server = Server::start().await?; @@ -316,6 +437,22 @@ async fn chart_card_carries_per_chart_toolbar() -> Result<()> { toolbar_count, card_count, "every chart-card must contain a toolbar--card ({card_count} cards / {toolbar_count} toolbars)" ); + assert!( + !body.contains(r#"data-mode="#), + "abs/rel mode buttons should not render" + ); + assert!( + !body.contains(r#"data-scope="#), + "preset scope buttons should not render; use the slider instead" + ); + assert!( + body.contains(r#"data-role="scope-slider""#), + "scope slider should remain available" + ); + assert!( + !body.contains(r#"scope-slider-label"#), + "scope value labels should not add repeated numbers to every card" + ); // Same invariant on /chart/{slug}. let slug = pick_chart_slug(&server, |s| s.starts_with("TPC-H")).await?; @@ -329,6 +466,10 @@ async fn chart_card_carries_per_chart_toolbar() -> Result<()> { body.contains(r#"class="toolbar toolbar--card""#), "chart page must carry a per-chart toolbar" ); + assert!(!body.contains(r#"data-mode="#)); + assert!(!body.contains(r#"data-scope="#)); + assert!(body.contains(r#"data-role="scope-slider""#)); + assert!(!body.contains(r#"scope-slider-label"#)); // Same invariant on /group/{slug}. let group_slug = pick_group_slug(&server, |s| s.starts_with("TPC-H")).await?; @@ -342,6 +483,10 @@ async fn chart_card_carries_per_chart_toolbar() -> Result<()> { body.contains(r#"class="toolbar toolbar--card""#), "group page must carry per-chart toolbars" ); + assert!(!body.contains(r#"data-mode="#)); + assert!(!body.contains(r#"data-scope="#)); + assert!(body.contains(r#"data-role="scope-slider""#)); + assert!(!body.contains(r#"scope-slider-label"#)); Ok(()) } @@ -397,11 +542,11 @@ async fn chart_page_snapshot() -> Result<()> { "chart payload must be embedded inline" ); assert!( - body.contains(r#" +tpch sf=1 Q1 [nvme] — bench.vortex.dev

unit: ns · 2 series · 3 commits

Show
Y
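
The toolbar tests in the hunk above drop the preset `?n=` scope buttons and the abs/rel mode toggle in favor of a per-chart slider, while the chart routes keep accepting `?n=&y=&mode=&hidden=`. A sketch of how those query parameters could be modeled with serde; the field types and the default window size are assumptions for illustration:

```rust
use serde::Deserialize;

/// Hypothetical query-parameter model for `/chart/{slug}` and
/// `/api/chart/{slug}`. The server's real struct may differ.
#[derive(Debug, Deserialize)]
struct ChartParams {
    /// Trailing-commit window, driven by the scope slider.
    #[serde(default = "default_n")]
    n: u32,
    /// Y-axis scale hint (e.g. linear vs. log).
    y: Option<String>,
    /// View-mode hint.
    mode: Option<String>,
    /// Comma-separated series names to hide on first render.
    hidden: Option<String>,
}

fn default_n() -> u32 {
    100
}
```
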
diff --git a/benchmarks-website/server/tests/snapshots/group_page_query.snap b/benchmarks-website/server/tests/snapshots/group_page_query.snap
index 438ac38ce2e..3592a362b95 100644
--- a/benchmarks-website/server/tests/snapshots/group_page_query.snap
+++ b/benchmarks-website/server/tests/snapshots/group_page_query.snap
@@ -2,4 +2,4 @@
 source: benchmarks-website/server/tests/web_ui.rs
 expression: body
 ---
-TPC-H (NVMe) (SF=1) — bench.vortex.dev

2 charts

Performance Summary

#1datafusion:vortex-file-compressed1.11x1.80 ms
#2duckdb:parquet1.60x900.00 us
Geomean of query time ratio to fastest (lower is better)

Q1

Show
Y

Q2

Show
Y
+TPC-H (NVMe) (SF=1) — bench.vortex.dev

2 charts

Performance Summary

#1datafusion:vortex-file-compressed1.11x1.80 ms
#2duckdb:parquet1.60x900.00 us
Geomean of query time ratio to fastest (lower is better)

Q1

Show
Y

Q2

Show
Y
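
The summary rows in these snapshots render raw nanosecond measurements as scaled durations: 900_000 ns shows as "900.00 us" and 1_800_000 ns as "1.80 ms". A small formatting sketch that reproduces those shapes; the unit thresholds and two-decimal precision are inferred from the snapshot text, not taken from the server's code:

```rust
/// Render a nanosecond quantity the way the snapshots display it.
fn format_ns(ns: f64) -> String {
    if ns >= 1_000_000_000.0 {
        format!("{:.2} s", ns / 1_000_000_000.0)
    } else if ns >= 1_000_000.0 {
        format!("{:.2} ms", ns / 1_000_000.0)
    } else if ns >= 1_000.0 {
        format!("{:.2} us", ns / 1_000.0)
    } else {
        format!("{ns:.0} ns")
    }
}
```
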
diff --git a/benchmarks-website/server/tests/snapshots/landing_page.snap b/benchmarks-website/server/tests/snapshots/landing_page.snap
index 787d0730739..494a1d9daa8 100644
--- a/benchmarks-website/server/tests/snapshots/landing_page.snap
+++ b/benchmarks-website/server/tests/snapshots/landing_page.snap
@@ -2,4 +2,4 @@
 source: benchmarks-website/server/tests/web_ui.rs
 expression: body
 ---
-bench.vortex.dev
Random Access1 chart

Random Access Performance

#1vortex-file-compressed100.50 us1.00x
#2parquet201.00 us2.00x
Random access time | Ratio to fastest (lower is better)

taxi

Show
Y
Compression1 chart

Compression Throughput vs Parquet

writeWrite Speed (Compression)2.00x
scanScan Speed (Decompression)2.00x
Inverse geomean of Vortex/Parquet ratios (higher is better)

tpch-lineitem

Show
Y
Compression Size1 chart

Compression Size Summary

minMin Size Ratio0.50x
meanMean Size Ratio0.50x
maxMax Size Ratio0.50x
Geomean of Vortex/Parquet size ratios (lower is better)

tpch-lineitem

Show
Y
TPC-H (NVMe) (SF=1)2 charts

Performance Summary

#1datafusion:vortex-file-compressed1.11x1.80 ms
#2duckdb:parquet1.60x900.00 us
Geomean of query time ratio to fastest (lower is better)

Q1

Show
Y

Q2

Show
Y
cohere-large-10m / partitioned1 chart

threshold=0.75

Show
Y
+bench.vortex.dev
Random Access1 chart

Random Access Performance

#1vortex-file-compressed100.50 us1.00x
#2parquet201.00 us2.00x
Random access time | Ratio to fastest (lower is better)

taxi

Show
Y
Compression1 chart

Compression Throughput vs Parquet

⚡Write Speed (Compression)2.00x
📤Scan Speed (Decompression)2.00x
Inverse geomean of Vortex/Parquet ratios (higher is better)

tpch-lineitem

Show
Y
Compression Size1 chart

Compression Size Summary

⬇️Min Size Ratio0.50x
📊Mean Size Ratio0.50x
⬆️Max Size Ratio0.50x
Geomean of Vortex/Parquet size ratios (lower is better)

tpch-lineitem

Show
Y
TPC-H (NVMe) (SF=1)2 charts

Performance Summary

#1datafusion:vortex-file-compressed1.11x1.80 ms
#2duckdb:parquet1.60x900.00 us
Geomean of query time ratio to fastest (lower is better)

Q1

Show
Y

Q2

Show
Y
cohere-large-10m / partitioned1 chart

threshold=0.75

Show
Y
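
This snapshot replaces the previous fixture's textual metric keys (`write`, `scan`, `min`, `mean`, `max`) with the v2 emoji icons that the `web_ui.rs` hunk below asserts on. A sketch of the implied mapping; the key names are assumptions read off the older snapshot, not the server's real identifiers:

```rust
/// Map a summary metric to its v2 icon. Unknown metrics render bare.
fn summary_icon(metric: &str) -> &'static str {
    match metric {
        "write" => "⚡",  // Write Speed (Compression)
        "scan" => "📤",   // Scan Speed (Decompression)
        "min" => "⬇️",    // Min Size Ratio
        "mean" => "📊",   // Mean Size Ratio
        "max" => "⬆️",    // Max Size Ratio
        _ => "",
    }
}
```
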
diff --git a/benchmarks-website/server/tests/web_ui.rs b/benchmarks-website/server/tests/web_ui.rs index 832fc9362a3..ab3f752be59 100644 --- a/benchmarks-website/server/tests/web_ui.rs +++ b/benchmarks-website/server/tests/web_ui.rs @@ -341,6 +341,32 @@ async fn landing_page_snapshot() -> Result<()> { !body.contains(r#"id="group-search""#), "landing page should not render the old group search bar" ); + assert!( + body.contains(r#"class="sticky-header""#), + "landing page should render the v2-style top navbar" + ); + assert!( + body.contains(r#"data-action="expand-all""#) + && body.contains(r#"data-action="collapse-all""#), + "navbar should expose expand/collapse controls" + ); + assert!( + body.contains(r#"data-role="theme-toggle""#), + "navbar should expose a theme toggle" + ); + assert!( + body.contains(r#"class="btn-icon""#) + || body.contains(r#"class="btn-icon theme-icon theme-icon-light""#), + "navbar controls should render icons" + ); + assert!( + body.contains(r#"vortex_black_nobg.svg"#) && body.contains(r#"vortex_white_nobg.svg"#), + "navbar should render the Vortex logo assets" + ); + assert!( + body.contains("⚡") && body.contains("📤") && body.contains("⬇️") && body.contains("📊"), + "summaries should render the v2 summary icons" + ); insta_settings().bind(|| { insta::assert_snapshot!("landing_page", body); @@ -873,6 +899,8 @@ async fn static_assets_are_served() -> Result<()> { ), ("/static/chart-init.js", "application/javascript"), ("/static/style.css", "text/css"), + ("/vortex_black_nobg.svg", "image/svg+xml"), + ("/vortex_white_nobg.svg", "image/svg+xml"), ] { let resp = client.get(server.url(path)).send().await?; assert_eq!(resp.status(), 200, "GET {path} should be 200"); From 5c1ee8f5da02826a585ac2ea3121770c5965f6dc Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 29 Apr 2026 01:58:32 +0000 Subject: [PATCH 20/26] docs(benchmarks-website): refresh planning docs to match current code - AGENTS.md: bullet 4 now lists all four JSON routes (groups, chart, group, health), not just chart. - 02-contracts.md: Read API section adds /api/group/:slug and /health, drops the "two routes" framing. - 01-schema.md: relax commits.{message,author_name,author_email, committer_name,committer_email} to optional, matching schema.rs DDL. - README.md: remove "/health endpoint" from the not-yet-done list (it's implemented in api.rs and routed in app.rs), refresh the line range for the collect_group_charts N+1 reference. Signed-off-by: Claude --- benchmarks-website/planning/01-schema.md | 10 ++++---- benchmarks-website/planning/02-contracts.md | 27 ++++++++++++++++----- benchmarks-website/planning/AGENTS.md | 5 ++-- benchmarks-website/planning/README.md | 7 +++--- 4 files changed, 33 insertions(+), 16 deletions(-) diff --git a/benchmarks-website/planning/01-schema.md b/benchmarks-website/planning/01-schema.md index b74d0bb9a92..dfc6b05ba27 100644 --- a/benchmarks-website/planning/01-schema.md +++ b/benchmarks-website/planning/01-schema.md @@ -77,11 +77,11 @@ DuckDB types, indexes, and constraint syntax. 
|---|---|---|---| | `commit_sha` | string | yes (PK) | 40-hex lowercase | | `timestamp` | timestamptz | yes | | -| `message` | string | yes | first line only | -| `author_name` | string | yes | | -| `author_email` | string | yes | | -| `committer_name` | string | yes | | -| `committer_email` | string | yes | | +| `message` | string | optional | first line only | +| `author_name` | string | optional | | +| `author_email` | string | optional | | +| `committer_name` | string | optional | | +| `committer_email` | string | optional | | | `tree_sha` | string | yes | | | `url` | string | yes | | diff --git a/benchmarks-website/planning/02-contracts.md b/benchmarks-website/planning/02-contracts.md index 9aba31fb73b..5d995c7120d 100644 --- a/benchmarks-website/planning/02-contracts.md +++ b/benchmarks-website/planning/02-contracts.md @@ -170,10 +170,10 @@ slug format, change it without breaking the web-ui, or make it debuggable (e.g. `qm-tpch-q01-nvme-sf1`) - the only contract is "`/api/chart/:slug` accepts any slug `/api/groups` returned." -## Read API (alpha) +## Read API -Two routes - just enough to render one chart page. Field shapes are -not binding; refine during implementation. +Four JSON routes today. Field shapes are not binding; refine during +implementation. ### `GET /api/groups` @@ -182,7 +182,8 @@ just enough metadata to link to a chart. The server walks each fact table to produce the group keys defined in [`01-schema.md`](./01-schema.md#group--chart--series-fit). Every chart entry includes a `slug` that round-trips through -`/api/chart/:slug`. +`/api/chart/:slug`, and every group has its own `slug` that +round-trips through `/api/group/:slug`. ### `GET /api/chart/:slug` @@ -190,6 +191,20 @@ Returns the data for one chart: a `display_name`, a `unit`, an ordered `commits` list (sha + timestamp + first-line message + url), and a `series` map keyed by series name where each value is an array aligned to `commits` (with `null` for missing data points). +Accepts `?n=&y=&mode=&hidden=` to scope the commit window and +configure the rendered view. + +### `GET /api/group/:slug` + +Returns every chart in a group as a single batch payload, in render +order. Used by the `/group/{slug}` HTML page and (today) by the +landing page hydration path. Same query parameters as +`/api/chart/:slug`. + +### `GET /health` + +Returns `{ status, db_path, schema_version, latest_commit_timestamp, +row_counts }`. Cheap; suitable for load-balancer health checks. -Per-commit page, zoom/pan, range queries, and the rest of the read -API are deferred. See [`deferred.md`](./deferred.md). +Per-commit page, range queries, and the rest of the read API are +deferred. See [`deferred.md`](./deferred.md). diff --git a/benchmarks-website/planning/AGENTS.md b/benchmarks-website/planning/AGENTS.md index ba4d6cec6f7..7777804b08a 100644 --- a/benchmarks-website/planning/AGENTS.md +++ b/benchmarks-website/planning/AGENTS.md @@ -29,8 +29,9 @@ it as `vortex-bench-server` at `benchmarks-website/server/`. [`01-schema.md`](./01-schema.md). - One ingest endpoint: `POST /api/ingest`, gated by a static bearer token from the `INGEST_BEARER_TOKEN` env var. Wire shapes in [`02-contracts.md`](./02-contracts.md). -- Three HTML routes — `/`, `/chart/{slug}`, `/group/{slug}` — and one JSON route, - `GET /api/chart/{slug}`, all served from the same binary. 
+- Three HTML routes — `/`, `/chart/{slug}`, `/group/{slug}` — and four JSON routes — + `GET /api/groups`, `GET /api/chart/{slug}`, `GET /api/group/{slug}`, `GET /health` — all served + from the same binary. - `ChartKey` and `GroupKey` enums round-trip through URLs as `.` slugs. No DB lookup required to decode a URL. - Charts render inline on the landing page. Each `` is paired with a diff --git a/benchmarks-website/planning/README.md b/benchmarks-website/planning/README.md index 79333ff91eb..02d549b3f33 100644 --- a/benchmarks-website/planning/README.md +++ b/benchmarks-website/planning/README.md @@ -66,13 +66,14 @@ host, point `VORTEX_BENCH_DB` at it, and walk every group's charts in a browser. ### 5. Operational hygiene (not yet done) -- Health-check endpoint (`GET /health` returning 200). - Structured logging review (we already use `tracing`; verify fields are useful for prod debugging). - Rate limiting on `/api/ingest` — the bearer token is the only gate today. - TLS termination strategy: front with a load balancer / nginx / Caddy, or terminate in-process? Decide before DNS flip. -- DB schema-version tracking, so future migrations are coordinated rather than ad-hoc. +- DB schema-version tracking, so future migrations are coordinated rather than ad-hoc. The server + already exposes the constant via `/health`; what's missing is on-disk persistence and a check on + boot. - Backup story. Open question: is "copy the file" enough, or do we want a WAL-based / point-in-time approach? Investigate DuckDB options. @@ -122,7 +123,7 @@ These are user/owner decisions, not agent decisions. display-name map. Cosmetic but visible. - **Deferred UI follow-ups.** The user is handling these directly; agents should not pre-empt them: - - `collect_group_charts` N+1 refactor in `api.rs:583-613`. + - `collect_group_charts` N+1 refactor in `api.rs:1131-1162`. - Mobile legend resize handler. - Zoom-sync within a group. - LTTB downsampling for very long histories. From 3afa43a7e0f5f9b72a58172002e2931c9d9c9cf6 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 29 Apr 2026 13:28:52 +0000 Subject: [PATCH 21/26] fix(benchmarks-website/migrate): recover 2-part legacy random-access as dataset=taxi MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The migrator's `bin_random_access` rejected every 2-part v2 name shape `random-access/-tokio-local-disk` as `Skip::UnsupportedShape`, even though `random-access-bench`'s `measurement_name` only emits the 2-part form for the legacy taxi run (no `AccessPattern`) and the live v3 emitter writes those measurements with `dataset="taxi"`. The historical 2-part records on S3 (every random-access timing emitted between 2025-04 and 2026-02 plus the post-2026-02 cached/footer duplicates) were therefore dropped, leaving only the 4-part `taxi/correlated` and `taxi/uniform` history in v3. Recover them under `dataset="taxi"` so the chart matches what the live v3 emitter produces. The reopen-mode `-footer` variant still falls through to `Skip::Deprecated` because its format string doesn't strip clean to a v3-allowlisted name; that mirrors how the live emitter doesn't distinguish reopen vs cached either. 
Also extend the verifier so future regressions are easy to spot: - `verify.rs` now diffs at the chart-name level (not just chart count) and routes documented intentional asymmetries — derived ratios, empty FAN_OUT_GROUPS placeholders, the `RANDOM ACCESS` placeholder, the recovered `TAXI` chart, the new `VORTEX COMPACT SIZE` chart, the fineweb group — to a separate "intentional" bucket so a real drop shows up as a fresh ✗ regression candidate. - `MigrationSummary` now carries a per-`Skip`-reason histogram and prints it in the run summary, so a regression that pushes records into the wrong bucket is visible at a glance. - The CLI's `verify` exit code reflects `report.is_clean()` (every asymmetry on the documented allowlist) instead of just group-level coverage. Test plan: `cargo test -p vortex-bench-migrate` (66 tests pass), full end-to-end migrate against the production v2 S3 dump, verify against a local v2 server seeded from the same dump (chart-name diff is clean, every asymmetry documented). Signed-off-by: Claude --- benchmarks-website/migrate/src/classifier.rs | 67 ++-- benchmarks-website/migrate/src/main.rs | 6 +- benchmarks-website/migrate/src/migrate.rs | 38 +- benchmarks-website/migrate/src/verify.rs | 349 ++++++++++++++++-- .../migrate/tests/classifier.rs | 81 +++- 5 files changed, 478 insertions(+), 63 deletions(-) diff --git a/benchmarks-website/migrate/src/classifier.rs b/benchmarks-website/migrate/src/classifier.rs index ebc14dea39b..0524069e5b9 100644 --- a/benchmarks-website/migrate/src/classifier.rs +++ b/benchmarks-website/migrate/src/classifier.rs @@ -539,12 +539,11 @@ pub fn classify_outcome(record: &V2Record) -> Outcome { let bin = match &cls.group { V2Group::RandomAccess => match bin_random_access(record) { Some(b) => Some(b), - // Legacy 2-part `random-access/-…` records carry - // no dataset and are intentionally dropped by - // `bin_random_access`. Route them to Skip so the - // `Outcome::Unknown` arm below — and the 5% - // uncategorized gate in `migrate::run` — don't trip on - // them. + // `bin_random_access` only returns None for malformed + // shapes (empty dataset/pattern segment, empty/`default` + // format). Route them to Skip so the `Outcome::Unknown` + // arm below — and the 5% uncategorized gate in + // `migrate::run` — don't trip on them. None => return Outcome::Skip(Skip::UnsupportedShape), }, V2Group::Compression => bin_compression_time(&cls, record), @@ -567,13 +566,22 @@ pub fn classify_outcome(record: &V2Record) -> Outcome { fn bin_random_access(record: &V2Record) -> Option { // Pull dataset and format from the raw, pre-rename v2 name so v3 - // stores meaningful values. Raw shape is - // `random-access///-tokio-local-disk` - // (4-part). 2-part legacy records (`random-access/-…`) - // carry no dataset and historically rendered as the placeholder - // string "RANDOM ACCESS"; drop them rather than emit a fake - // dataset. Deriving from the raw name (rather than `cls.chart`) - // also keeps this independent of v2's `normalizeChartName`. + // stores meaningful values. Two raw shapes are supported: + // + // - 4-part `random-access///-tokio-local-disk` + // - 2-part legacy `random-access/-tokio-local-disk` + // + // The 2-part shape is what `random-access-bench`'s `measurement_name` + // emits when called without an `AccessPattern`, and per its source + // comment that path is only taken for the legacy taxi run + // (`if dataset.name() == "taxi"` in `benchmarks/random-access-bench/ + // src/main.rs`). 
The live v3 emitter `random_access_record` writes + // `dataset="taxi"` for those same measurements, so the historical + // 2-part records are taxi too — assigning `dataset="taxi"` here + // recovers the time series instead of letting it disappear under + // v2's "RANDOM ACCESS" placeholder. Deriving from the raw name + // (rather than `cls.chart`) keeps this independent of v2's + // `normalizeChartName`. // // After stripping the `-tokio-local-disk` suffix, map the v2 // random-access ext label (`vortex`, from `Format::ext()`) to the @@ -584,19 +592,32 @@ fn bin_random_access(record: &V2Record) -> Option { // `vortex-compact`), but v2's random-access bench only emitted // `OnDiskVortex`, so mapping to `vortex-file-compressed` is // correct for all historical data. + // + // Records whose `` segment ends in `-footer` (the bench's + // reopen-mode variant, e.g. `parquet-tokio-local-disk-footer`) + // intentionally do not strip clean to a v3-allowlisted format; the + // outer `is_v3_dim` filter then routes them to `Skip::Deprecated`. + // The live v3 emitter doesn't distinguish reopen vs cached either + // (`random_access_record` uses `format.name()` for both), so + // dropping `-footer` here keeps migration consistent with what + // v3 ingests live. let parts: Vec<&str> = record.name.split('/').collect(); - if parts.len() != 4 { - return None; - } - if parts[1].is_empty() || parts[2].is_empty() { - return None; - } - let dataset = format!("{}/{}", parts[1], parts[2]).to_lowercase(); - let raw = parts[3]; - if raw.is_empty() || raw == "default" { + let (dataset, raw_format) = match parts.as_slice() { + [_, ds, pat, format] => { + if ds.is_empty() || pat.is_empty() { + return None; + } + (format!("{ds}/{pat}").to_lowercase(), *format) + } + [_, format] => ("taxi".to_string(), *format), + _ => return None, + }; + if raw_format.is_empty() || raw_format == "default" { return None; } - let stripped = raw.strip_suffix("-tokio-local-disk").unwrap_or(raw); + let stripped = raw_format + .strip_suffix("-tokio-local-disk") + .unwrap_or(raw_format); let format = match stripped { "vortex" => "vortex-file-compressed".to_string(), other => other.to_lowercase(), diff --git a/benchmarks-website/migrate/src/main.rs b/benchmarks-website/migrate/src/main.rs index 366834ed441..fa2858cd44f 100644 --- a/benchmarks-website/migrate/src/main.rs +++ b/benchmarks-website/migrate/src/main.rs @@ -105,7 +105,11 @@ fn run() -> Result<()> { Command::Verify { against, duckdb } => { let report = verify::run(&against, &duckdb)?; print!("{report}"); - if !report.v2_groups_covered() { + // Non-zero exit when any per-chart asymmetry isn't on the + // documented `INTENTIONAL_ONLY_IN_V2` / `INTENTIONAL_ONLY_IN_V3` + // allowlist. Group-level membership is part of `is_clean()` + // too, so this also catches an undocumented missing group. + if !report.is_clean() { std::process::exit(1); } Ok(()) diff --git a/benchmarks-website/migrate/src/migrate.rs b/benchmarks-website/migrate/src/migrate.rs index 93885e5d431..633230f788c 100644 --- a/benchmarks-website/migrate/src/migrate.rs +++ b/benchmarks-website/migrate/src/migrate.rs @@ -74,6 +74,11 @@ pub struct MigrationSummary { pub commit_warnings: u64, pub skipped_no_value: u64, pub skipped_intentional: u64, + /// Per-`Skip` reason counts. Lets future agents see at a glance + /// which categories the classifier is throwing away records under, + /// so a regression that pushes data into the wrong `Skip` variant + /// jumps out of the run summary. 
+ pub skipped_by_reason: BTreeMap<&'static str, u64>, pub commits_inserted: u64, pub deduped: u64, /// Number of records dropped by dedup whose `value_ns` (or @@ -317,8 +322,12 @@ fn apply_v2_record( let bin = match classifier::classify_outcome(record) { classifier::Outcome::Bin(b) => b, - classifier::Outcome::Skip(_) => { + classifier::Outcome::Skip(reason) => { summary.skipped_intentional += 1; + *summary + .skipped_by_reason + .entry(skip_reason_name(reason)) + .or_insert(0) += 1; return; } classifier::Outcome::Unknown => { @@ -821,6 +830,23 @@ fn build_list_int64(values: Vec>) -> ListArray { ) } +/// Stable, machine-friendly name for each [`classifier::Skip`] variant. +/// +/// Used as a histogram bucket key in [`MigrationSummary::skipped_by_reason`]. +/// Names are kept short and stable so the summary text stays diffable +/// across runs. +fn skip_reason_name(s: classifier::Skip) -> &'static str { + match s { + classifier::Skip::DerivedRatio => "DerivedRatio", + classifier::Skip::Throughput => "Throughput", + classifier::Skip::SkippedSuite => "SkippedSuite", + classifier::Skip::UnsupportedShape => "UnsupportedShape", + classifier::Skip::NoValue => "NoValue", + classifier::Skip::Deprecated => "Deprecated", + classifier::Skip::HistoricalMemory => "HistoricalMemory", + } +} + /// Print the summary in a human-readable form. Returned by the CLI. impl std::fmt::Display for MigrationSummary { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -851,6 +877,16 @@ impl std::fmt::Display for MigrationSummary { self.uncategorized, 100.0 * self.uncategorized_fraction() )?; + if !self.skipped_by_reason.is_empty() { + writeln!(f, "Skip histogram (by reason):")?; + // Sort largest first so a regression that shifts records + // into an unexpected bucket is easy to spot. + let mut by_reason: Vec<_> = self.skipped_by_reason.iter().collect(); + by_reason.sort_by(|a, b| b.1.cmp(a.1).then(a.0.cmp(b.0))); + for (reason, n) in by_reason { + writeln!(f, " {reason:>20} : {n}")?; + } + } if !self.uncategorized_prefixes.is_empty() { let mut top: Vec<_> = self.uncategorized_prefixes.iter().collect(); top.sort_by(|a, b| b.1.cmp(a.1)); diff --git a/benchmarks-website/migrate/src/verify.rs b/benchmarks-website/migrate/src/verify.rs index eb4caef6df7..c4cdea3d14d 100644 --- a/benchmarks-website/migrate/src/verify.rs +++ b/benchmarks-website/migrate/src/verify.rs @@ -9,6 +9,13 @@ //! stores raw and the chart query divides. Group/chart structural //! equivalence is enough to spot classifier regressions before //! cutover. +//! +//! The diff distinguishes documented intentional asymmetries (e.g. +//! ratio charts that v3 derives at read time, the legacy +//! `RANDOM ACCESS` placeholder) from regression candidates so a clean +//! run shows only known-good differences and a regression jumps out +//! immediately. See [`INTENTIONAL_ONLY_IN_V2`] and +//! [`INTENTIONAL_ONLY_IN_V3`] for the live list. use std::collections::BTreeMap; use std::collections::BTreeSet; @@ -21,25 +28,58 @@ use serde::Deserialize; use crate::classifier::QUERY_SUITES; +/// One row of [`VerifyReport::group_chart_diffs`]: per-group lists of +/// chart names that are missing on either side, split into intentional +/// asymmetries and regression candidates. 
+#[derive(Debug, Default, Clone)] +pub struct GroupChartDiff { + pub group: String, + pub v2_count: usize, + pub v3_count: usize, + pub missing_in_v3_intentional: Vec, + pub missing_in_v3_regression: Vec, + pub missing_in_v2_intentional: Vec, + pub missing_in_v2_regression: Vec, +} + +impl GroupChartDiff { + /// True if every chart-name asymmetry between v2 and v3 for this + /// group is documented as intentional. False means at least one + /// regression candidate is on the list. + pub fn is_clean(&self) -> bool { + self.missing_in_v3_regression.is_empty() && self.missing_in_v2_regression.is_empty() + } +} + /// Result of one `verify` run. #[derive(Debug, Default)] pub struct VerifyReport { pub matched_groups: Vec, + /// Groups that exist in v3 but not v2, where the asymmetry is NOT + /// on the documented allowlist — counts as a regression. pub only_in_v3: Vec, + /// Groups that exist in v2 but not v3, where the asymmetry is NOT + /// on the documented allowlist — counts as a regression. pub only_in_v2: Vec, - pub chart_diffs: Vec, -} - -#[derive(Debug, Clone)] -pub struct ChartDiff { - pub group: String, - pub v2_count: usize, - pub v3_count: usize, + /// Groups whose v2/v3 asymmetry is on the documented allowlist + /// (e.g. `Fineweb` in v3, an empty `TPC-H (NVMe) (SF=1000)` fan-out + /// in v2). Surfaced for the human reader; not a regression. + pub only_in_v3_intentional: Vec, + pub only_in_v2_intentional: Vec, + pub group_chart_diffs: Vec, } impl VerifyReport { - /// True if every v2 group is represented in v3. The CLI's exit - /// code reflects this. + /// True if every v2 group is represented in v3 *and* every per- + /// chart-name asymmetry is documented as intentional. The CLI's + /// exit code reflects this. + pub fn is_clean(&self) -> bool { + self.only_in_v2.is_empty() + && self.only_in_v3.is_empty() + && self.group_chart_diffs.iter().all(|d| d.is_clean()) + } + + /// Backwards-compatible: were all v2 groups covered by v3? 
pub fn v2_groups_covered(&self) -> bool { self.only_in_v2.is_empty() } @@ -54,18 +94,30 @@ impl std::fmt::Display for VerifyReport { if !self.only_in_v2.is_empty() { writeln!(f, "Groups only in v2 (regression candidates):")?; for g in &self.only_in_v2 { - writeln!(f, " - {g}")?; + writeln!(f, " ✗ {g}")?; + } + } + if !self.only_in_v2_intentional.is_empty() { + writeln!(f, "Groups only in v2 (documented intentional skip):")?; + for g in &self.only_in_v2_intentional { + writeln!(f, " · {g}")?; } } if !self.only_in_v3.is_empty() { - writeln!(f, "Groups only in v3:")?; + writeln!(f, "Groups only in v3 (regression candidates):")?; for g in &self.only_in_v3 { - writeln!(f, " + {g}")?; + writeln!(f, " ✗ {g}")?; + } + } + if !self.only_in_v3_intentional.is_empty() { + writeln!(f, "Groups only in v3 (documented intentional addition):")?; + for g in &self.only_in_v3_intentional { + writeln!(f, " · {g}")?; } } - if !self.chart_diffs.is_empty() { - writeln!(f, "Chart count diffs:")?; - for d in &self.chart_diffs { + if !self.group_chart_diffs.is_empty() { + writeln!(f, "Chart name diffs (per group):")?; + for d in &self.group_chart_diffs { writeln!( f, " {} : v2={} v3={} (delta={})", @@ -74,8 +126,28 @@ impl std::fmt::Display for VerifyReport { d.v3_count, d.v3_count as i64 - d.v2_count as i64, )?; + for c in &d.missing_in_v3_regression { + writeln!(f, " ✗ only in v2 (regression candidate): {c}")?; + } + for c in &d.missing_in_v2_regression { + writeln!(f, " ✗ only in v3 (regression candidate): {c}")?; + } + for c in &d.missing_in_v3_intentional { + writeln!(f, " · only in v2 (documented intentional skip): {c}")?; + } + for c in &d.missing_in_v2_intentional { + writeln!( + f, + " · only in v3 (documented intentional addition): {c}" + )?; + } } } + if self.is_clean() { + writeln!(f, "verify: clean (every asymmetry is documented).")?; + } else { + writeln!(f, "verify: regression candidates present (see ✗ above).")?; + } Ok(()) } } @@ -106,6 +178,76 @@ pub fn run(v2_server: &str, duckdb_path: &Path) -> Result { Ok(diff(&v2, &v3)) } +/// Charts present in v2 metadata but intentionally absent from v3. +/// +/// Each entry is a `(group, normalized_chart_name)` pair. The +/// normalization matches [`normalize_chart`] so the lookup uses the +/// same key shape the diff produces. Update this list whenever the +/// classifier deliberately drops a v2 chart pattern (e.g. a derived +/// ratio, a placeholder, a deprecated format) so a future regression +/// shows up as a fresh ✗ instead of getting lost in the noise. +const INTENTIONAL_ONLY_IN_V2: &[(&str, &str)] = &[ + // Compression ratios are derived at read time from compression_sizes + // (joined to itself); the migrator routes them to Skip::DerivedRatio. + ("Compression", "VORTEX:LANCE RATIO COMPRESS TIME"), + ("Compression", "VORTEX:LANCE RATIO DECOMPRESS TIME"), + ("Compression", "VORTEX:PARQUET ZSTD RATIO COMPRESS TIME"), + ("Compression", "VORTEX:PARQUET ZSTD RATIO DECOMPRESS TIME"), + // Compression-size ratios — same story. + ("Compression Size", "VORTEX:LANCE SIZE"), + ("Compression Size", "VORTEX:PARQUET ZSTD SIZE"), + ("Compression Size", "VORTEX:RAW SIZE"), + // The legacy 2-part `random-access/-tokio-local-disk` records + // render in v2 under a "RANDOM ACCESS" placeholder chart. The + // migrator recovers their *values* under `dataset="taxi"` (see + // `bin_random_access`) instead of carrying the placeholder name + // forward, so v3 has a "TAXI" chart and v2 has "RANDOM ACCESS". + // Both sides are documented intentional asymmetries. 
+ ("Random Access", "RANDOM ACCESS"), +]; + +/// Charts emitted by the migrator that v2 intentionally doesn't render. +/// +/// Pair shape matches [`INTENTIONAL_ONLY_IN_V2`]. +const INTENTIONAL_ONLY_IN_V3: &[(&str, &str)] = &[ + // `vortex-compact` size rows come in via `migrate_file_sizes` (the + // file-sizes-*.json.gz path). v2 never rendered the format because + // its `getGroup` didn't recognize the `vortex-compact` suite. + ("Compression Size", "VORTEX COMPACT SIZE"), + // 2-part legacy random-access records (per the "RANDOM ACCESS" + // entry in INTENTIONAL_ONLY_IN_V2 above) are recovered in v3 as + // dataset="taxi". v2 never had a chart by that name in Random + // Access — its taxi dataset always rode the `taxi/correlated` + // and `taxi/uniform` 4-part patterns. + ("Random Access", "TAXI"), +]; + +/// Groups intentionally surfaced by v3 but skipped by v2's metadata. +/// +/// `fineweb` is on `V3_QUERY_SUITES` because the live CI workflow still +/// emits fineweb measurements; v2's `getGroup` marks the suite +/// `skip: true` so the v2 server never builds metadata for it. +const INTENTIONAL_ONLY_IN_V3_GROUPS: &[&str] = &["Fineweb"]; + +/// Groups intentionally listed by v2 metadata that v3 doesn't materialize. +/// +/// v2's `FAN_OUT_GROUPS` registers TPC-H and TPC-DS group names for +/// every `(storage, scale_factor)` pair the UI knows about, even when +/// no records exist (the chart list comes back empty). The migrator +/// only writes a group when matching rows exist, so empty fan-outs +/// don't appear in v3 — which is the intended behavior. +const INTENTIONAL_ONLY_IN_V2_GROUPS: &[&str] = &[ + "TPC-DS (NVMe) (SF=10)", + "TPC-H (NVMe) (SF=1000)", + "TPC-H (S3) (SF=1000)", +]; + +fn is_intentional(table: &[(&str, &str)], group: &str, chart: &str) -> bool { + table + .iter() + .any(|(g, c)| *g == group && normalize_chart(c) == chart) +} + fn collect_v3_groups(duckdb_path: &Path) -> Result>> { let conn = Connection::open(duckdb_path) .with_context(|| format!("opening DuckDB at {}", duckdb_path.display()))?; @@ -229,27 +371,76 @@ fn diff( let mut report = VerifyReport::default(); let v2_keys: BTreeSet<&String> = v2.keys().collect(); let v3_keys: BTreeSet<&String> = v3.keys().collect(); + + // Group-level membership. An entry that's "only in v2" but with + // zero charts (e.g. a pre-registered FAN_OUT_GROUPS placeholder) + // and that's on the documented allowlist isn't a regression. 
for g in v2_keys.intersection(&v3_keys) { report.matched_groups.push((**g).clone()); let v2_charts = &v2[*g]; let v3_charts = &v3[*g]; - if v2_charts.len() != v3_charts.len() { - report.chart_diffs.push(ChartDiff { - group: (**g).clone(), - v2_count: v2_charts.len(), - v3_count: v3_charts.len(), - }); + let only_v3 = v3_charts.difference(v2_charts).cloned().collect::>(); + let only_v2 = v2_charts.difference(v3_charts).cloned().collect::>(); + if only_v3.is_empty() && only_v2.is_empty() { + continue; + } + let mut row = GroupChartDiff { + group: (**g).clone(), + v2_count: v2_charts.len(), + v3_count: v3_charts.len(), + ..Default::default() + }; + for c in only_v2 { + if is_intentional(INTENTIONAL_ONLY_IN_V2, g, &c) { + row.missing_in_v3_intentional.push(c); + } else { + row.missing_in_v3_regression.push(c); + } } + for c in only_v3 { + if is_intentional(INTENTIONAL_ONLY_IN_V3, g, &c) { + row.missing_in_v2_intentional.push(c); + } else { + row.missing_in_v2_regression.push(c); + } + } + row.missing_in_v3_intentional.sort(); + row.missing_in_v3_regression.sort(); + row.missing_in_v2_intentional.sort(); + row.missing_in_v2_regression.sort(); + report.group_chart_diffs.push(row); } for g in v3_keys.difference(&v2_keys) { - report.only_in_v3.push((**g).clone()); + // Group exists only in v3. If documented (e.g. fineweb), shunt + // it to the intentional list; otherwise it's a regression + // candidate. + if INTENTIONAL_ONLY_IN_V3_GROUPS.contains(&g.as_str()) { + report.only_in_v3_intentional.push((**g).clone()); + } else { + report.only_in_v3.push((**g).clone()); + } } for g in v2_keys.difference(&v3_keys) { - report.only_in_v2.push((**g).clone()); + // Group exists only in v2. Documented empty fan-outs (the + // hard-coded TPC-H/TPC-DS slots in v2's `FAN_OUT_GROUPS`) don't + // count as regressions; surface them as intentional. + let charts = &v2[*g]; + let documented_empty = + INTENTIONAL_ONLY_IN_V2_GROUPS.contains(&g.as_str()) && charts.is_empty(); + if documented_empty { + report.only_in_v2_intentional.push((**g).clone()); + } else { + report.only_in_v2.push((**g).clone()); + } } report.matched_groups.sort(); report.only_in_v3.sort(); + report.only_in_v3_intentional.sort(); report.only_in_v2.sort(); + report.only_in_v2_intentional.sort(); + report + .group_chart_diffs + .sort_by(|a, b| a.group.cmp(&b.group)); report } @@ -324,6 +515,10 @@ fn normalize_chart(s: &str) -> String { mod tests { use super::*; + fn group(charts: &[&str]) -> BTreeSet { + charts.iter().map(|s| normalize_chart(s)).collect() + } + #[test] fn normalize_chart_canonicalizes() { assert_eq!(normalize_chart("taxi/take"), "TAXI/TAKE"); @@ -347,4 +542,110 @@ mod tests { "Clickbench" ); } + + #[test] + fn diff_clean_when_only_documented_asymmetries() { + // v2 has "RANDOM ACCESS" placeholder; v3 has "TAXI" recovered. + // Both are on the intentional allowlist. 
+ let mut v2 = BTreeMap::new(); + v2.insert( + "Random Access".to_string(), + group(&["TAXI/CORRELATED", "TAXI/UNIFORM", "RANDOM ACCESS"]), + ); + let mut v3 = BTreeMap::new(); + v3.insert( + "Random Access".to_string(), + group(&["TAXI/CORRELATED", "TAXI/UNIFORM", "TAXI"]), + ); + let report = diff(&v2, &v3); + assert!(report.is_clean(), "expected clean, got: {report}"); + let row = &report.group_chart_diffs[0]; + assert_eq!(row.group, "Random Access"); + assert_eq!(row.missing_in_v3_intentional, vec!["RANDOM ACCESS"]); + assert_eq!(row.missing_in_v2_intentional, vec!["TAXI"]); + assert!(row.missing_in_v3_regression.is_empty()); + assert!(row.missing_in_v2_regression.is_empty()); + } + + #[test] + fn diff_flags_undocumented_only_in_v2_chart() { + let mut v2 = BTreeMap::new(); + v2.insert("Random Access".to_string(), group(&["NEW CHART NAME"])); + let mut v3 = BTreeMap::new(); + v3.insert("Random Access".to_string(), group(&[])); + let report = diff(&v2, &v3); + assert!(!report.is_clean(), "expected regression, got clean"); + let row = &report.group_chart_diffs[0]; + assert_eq!(row.missing_in_v3_regression, vec!["NEW CHART NAME"]); + } + + #[test] + fn diff_flags_undocumented_only_in_v3_chart() { + let mut v2 = BTreeMap::new(); + v2.insert("Random Access".to_string(), group(&[])); + let mut v3 = BTreeMap::new(); + v3.insert("Random Access".to_string(), group(&["MYSTERY CHART"])); + let report = diff(&v2, &v3); + assert!(!report.is_clean(), "expected regression, got clean"); + let row = &report.group_chart_diffs[0]; + assert_eq!(row.missing_in_v2_regression, vec!["MYSTERY CHART"]); + } + + #[test] + fn diff_documented_empty_fan_out_group_not_a_regression() { + // v2 metadata always lists `TPC-H (NVMe) (SF=1000)` with zero + // charts (the FAN_OUT_GROUPS hard-coding); v3 doesn't + // materialize empty groups. The verifier should accept this + // and route the asymmetry to the intentional list. + let mut v2 = BTreeMap::new(); + v2.insert("TPC-H (NVMe) (SF=1000)".to_string(), group(&[])); + v2.insert("Clickbench".to_string(), group(&["CLICKBENCH Q0"])); + let mut v3 = BTreeMap::new(); + v3.insert("Clickbench".to_string(), group(&["CLICKBENCH Q0"])); + let report = diff(&v2, &v3); + assert!( + report.is_clean(), + "documented empty fan-out should not be a regression: {report}" + ); + assert!(report.only_in_v2.is_empty()); + assert_eq!( + report.only_in_v2_intentional, + vec!["TPC-H (NVMe) (SF=1000)"] + ); + } + + #[test] + fn diff_undocumented_only_in_v2_group_is_a_regression() { + let mut v2 = BTreeMap::new(); + v2.insert("Brand New Group".to_string(), group(&["X"])); + let v3 = BTreeMap::new(); + let report = diff(&v2, &v3); + assert!(!report.is_clean()); + assert_eq!(report.only_in_v2, vec!["Brand New Group"]); + } + + #[test] + fn diff_documented_only_in_v3_group_not_a_regression() { + // `Fineweb` is on the v3 query-suite allowlist (CI still emits + // fineweb data); v2's `getGroup` skips fineweb so its metadata + // never lists the group. The verifier should accept this and + // surface the asymmetry as intentional. 
+ let v2 = BTreeMap::new(); + let mut v3 = BTreeMap::new(); + v3.insert("Fineweb".to_string(), group(&["FINEWEB Q0"])); + let report = diff(&v2, &v3); + assert!(report.is_clean(), "documented v3-only group: {report}"); + assert!(report.only_in_v3.is_empty()); + assert_eq!(report.only_in_v3_intentional, vec!["Fineweb"]); + } + + #[test] + fn diff_undocumented_only_in_v3_group_is_a_regression() { + let v2 = BTreeMap::new(); + let mut v3 = BTreeMap::new(); + v3.insert("Mystery Group".to_string(), group(&["X"])); + let report = diff(&v2, &v3); + assert!(!report.is_clean()); + assert_eq!(report.only_in_v3, vec!["Mystery Group"]); + } } diff --git a/benchmarks-website/migrate/tests/classifier.rs b/benchmarks-website/migrate/tests/classifier.rs index 300c610bd24..71e97cb6c9a 100644 --- a/benchmarks-website/migrate/tests/classifier.rs +++ b/benchmarks-website/migrate/tests/classifier.rs @@ -260,7 +260,6 @@ fn compression_size_records(#[case] name: &str, #[case] expected: V3Bin) { #[case::ratio_size_vortex_raw("vortex:raw size/clickbench")] #[case::throughput("compress throughput/clickbench")] #[case::nonsense_prefix("not-a-known-bench/series")] -#[case::random_access_2_part_legacy("random-access/parquet-tokio-local-disk")] #[case::random_access_3_part("random-access/taxi/parquet-tokio-local-disk")] fn unmapped_records_yield_none(#[case] name: &str) { let r = record(name); @@ -271,20 +270,74 @@ fn unmapped_records_yield_none(#[case] name: &str) { ); } -#[test] -fn random_access_2_part_legacy_is_skip_not_unknown() { - // The 2-part legacy shape `random-access/-tokio-local-disk` - // carries no dataset, so `bin_random_access` returns None. That - // None must route through `Outcome::Skip` (an intentional drop), - // NOT `Outcome::Unknown`, otherwise these records count against - // the 5% uncategorized gate in `migrate::run`. Top-level - // `classify()` returns None for both Skip and Unknown, so this - // assertion has to go through `classify_outcome`. - let r = record("random-access/parquet-tokio-local-disk"); - let outcome = classify_outcome(&r); +#[rstest] +#[case::parquet_2_part( + "random-access/parquet-tokio-local-disk", + V3Bin::RandomAccess { + dataset: "taxi".into(), + format: "parquet".into(), + }, +)] +#[case::vortex_2_part( + "random-access/vortex-tokio-local-disk", + V3Bin::RandomAccess { + dataset: "taxi".into(), + format: "vortex-file-compressed".into(), + }, +)] +#[case::lance_2_part( + "random-access/lance-tokio-local-disk", + V3Bin::RandomAccess { + dataset: "taxi".into(), + format: "lance".into(), + }, +)] +fn random_access_2_part_legacy_recovered_as_taxi(#[case] name: &str, #[case] expected: V3Bin) { + // The 2-part shape `random-access/-tokio-local-disk` is + // emitted by `random-access-bench`'s legacy taxi run (no + // `AccessPattern`, see `measurement_name` in + // `benchmarks/random-access-bench/src/main.rs`). The live v3 + // emitter writes `dataset="taxi"` for those measurements, so the + // historical 2-part records on S3 must land in the same v3 + // chart instead of being dropped as `UnsupportedShape`. 
+ let r = record(name); + assert_eq!( + classify(&r), + Some(expected), + "2-part legacy random-access must recover as dataset=taxi" + ); +} + +#[rstest] +#[case::parquet_footer("random-access/parquet-tokio-local-disk-footer")] +#[case::vortex_footer("random-access/vortex-tokio-local-disk-footer")] +#[case::lance_footer("random-access/lance-tokio-local-disk-footer")] +fn random_access_2_part_footer_is_deprecated(#[case] name: &str) { + // The reopen-mode `-footer` variant is a different access pattern + // (file is reopened per take). The live v3 emitter passes the + // bare `format.name()` for both reopen and cached, so it can't + // distinguish them on the wire. Keep migration consistent with + // that by routing `-footer` 2-part records to Skip::Deprecated + // (they don't strip clean to a v3-allowlisted format). + let r = record(name); + assert!( + matches!(classify_outcome(&r), Outcome::Skip(Skip::Deprecated)), + "2-part `-footer` random-access must be Skip::Deprecated" + ); +} + +#[rstest] +#[case::parquet_footer("random-access/taxi/correlated/parquet-tokio-local-disk-footer")] +#[case::vortex_footer("random-access/feature-vectors/uniform/vortex-tokio-local-disk-footer")] +#[case::lance_footer("random-access/nested-structs/correlated/lance-tokio-local-disk-footer")] +fn random_access_4_part_footer_is_deprecated(#[case] name: &str) { + // Same reasoning as 2-part `-footer`: the format string ends in + // `-tokio-local-disk-footer`, the strip_suffix doesn't match, and + // the unstripped value fails the V3_FORMATS allowlist. + let r = record(name); assert!( - matches!(outcome, Outcome::Skip(_)), - "2-part legacy random-access must Skip, not Unknown; got {outcome:?}" + matches!(classify_outcome(&r), Outcome::Skip(Skip::Deprecated)), + "4-part `-footer` random-access must be Skip::Deprecated" ); } From ce775663fcea099ddf06edce08cd26886505a9cf Mon Sep 17 00:00:00 2001 From: Connor Tsui Date: Wed, 29 Apr 2026 11:57:43 -0400 Subject: [PATCH 22/26] fix migration skips Signed-off-by: Connor Tsui --- benchmarks-website/migrate/src/classifier.rs | 67 ++-- benchmarks-website/migrate/src/main.rs | 6 +- benchmarks-website/migrate/src/migrate.rs | 38 +- benchmarks-website/migrate/src/verify.rs | 349 ++---------------- .../migrate/tests/classifier.rs | 81 +--- 5 files changed, 63 insertions(+), 478 deletions(-) diff --git a/benchmarks-website/migrate/src/classifier.rs b/benchmarks-website/migrate/src/classifier.rs index 0524069e5b9..ebc14dea39b 100644 --- a/benchmarks-website/migrate/src/classifier.rs +++ b/benchmarks-website/migrate/src/classifier.rs @@ -539,11 +539,12 @@ pub fn classify_outcome(record: &V2Record) -> Outcome { let bin = match &cls.group { V2Group::RandomAccess => match bin_random_access(record) { Some(b) => Some(b), - // `bin_random_access` only returns None for malformed - // shapes (empty dataset/pattern segment, empty/`default` - // format). Route them to Skip so the `Outcome::Unknown` - // arm below — and the 5% uncategorized gate in - // `migrate::run` — don't trip on them. + // Legacy 2-part `random-access/-…` records carry + // no dataset and are intentionally dropped by + // `bin_random_access`. Route them to Skip so the + // `Outcome::Unknown` arm below — and the 5% + // uncategorized gate in `migrate::run` — don't trip on + // them. 
            None => return Outcome::Skip(Skip::UnsupportedShape),
        },
        V2Group::Compression => bin_compression_time(&cls, record),
@@ -566,22 +567,13 @@
 
 fn bin_random_access(record: &V2Record) -> Option<V3Bin> {
     // Pull dataset and format from the raw, pre-rename v2 name so v3
-    // stores meaningful values. Two raw shapes are supported:
-    //
-    // - 4-part `random-access/<dataset>/<pattern>/<format>-tokio-local-disk`
-    // - 2-part legacy `random-access/<format>-tokio-local-disk`
-    //
-    // The 2-part shape is what `random-access-bench`'s `measurement_name`
-    // emits when called without an `AccessPattern`, and per its source
-    // comment that path is only taken for the legacy taxi run
-    // (`if dataset.name() == "taxi"` in `benchmarks/random-access-bench/
-    // src/main.rs`). The live v3 emitter `random_access_record` writes
-    // `dataset="taxi"` for those same measurements, so the historical
-    // 2-part records are taxi too — assigning `dataset="taxi"` here
-    // recovers the time series instead of letting it disappear under
-    // v2's "RANDOM ACCESS" placeholder. Deriving from the raw name
-    // (rather than `cls.chart`) keeps this independent of v2's
-    // `normalizeChartName`.
+    // stores meaningful values. Raw shape is
+    // `random-access/<dataset>/<pattern>/<format>-tokio-local-disk`
+    // (4-part). 2-part legacy records (`random-access/<format>-…`)
+    // carry no dataset and historically rendered as the placeholder
+    // string "RANDOM ACCESS"; drop them rather than emit a fake
+    // dataset. Deriving from the raw name (rather than `cls.chart`)
+    // also keeps this independent of v2's `normalizeChartName`.
     //
     // After stripping the `-tokio-local-disk` suffix, map the v2
     // random-access ext label (`vortex`, from `Format::ext()`) to the
     // `vortex-compact`), but v2's random-access bench only emitted
     // `OnDiskVortex`, so mapping to `vortex-file-compressed` is
     // correct for all historical data.
-    //
-    // Records whose `<format>` segment ends in `-footer` (the bench's
-    // reopen-mode variant, e.g. `parquet-tokio-local-disk-footer`)
-    // intentionally do not strip clean to a v3-allowlisted format; the
-    // outer `is_v3_dim` filter then routes them to `Skip::Deprecated`.
-    // The live v3 emitter doesn't distinguish reopen vs cached either
-    // (`random_access_record` uses `format.name()` for both), so
-    // dropping `-footer` here keeps migration consistent with what
-    // v3 ingests live.
let parts: Vec<&str> = record.name.split('/').collect(); - let (dataset, raw_format) = match parts.as_slice() { - [_, ds, pat, format] => { - if ds.is_empty() || pat.is_empty() { - return None; - } - (format!("{ds}/{pat}").to_lowercase(), *format) - } - [_, format] => ("taxi".to_string(), *format), - _ => return None, - }; - if raw_format.is_empty() || raw_format == "default" { + if parts.len() != 4 { + return None; + } + if parts[1].is_empty() || parts[2].is_empty() { + return None; + } + let dataset = format!("{}/{}", parts[1], parts[2]).to_lowercase(); + let raw = parts[3]; + if raw.is_empty() || raw == "default" { return None; } - let stripped = raw_format - .strip_suffix("-tokio-local-disk") - .unwrap_or(raw_format); + let stripped = raw.strip_suffix("-tokio-local-disk").unwrap_or(raw); let format = match stripped { "vortex" => "vortex-file-compressed".to_string(), other => other.to_lowercase(), diff --git a/benchmarks-website/migrate/src/main.rs b/benchmarks-website/migrate/src/main.rs index fa2858cd44f..366834ed441 100644 --- a/benchmarks-website/migrate/src/main.rs +++ b/benchmarks-website/migrate/src/main.rs @@ -105,11 +105,7 @@ fn run() -> Result<()> { Command::Verify { against, duckdb } => { let report = verify::run(&against, &duckdb)?; print!("{report}"); - // Non-zero exit when any per-chart asymmetry isn't on the - // documented `INTENTIONAL_ONLY_IN_V2` / `INTENTIONAL_ONLY_IN_V3` - // allowlist. Group-level membership is part of `is_clean()` - // too, so this also catches an undocumented missing group. - if !report.is_clean() { + if !report.v2_groups_covered() { std::process::exit(1); } Ok(()) diff --git a/benchmarks-website/migrate/src/migrate.rs b/benchmarks-website/migrate/src/migrate.rs index 633230f788c..93885e5d431 100644 --- a/benchmarks-website/migrate/src/migrate.rs +++ b/benchmarks-website/migrate/src/migrate.rs @@ -74,11 +74,6 @@ pub struct MigrationSummary { pub commit_warnings: u64, pub skipped_no_value: u64, pub skipped_intentional: u64, - /// Per-`Skip` reason counts. Lets future agents see at a glance - /// which categories the classifier is throwing away records under, - /// so a regression that pushes data into the wrong `Skip` variant - /// jumps out of the run summary. - pub skipped_by_reason: BTreeMap<&'static str, u64>, pub commits_inserted: u64, pub deduped: u64, /// Number of records dropped by dedup whose `value_ns` (or @@ -322,12 +317,8 @@ fn apply_v2_record( let bin = match classifier::classify_outcome(record) { classifier::Outcome::Bin(b) => b, - classifier::Outcome::Skip(reason) => { + classifier::Outcome::Skip(_) => { summary.skipped_intentional += 1; - *summary - .skipped_by_reason - .entry(skip_reason_name(reason)) - .or_insert(0) += 1; return; } classifier::Outcome::Unknown => { @@ -830,23 +821,6 @@ fn build_list_int64(values: Vec>) -> ListArray { ) } -/// Stable, machine-friendly name for each [`classifier::Skip`] variant. -/// -/// Used as a histogram bucket key in [`MigrationSummary::skipped_by_reason`]. -/// Names are kept short and stable so the summary text stays diffable -/// across runs. 
-fn skip_reason_name(s: classifier::Skip) -> &'static str {
-    match s {
-        classifier::Skip::DerivedRatio => "DerivedRatio",
-        classifier::Skip::Throughput => "Throughput",
-        classifier::Skip::SkippedSuite => "SkippedSuite",
-        classifier::Skip::UnsupportedShape => "UnsupportedShape",
-        classifier::Skip::NoValue => "NoValue",
-        classifier::Skip::Deprecated => "Deprecated",
-        classifier::Skip::HistoricalMemory => "HistoricalMemory",
-    }
-}
-
 /// Print the summary in a human-readable form. Returned by the CLI.
 impl std::fmt::Display for MigrationSummary {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
@@ -877,16 +851,6 @@ impl std::fmt::Display for MigrationSummary {
             self.uncategorized,
             100.0 * self.uncategorized_fraction()
         )?;
-        if !self.skipped_by_reason.is_empty() {
-            writeln!(f, "Skip histogram (by reason):")?;
-            // Sort largest first so a regression that shifts records
-            // into an unexpected bucket is easy to spot.
-            let mut by_reason: Vec<_> = self.skipped_by_reason.iter().collect();
-            by_reason.sort_by(|a, b| b.1.cmp(a.1).then(a.0.cmp(b.0)));
-            for (reason, n) in by_reason {
-                writeln!(f, "    {reason:>20} : {n}")?;
-            }
-        }
         if !self.uncategorized_prefixes.is_empty() {
             let mut top: Vec<_> = self.uncategorized_prefixes.iter().collect();
             top.sort_by(|a, b| b.1.cmp(a.1));
diff --git a/benchmarks-website/migrate/src/verify.rs b/benchmarks-website/migrate/src/verify.rs
index c4cdea3d14d..eb4caef6df7 100644
--- a/benchmarks-website/migrate/src/verify.rs
+++ b/benchmarks-website/migrate/src/verify.rs
@@ -9,13 +9,6 @@
 //! stores raw and the chart query divides. Group/chart structural
 //! equivalence is enough to spot classifier regressions before
 //! cutover.
-//!
-//! The diff distinguishes documented intentional asymmetries (e.g.
-//! ratio charts that v3 derives at read time, the legacy
-//! `RANDOM ACCESS` placeholder) from regression candidates so a clean
-//! run shows only known-good differences and a regression jumps out
-//! immediately. See [`INTENTIONAL_ONLY_IN_V2`] and
-//! [`INTENTIONAL_ONLY_IN_V3`] for the live list.
 
 use std::collections::BTreeMap;
 use std::collections::BTreeSet;
@@ -28,58 +21,25 @@
 use serde::Deserialize;
 
 use crate::classifier::QUERY_SUITES;
 
-/// One row of [`VerifyReport::group_chart_diffs`]: per-group lists of
-/// chart names that are missing on either side, split into intentional
-/// asymmetries and regression candidates.
-#[derive(Debug, Default, Clone)]
-pub struct GroupChartDiff {
-    pub group: String,
-    pub v2_count: usize,
-    pub v3_count: usize,
-    pub missing_in_v3_intentional: Vec<String>,
-    pub missing_in_v3_regression: Vec<String>,
-    pub missing_in_v2_intentional: Vec<String>,
-    pub missing_in_v2_regression: Vec<String>,
-}
-
-impl GroupChartDiff {
-    /// True if every chart-name asymmetry between v2 and v3 for this
-    /// group is documented as intentional. False means at least one
-    /// regression candidate is on the list.
-    pub fn is_clean(&self) -> bool {
-        self.missing_in_v3_regression.is_empty() && self.missing_in_v2_regression.is_empty()
-    }
-}
-
 /// Result of one `verify` run.
 #[derive(Debug, Default)]
 pub struct VerifyReport {
     pub matched_groups: Vec<String>,
-    /// Groups that exist in v3 but not v2, where the asymmetry is NOT
-    /// on the documented allowlist — counts as a regression.
     pub only_in_v3: Vec<String>,
-    /// Groups that exist in v2 but not v3, where the asymmetry is NOT
-    /// on the documented allowlist — counts as a regression.
     pub only_in_v2: Vec<String>,
-    /// Groups whose v2/v3 asymmetry is on the documented allowlist
-    /// (e.g.
`Fineweb` in v3, an empty `TPC-H (NVMe) (SF=1000)` fan-out
-    /// in v2). Surfaced for the human reader; not a regression.
-    pub only_in_v3_intentional: Vec<String>,
-    pub only_in_v2_intentional: Vec<String>,
-    pub group_chart_diffs: Vec<GroupChartDiff>,
+    pub chart_diffs: Vec<ChartDiff>,
 }
 
-impl VerifyReport {
-    /// True if every v2 group is represented in v3 *and* every per-
-    /// chart-name asymmetry is documented as intentional. The CLI's
-    /// exit code reflects this.
-    pub fn is_clean(&self) -> bool {
-        self.only_in_v2.is_empty()
-            && self.only_in_v3.is_empty()
-            && self.group_chart_diffs.iter().all(|d| d.is_clean())
-    }
+#[derive(Debug, Clone)]
+pub struct ChartDiff {
+    pub group: String,
+    pub v2_count: usize,
+    pub v3_count: usize,
+}
 
-    /// Backwards-compatible: were all v2 groups covered by v3?
+impl VerifyReport {
+    /// True if every v2 group is represented in v3. The CLI's exit
+    /// code reflects this.
     pub fn v2_groups_covered(&self) -> bool {
         self.only_in_v2.is_empty()
     }
@@ -94,30 +54,18 @@
         if !self.only_in_v2.is_empty() {
             writeln!(f, "Groups only in v2 (regression candidates):")?;
             for g in &self.only_in_v2 {
-                writeln!(f, "  ✗ {g}")?;
-            }
-        }
-        if !self.only_in_v2_intentional.is_empty() {
-            writeln!(f, "Groups only in v2 (documented intentional skip):")?;
-            for g in &self.only_in_v2_intentional {
-                writeln!(f, "  · {g}")?;
+                writeln!(f, "  - {g}")?;
             }
         }
         if !self.only_in_v3.is_empty() {
-            writeln!(f, "Groups only in v3 (regression candidates):")?;
+            writeln!(f, "Groups only in v3:")?;
             for g in &self.only_in_v3 {
-                writeln!(f, "  ✗ {g}")?;
-            }
-        }
-        if !self.only_in_v3_intentional.is_empty() {
-            writeln!(f, "Groups only in v3 (documented intentional addition):")?;
-            for g in &self.only_in_v3_intentional {
-                writeln!(f, "  · {g}")?;
+                writeln!(f, "  + {g}")?;
             }
         }
-        if !self.group_chart_diffs.is_empty() {
-            writeln!(f, "Chart name diffs (per group):")?;
-            for d in &self.group_chart_diffs {
+        if !self.chart_diffs.is_empty() {
+            writeln!(f, "Chart count diffs:")?;
+            for d in &self.chart_diffs {
                 writeln!(
                     f,
                     "  {} : v2={} v3={} (delta={})",
                     d.group,
                     d.v2_count,
                     d.v3_count,
                     d.v3_count as i64 - d.v2_count as i64,
                 )?;
-                for c in &d.missing_in_v3_regression {
-                    writeln!(f, "    ✗ only in v2 (regression candidate): {c}")?;
-                }
-                for c in &d.missing_in_v2_regression {
-                    writeln!(f, "    ✗ only in v3 (regression candidate): {c}")?;
-                }
-                for c in &d.missing_in_v3_intentional {
-                    writeln!(f, "    · only in v2 (documented intentional skip): {c}")?;
-                }
-                for c in &d.missing_in_v2_intentional {
-                    writeln!(
-                        f,
-                        "    · only in v3 (documented intentional addition): {c}"
-                    )?;
-                }
             }
         }
-        if self.is_clean() {
-            writeln!(f, "verify: clean (every asymmetry is documented).")?;
-        } else {
-            writeln!(f, "verify: regression candidates present (see ✗ above).")?;
-        }
         Ok(())
     }
 }
@@ -178,76 +106,6 @@ pub fn run(v2_server: &str, duckdb_path: &Path) -> Result<VerifyReport> {
     Ok(diff(&v2, &v3))
 }
 
-/// Charts present in v2 metadata but intentionally absent from v3.
-///
-/// Each entry is a `(group, normalized_chart_name)` pair. The
-/// normalization matches [`normalize_chart`] so the lookup uses the
-/// same key shape the diff produces. Update this list whenever the
-/// classifier deliberately drops a v2 chart pattern (e.g. a derived
-/// ratio, a placeholder, a deprecated format) so a future regression
-/// shows up as a fresh ✗ instead of getting lost in the noise.
-const INTENTIONAL_ONLY_IN_V2: &[(&str, &str)] = &[
-    // Compression ratios are derived at read time from compression_sizes
-    // (joined to itself); the migrator routes them to Skip::DerivedRatio.
-    ("Compression", "VORTEX:LANCE RATIO COMPRESS TIME"),
-    ("Compression", "VORTEX:LANCE RATIO DECOMPRESS TIME"),
-    ("Compression", "VORTEX:PARQUET ZSTD RATIO COMPRESS TIME"),
-    ("Compression", "VORTEX:PARQUET ZSTD RATIO DECOMPRESS TIME"),
-    // Compression-size ratios — same story.
-    ("Compression Size", "VORTEX:LANCE SIZE"),
-    ("Compression Size", "VORTEX:PARQUET ZSTD SIZE"),
-    ("Compression Size", "VORTEX:RAW SIZE"),
-    // The legacy 2-part `random-access/<format>-tokio-local-disk` records
-    // render in v2 under a "RANDOM ACCESS" placeholder chart. The
-    // migrator recovers their *values* under `dataset="taxi"` (see
-    // `bin_random_access`) instead of carrying the placeholder name
-    // forward, so v3 has a "TAXI" chart and v2 has "RANDOM ACCESS".
-    // Both sides are documented intentional asymmetries.
-    ("Random Access", "RANDOM ACCESS"),
-];
-
-/// Charts emitted by the migrator that v2 intentionally doesn't render.
-///
-/// Pair shape matches [`INTENTIONAL_ONLY_IN_V2`].
-const INTENTIONAL_ONLY_IN_V3: &[(&str, &str)] = &[
-    // `vortex-compact` size rows come in via `migrate_file_sizes` (the
-    // file-sizes-*.json.gz path). v2 never rendered the format because
-    // its `getGroup` didn't recognize the `vortex-compact` suite.
-    ("Compression Size", "VORTEX COMPACT SIZE"),
-    // 2-part legacy random-access records (per the "RANDOM ACCESS"
-    // entry in INTENTIONAL_ONLY_IN_V2 above) are recovered in v3 as
-    // dataset="taxi". v2 never had a chart by that name in Random
-    // Access — its taxi dataset always rode the `taxi/correlated`
-    // and `taxi/uniform` 4-part patterns.
-    ("Random Access", "TAXI"),
-];
-
-/// Groups intentionally surfaced by v3 but skipped by v2's metadata.
-///
-/// `fineweb` is on `V3_QUERY_SUITES` because the live CI workflow still
-/// emits fineweb measurements; v2's `getGroup` marks the suite
-/// `skip: true` so the v2 server never builds metadata for it.
-const INTENTIONAL_ONLY_IN_V3_GROUPS: &[&str] = &["Fineweb"];
-
-/// Groups intentionally listed by v2 metadata that v3 doesn't materialize.
-///
-/// v2's `FAN_OUT_GROUPS` registers TPC-H and TPC-DS group names for
-/// every `(storage, scale_factor)` pair the UI knows about, even when
-/// no records exist (the chart list comes back empty). The migrator
-/// only writes a group when matching rows exist, so empty fan-outs
-/// don't appear in v3 — which is the intended behavior.
-const INTENTIONAL_ONLY_IN_V2_GROUPS: &[&str] = &[
-    "TPC-DS (NVMe) (SF=10)",
-    "TPC-H (NVMe) (SF=1000)",
-    "TPC-H (S3) (SF=1000)",
-];
-
-fn is_intentional(table: &[(&str, &str)], group: &str, chart: &str) -> bool {
-    table
-        .iter()
-        .any(|(g, c)| *g == group && normalize_chart(c) == chart)
-}
-
 fn collect_v3_groups(duckdb_path: &Path) -> Result<BTreeMap<String, BTreeSet<String>>> {
     let conn = Connection::open(duckdb_path)
         .with_context(|| format!("opening DuckDB at {}", duckdb_path.display()))?;
@@ -371,76 +229,27 @@ fn diff(
     let mut report = VerifyReport::default();
     let v2_keys: BTreeSet<&String> = v2.keys().collect();
     let v3_keys: BTreeSet<&String> = v3.keys().collect();
-
-    // Group-level membership. An entry that's "only in v2" but with
-    // zero charts (e.g. a pre-registered FAN_OUT_GROUPS placeholder)
-    // and that's on the documented allowlist isn't a regression.
    for g in v2_keys.intersection(&v3_keys) {
        report.matched_groups.push((**g).clone());
        let v2_charts = &v2[*g];
        let v3_charts = &v3[*g];
-        let only_v3 = v3_charts.difference(v2_charts).cloned().collect::<Vec<_>>();
-        let only_v2 = v2_charts.difference(v3_charts).cloned().collect::<Vec<_>>();
-        if only_v3.is_empty() && only_v2.is_empty() {
-            continue;
-        }
-        let mut row = GroupChartDiff {
-            group: (**g).clone(),
-            v2_count: v2_charts.len(),
-            v3_count: v3_charts.len(),
-            ..Default::default()
-        };
-        for c in only_v2 {
-            if is_intentional(INTENTIONAL_ONLY_IN_V2, g, &c) {
-                row.missing_in_v3_intentional.push(c);
-            } else {
-                row.missing_in_v3_regression.push(c);
-            }
+        if v2_charts.len() != v3_charts.len() {
+            report.chart_diffs.push(ChartDiff {
+                group: (**g).clone(),
+                v2_count: v2_charts.len(),
+                v3_count: v3_charts.len(),
+            });
         }
-        for c in only_v3 {
-            if is_intentional(INTENTIONAL_ONLY_IN_V3, g, &c) {
-                row.missing_in_v2_intentional.push(c);
-            } else {
-                row.missing_in_v2_regression.push(c);
-            }
-        }
-        row.missing_in_v3_intentional.sort();
-        row.missing_in_v3_regression.sort();
-        row.missing_in_v2_intentional.sort();
-        row.missing_in_v2_regression.sort();
-        report.group_chart_diffs.push(row);
     }
     for g in v3_keys.difference(&v2_keys) {
-        // Group exists only in v3. If documented (e.g. fineweb), shunt
-        // it to the intentional list; otherwise it's a regression
-        // candidate.
-        if INTENTIONAL_ONLY_IN_V3_GROUPS.contains(&g.as_str()) {
-            report.only_in_v3_intentional.push((**g).clone());
-        } else {
-            report.only_in_v3.push((**g).clone());
-        }
+        report.only_in_v3.push((**g).clone());
     }
     for g in v2_keys.difference(&v3_keys) {
-        // Group exists only in v2. Documented empty fan-outs (the
-        // hard-coded TPC-H/TPC-DS slots in v2's `FAN_OUT_GROUPS`) don't
-        // count as regressions; surface them as intentional.
-        let charts = &v2[*g];
-        let documented_empty =
-            INTENTIONAL_ONLY_IN_V2_GROUPS.contains(&g.as_str()) && charts.is_empty();
-        if documented_empty {
-            report.only_in_v2_intentional.push((**g).clone());
-        } else {
-            report.only_in_v2.push((**g).clone());
-        }
+        report.only_in_v2.push((**g).clone());
     }
     report.matched_groups.sort();
     report.only_in_v3.sort();
-    report.only_in_v3_intentional.sort();
     report.only_in_v2.sort();
-    report.only_in_v2_intentional.sort();
-    report
-        .group_chart_diffs
-        .sort_by(|a, b| a.group.cmp(&b.group));
     report
 }
@@ -515,10 +324,6 @@ fn normalize_chart(s: &str) -> String {
 mod tests {
     use super::*;
 
-    fn group(charts: &[&str]) -> BTreeSet<String> {
-        charts.iter().map(|s| normalize_chart(s)).collect()
-    }
-
     #[test]
     fn normalize_chart_canonicalizes() {
         assert_eq!(normalize_chart("taxi/take"), "TAXI/TAKE");
             "Clickbench"
         );
     }
-
-    #[test]
-    fn diff_clean_when_only_documented_asymmetries() {
-        // v2 has "RANDOM ACCESS" placeholder; v3 has "TAXI" recovered.
-        // Both are on the intentional allowlist.
- let mut v2 = BTreeMap::new(); - v2.insert( - "Random Access".to_string(), - group(&["TAXI/CORRELATED", "TAXI/UNIFORM", "RANDOM ACCESS"]), - ); - let mut v3 = BTreeMap::new(); - v3.insert( - "Random Access".to_string(), - group(&["TAXI/CORRELATED", "TAXI/UNIFORM", "TAXI"]), - ); - let report = diff(&v2, &v3); - assert!(report.is_clean(), "expected clean, got: {report}"); - let row = &report.group_chart_diffs[0]; - assert_eq!(row.group, "Random Access"); - assert_eq!(row.missing_in_v3_intentional, vec!["RANDOM ACCESS"]); - assert_eq!(row.missing_in_v2_intentional, vec!["TAXI"]); - assert!(row.missing_in_v3_regression.is_empty()); - assert!(row.missing_in_v2_regression.is_empty()); - } - - #[test] - fn diff_flags_undocumented_only_in_v2_chart() { - let mut v2 = BTreeMap::new(); - v2.insert("Random Access".to_string(), group(&["NEW CHART NAME"])); - let mut v3 = BTreeMap::new(); - v3.insert("Random Access".to_string(), group(&[])); - let report = diff(&v2, &v3); - assert!(!report.is_clean(), "expected regression, got clean"); - let row = &report.group_chart_diffs[0]; - assert_eq!(row.missing_in_v3_regression, vec!["NEW CHART NAME"]); - } - - #[test] - fn diff_flags_undocumented_only_in_v3_chart() { - let mut v2 = BTreeMap::new(); - v2.insert("Random Access".to_string(), group(&[])); - let mut v3 = BTreeMap::new(); - v3.insert("Random Access".to_string(), group(&["MYSTERY CHART"])); - let report = diff(&v2, &v3); - assert!(!report.is_clean(), "expected regression, got clean"); - let row = &report.group_chart_diffs[0]; - assert_eq!(row.missing_in_v2_regression, vec!["MYSTERY CHART"]); - } - - #[test] - fn diff_documented_empty_fan_out_group_not_a_regression() { - // v2 metadata always lists `TPC-H (NVMe) (SF=1000)` with zero - // charts (the FAN_OUT_GROUPS hard-coding); v3 doesn't - // materialize empty groups. The verifier should accept this - // and route the asymmetry to the intentional list. - let mut v2 = BTreeMap::new(); - v2.insert("TPC-H (NVMe) (SF=1000)".to_string(), group(&[])); - v2.insert("Clickbench".to_string(), group(&["CLICKBENCH Q0"])); - let mut v3 = BTreeMap::new(); - v3.insert("Clickbench".to_string(), group(&["CLICKBENCH Q0"])); - let report = diff(&v2, &v3); - assert!( - report.is_clean(), - "documented empty fan-out should not be a regression: {report}" - ); - assert!(report.only_in_v2.is_empty()); - assert_eq!( - report.only_in_v2_intentional, - vec!["TPC-H (NVMe) (SF=1000)"] - ); - } - - #[test] - fn diff_undocumented_only_in_v2_group_is_a_regression() { - let mut v2 = BTreeMap::new(); - v2.insert("Brand New Group".to_string(), group(&["X"])); - let v3 = BTreeMap::new(); - let report = diff(&v2, &v3); - assert!(!report.is_clean()); - assert_eq!(report.only_in_v2, vec!["Brand New Group"]); - } - - #[test] - fn diff_documented_only_in_v3_group_not_a_regression() { - // `Fineweb` is on the v3 query-suite allowlist (CI still emits - // fineweb data); v2's `getGroup` skips fineweb so its metadata - // never lists the group. The verifier should accept this and - // surface the asymmetry as intentional. 
-        let v2 = BTreeMap::new();
-        let mut v3 = BTreeMap::new();
-        v3.insert("Fineweb".to_string(), group(&["FINEWEB Q0"]));
-        let report = diff(&v2, &v3);
-        assert!(report.is_clean(), "documented v3-only group: {report}");
-        assert!(report.only_in_v3.is_empty());
-        assert_eq!(report.only_in_v3_intentional, vec!["Fineweb"]);
-    }
-
-    #[test]
-    fn diff_undocumented_only_in_v3_group_is_a_regression() {
-        let v2 = BTreeMap::new();
-        let mut v3 = BTreeMap::new();
-        v3.insert("Mystery Group".to_string(), group(&["X"]));
-        let report = diff(&v2, &v3);
-        assert!(!report.is_clean());
-        assert_eq!(report.only_in_v3, vec!["Mystery Group"]);
-    }
 }
diff --git a/benchmarks-website/migrate/tests/classifier.rs b/benchmarks-website/migrate/tests/classifier.rs
index 71e97cb6c9a..300c610bd24 100644
--- a/benchmarks-website/migrate/tests/classifier.rs
+++ b/benchmarks-website/migrate/tests/classifier.rs
@@ -260,6 +260,7 @@ fn compression_size_records(#[case] name: &str, #[case] expected: V3Bin) {
 #[case::ratio_size_vortex_raw("vortex:raw size/clickbench")]
 #[case::throughput("compress throughput/clickbench")]
 #[case::nonsense_prefix("not-a-known-bench/series")]
+#[case::random_access_2_part_legacy("random-access/parquet-tokio-local-disk")]
 #[case::random_access_3_part("random-access/taxi/parquet-tokio-local-disk")]
 fn unmapped_records_yield_none(#[case] name: &str) {
     let r = record(name);
@@ -270,74 +271,20 @@ fn unmapped_records_yield_none(#[case] name: &str) {
     );
 }
 
-#[rstest]
-#[case::parquet_2_part(
-    "random-access/parquet-tokio-local-disk",
-    V3Bin::RandomAccess {
-        dataset: "taxi".into(),
-        format: "parquet".into(),
-    },
-)]
-#[case::vortex_2_part(
-    "random-access/vortex-tokio-local-disk",
-    V3Bin::RandomAccess {
-        dataset: "taxi".into(),
-        format: "vortex-file-compressed".into(),
-    },
-)]
-#[case::lance_2_part(
-    "random-access/lance-tokio-local-disk",
-    V3Bin::RandomAccess {
-        dataset: "taxi".into(),
-        format: "lance".into(),
-    },
-)]
-fn random_access_2_part_legacy_recovered_as_taxi(#[case] name: &str, #[case] expected: V3Bin) {
-    // The 2-part shape `random-access/<format>-tokio-local-disk` is
-    // emitted by `random-access-bench`'s legacy taxi run (no
-    // `AccessPattern`, see `measurement_name` in
-    // `benchmarks/random-access-bench/src/main.rs`). The live v3
-    // emitter writes `dataset="taxi"` for those measurements, so the
-    // historical 2-part records on S3 must land in the same v3
-    // chart instead of being dropped as `UnsupportedShape`.
-    let r = record(name);
-    assert_eq!(
-        classify(&r),
-        Some(expected),
-        "2-part legacy random-access must recover as dataset=taxi"
-    );
-}
-
-#[rstest]
-#[case::parquet_footer("random-access/parquet-tokio-local-disk-footer")]
-#[case::vortex_footer("random-access/vortex-tokio-local-disk-footer")]
-#[case::lance_footer("random-access/lance-tokio-local-disk-footer")]
-fn random_access_2_part_footer_is_deprecated(#[case] name: &str) {
-    // The reopen-mode `-footer` variant is a different access pattern
-    // (file is reopened per take). The live v3 emitter passes the
-    // bare `format.name()` for both reopen and cached, so it can't
-    // distinguish them on the wire. Keep migration consistent with
-    // that by routing `-footer` 2-part records to Skip::Deprecated
-    // (they don't strip clean to a v3-allowlisted format).
-    let r = record(name);
-    assert!(
-        matches!(classify_outcome(&r), Outcome::Skip(Skip::Deprecated)),
-        "2-part `-footer` random-access must be Skip::Deprecated"
-    );
-}
-
-#[rstest]
-#[case::parquet_footer("random-access/taxi/correlated/parquet-tokio-local-disk-footer")]
-#[case::vortex_footer("random-access/feature-vectors/uniform/vortex-tokio-local-disk-footer")]
-#[case::lance_footer("random-access/nested-structs/correlated/lance-tokio-local-disk-footer")]
-fn random_access_4_part_footer_is_deprecated(#[case] name: &str) {
-    // Same reasoning as 2-part `-footer`: the format string ends in
-    // `-tokio-local-disk-footer`, the strip_suffix doesn't match, and
-    // the unstripped value fails the V3_FORMATS allowlist.
-    let r = record(name);
+#[test]
+fn random_access_2_part_legacy_is_skip_not_unknown() {
+    // The 2-part legacy shape `random-access/<format>-tokio-local-disk`
+    // carries no dataset, so `bin_random_access` returns None. That
+    // None must route through `Outcome::Skip` (an intentional drop),
+    // NOT `Outcome::Unknown`, otherwise these records count against
+    // the 5% uncategorized gate in `migrate::run`. Top-level
+    // `classify()` returns None for both Skip and Unknown, so this
+    // assertion has to go through `classify_outcome`.
+    let r = record("random-access/parquet-tokio-local-disk");
+    let outcome = classify_outcome(&r);
     assert!(
-        matches!(classify_outcome(&r), Outcome::Skip(Skip::Deprecated)),
-        "4-part `-footer` random-access must be Skip::Deprecated"
+        matches!(outcome, Outcome::Skip(_)),
+        "2-part legacy random-access must Skip, not Unknown; got {outcome:?}"
     );
 }
 
From ed968dc877f240eb338b561b076d07f822480d08 Mon Sep 17 00:00:00 2001
From: Claude
Date: Wed, 29 Apr 2026 16:06:15 +0000
Subject: [PATCH 23/26] benchmarks-website: sort tooltip rows, hide URL, click
 point opens PR
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three small UX fixes for the v3 inline charts:

1. Sort tooltip rows by current y-value descending via Chart.js
   `tooltip.itemSort` so they match the visual top-to-bottom stack of
   series at the hovered x.

2. Drop the bare `github.com/.../commit/...` URL from the tooltip
   footer. Show `<short sha> · <commit message>` instead. The full URL
   is no longer rendered as text but is still reachable via the click
   handler below.

3. Add `onClick` on each chart that picks the nearest x-index, parses
   `(#NNNN)` from the squash-merged commit message, and opens the
   corresponding `vortex-data/vortex` pull request in a new tab. Falls
   back to the commit URL when the regex doesn't match (which is only
   expected for non-squash merges).

Pure JS change in `static/chart-init.js`; no Rust/API touched, so no
fmt/clippy/public-api work was needed. Snapshot tests already pass —
they assert the served HTML, which only references the JS file by URL.

Signed-off-by: Claude
---
 .../server/static/chart-init.js               | 60 +++++++++++++++----
 1 file changed, 50 insertions(+), 10 deletions(-)

diff --git a/benchmarks-website/server/static/chart-init.js b/benchmarks-website/server/static/chart-init.js
index b074a4bb0db..c8877c1dfda 100644
--- a/benchmarks-website/server/static/chart-init.js
+++ b/benchmarks-website/server/static/chart-init.js
@@ -52,6 +52,21 @@
     return s.length > max ? s.slice(0, max - 1) + "…" : s;
   }
 
+  function firstLine(s) {
+    if (typeof s !== "string") return "";
+    var nl = s.indexOf("\n");
+    return nl >= 0 ? s.slice(0, nl) : s;
+  }
+
+  // Vortex commits to `develop` are squash-merged from PRs; the squash subject
+  // ends with `(#NNNN)`.
Returning just the number lets callers build either a
+  // PR or commit URL.
+  function parsePrNumber(message) {
+    if (typeof message !== "string") return null;
+    var m = message.match(/\(#(\d+)\)/);
+    return m ? m[1] : null;
+  }
+
   function escapeHtml(s) {
     return String(s)
       .replace(/&/g, "&amp;")
@@ -144,9 +159,9 @@
   // previous implementation flipped it to `auto` when visible; the cursor
   // would land on the tooltip, fire mouseout on the canvas, the tooltip
   // would hide, the cursor would re-enter the canvas, and the cycle would
-  // repeat at event-loop frequency. The cost of `pointer-events: none` is
-  // that the github-link in the tooltip footer is no longer clickable, but
-  // the chart-card title already links to the permalink.
+  // repeat at event-loop frequency. Clicks on a data point are handled by
+  // the chart's `onClick` (opens the PR or commit URL in a new tab), so the
+  // tooltip itself never needs to be interactive.
   // -----------------------------------------------------------------------
   function externalTooltipHandler(canvas, host) {
     return function (context) {
@@ -195,13 +210,16 @@
         + escapeHtml(shortDate(commit.timestamp))
         + "</div>";
"; - var msg = commit.message ? truncate(commit.message, 120) : ""; - var footerHtml = (msg || commit.url) - ? '" + // Show short SHA + first-line commit message, truncated. The full URL + // (or PR URL) is wired up via the chart's onClick handler, so we don't + // render it as text here. + var msg = truncate(firstLine(commit.message || ""), 80); + var footerLine = commit.sha + ? (msg ? escapeHtml(shortSha(commit.sha)) + " · " + escapeHtml(msg) + : escapeHtml(shortSha(commit.sha))) + : escapeHtml(msg); + var footerHtml = footerLine + ? '" : ""; host.innerHTML = titleHtml + '
' + rows + "
" + footerHtml; @@ -298,6 +316,21 @@ // the column. Combined with `pointer-events: none` on the tooltip // host, this is the flicker fix. interaction: { mode: "index", intersect: false, axis: "x" }, + onClick: function (event, _activeElements, chart) { + var points = chart.getElementsAtEventForMode( + event, "nearest", { intersect: false, axis: "x" }, true, + ); + if (!points.length) return; + var idx = points[0].index; + var commits = (canvas.__bench_payload || {}).commits || []; + var commit = commits[idx]; + if (!commit) return; + var pr = parsePrNumber(commit.message); + var url = pr + ? "https://github.com/vortex-data/vortex/pull/" + pr + : commit.url; + if (url) window.open(url, "_blank", "noopener"); + }, scales: { y: { type: state.y === "log" ? "logarithmic" : "linear", @@ -315,6 +348,13 @@ tooltip: { enabled: false, external: externalTooltipHandler(canvas, host), + // Order rows top-to-bottom by current y-value descending so the + // tooltip matches the visual stack of the lines at the hovered x. + itemSort: function (a, b) { + var av = a.parsed && Number.isFinite(a.parsed.y) ? a.parsed.y : -Infinity; + var bv = b.parsed && Number.isFinite(b.parsed.y) ? b.parsed.y : -Infinity; + return bv - av; + }, }, // chartjs-plugin-zoom config — wheel-zoom is disabled because we // want wheel-pan instead (handled by the canvas wheel listener From 1cd7166b0123d3c25e59943b1761f0f1d6232314 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 29 Apr 2026 16:31:19 +0000 Subject: [PATCH 24/26] feat(benchmarks-website): per-chart range scrollbar strip Add a thin draggable strip below each chart's that mirrors the fetched commit history and highlights the currently visible window. The highlight can be panned by dragging its body or resized by dragging either edge handle; bare-track clicks recentre the window at the cursor. Wired bidirectionally with chartjs-plugin-zoom: drag-pan and drag-rect-zoom gestures refresh the strip via the plugin's onPan/onZoom/onPanComplete/onZoomComplete hooks, while toolbar slider changes and wheel-pan call canvas.__bench_strip_render directly. Strip drags clamp to the data range, mirror the resulting window size onto the toolbar slider, and trigger chart.update("none"). The strip is ~14px tall (18px on mobile), keeps pointer-events: auto (unlike the tooltip host), and lays out via percentages so it tracks the chart canvas width without extra wiring on resize. Local checks: - cargo test -p vortex-bench-server --test web_ui (15 passed; insta snapshots updated for landing/chart/group pages to include the strip markup; new chart_card_carries_per_chart_toolbar assertions cover range-strip / window / handles). - cargo +nightly fmt --all - cargo clippy -p vortex-bench-server --all-targets --all-features (clean). Browser smoke test: not run in this environment; the snapshot tests exercise the full SSR rendering path against a fixture-seeded DB and confirm one strip per chart-card on /, /chart/{slug}, and /group/{slug}. 
Signed-off-by: Claude --- benchmarks-website/server/src/html.rs | 27 ++- .../server/static/chart-init.js | 171 ++++++++++++++++++ benchmarks-website/server/static/style.css | 57 ++++++ .../tests/snapshots/chart_page_query.snap | 2 +- .../tests/snapshots/group_page_query.snap | 2 +- .../server/tests/snapshots/landing_page.snap | 2 +- benchmarks-website/server/tests/web_ui.rs | 12 ++ 7 files changed, 269 insertions(+), 4 deletions(-) diff --git a/benchmarks-website/server/src/html.rs b/benchmarks-website/server/src/html.rs index 46b6a7c006b..e638a4f9e9a 100644 --- a/benchmarks-website/server/src/html.rs +++ b/benchmarks-website/server/src/html.rs @@ -73,7 +73,7 @@ const CHART_INIT_JS: &[u8] = include_bytes!("../static/chart-init.js"); const STYLE_CSS: &[u8] = include_bytes!("../static/style.css"); const VORTEX_BLACK_SVG: &[u8] = include_bytes!("../../public/vortex_black_nobg.svg"); const VORTEX_WHITE_SVG: &[u8] = include_bytes!("../../public/vortex_white_nobg.svg"); -const STATIC_ASSET_VERSION: &str = "bench-v3-ui-7"; +const STATIC_ASSET_VERSION: &str = "bench-v3-ui-8"; /// HTML routes mounted under `/`. pub fn router() -> Router { @@ -469,6 +469,7 @@ fn chart_card(link: &api::ChartLink, idx: usize, inlined: Option<&NamedChartResp div.chart-wrap { canvas data-chart-index=(idx) {} } + (range_strip(idx)) @if let Some(item) = inlined { script id={ "chart-data-" (idx) } type="application/json" { (PreEscaped(escape_json_for_script( @@ -497,6 +498,7 @@ fn chart_body(chart: &ChartResponse, slug: &str, payload_json: &str) -> Markup { div.chart-wrap { canvas data-chart-index="0" {} } + (range_strip(0)) // Embedded JSON; rendered as text content so JSON `<` / `>` are HTML-escaped. script id="chart-data-0" type="application/json" { (PreEscaped(escape_json_for_script(payload_json))) @@ -527,6 +529,7 @@ fn group_body(group: &GroupChartsResponse) -> Markup { div.chart-wrap { canvas data-chart-index=(i) {} } + (range_strip(i)) script id={ "chart-data-" (i) } type="application/json" { (PreEscaped(escape_json_for_script( &serde_json::to_string(&item.chart) @@ -701,6 +704,28 @@ fn per_chart_toolbar(idx: usize) -> Markup { } } +/// Render the per-chart range scrollbar strip. A thin track that spans the +/// full chart width and shows which slice of the fetched commit history is +/// currently visible. `chart-init.js` hydrates the strip on chart construction +/// and wires bidirectional drag/resize to the chart's pan/zoom state. +fn range_strip(idx: usize) -> Markup { + html! { + div.chart-range-strip data-chart-index=(idx) + data-role="range-strip" + aria-label="Visible commit range" + role="slider" { + div.chart-range-strip-track { + div.chart-range-strip-window data-role="range-window" { + span.chart-range-strip-handle.chart-range-strip-handle--left + data-role="range-handle-left" aria-hidden="true" {} + span.chart-range-strip-handle.chart-range-strip-handle--right + data-role="range-handle-right" aria-hidden="true" {} + } + } + } + } +} + /// Make a JSON string safe to embed inside a `

unit: ns · 2 series · 3 commits

Show
Y
+tpch sf=1 Q1 [nvme] — bench.vortex.dev

unit: ns · 2 series · 3 commits

Show
Y
diff --git a/benchmarks-website/server/tests/snapshots/group_page_query.snap b/benchmarks-website/server/tests/snapshots/group_page_query.snap index 3592a362b95..deb003ce2e0 100644 --- a/benchmarks-website/server/tests/snapshots/group_page_query.snap +++ b/benchmarks-website/server/tests/snapshots/group_page_query.snap @@ -2,4 +2,4 @@ source: benchmarks-website/server/tests/web_ui.rs expression: body --- -TPC-H (NVMe) (SF=1) — bench.vortex.dev

2 charts

Performance Summary

#1datafusion:vortex-file-compressed1.11x1.80 ms
#2duckdb:parquet1.60x900.00 us
Geomean of query time ratio to fastest (lower is better)

Q1

Show
Y

Q2

Show
Y
+TPC-H (NVMe) (SF=1) — bench.vortex.dev

2 charts

Performance Summary

#1datafusion:vortex-file-compressed1.11x1.80 ms
#2duckdb:parquet1.60x900.00 us
Geomean of query time ratio to fastest (lower is better)

Q1

Show
Y

Q2

Show
Y
diff --git a/benchmarks-website/server/tests/snapshots/landing_page.snap b/benchmarks-website/server/tests/snapshots/landing_page.snap index 494a1d9daa8..c2619d39dd8 100644 --- a/benchmarks-website/server/tests/snapshots/landing_page.snap +++ b/benchmarks-website/server/tests/snapshots/landing_page.snap @@ -2,4 +2,4 @@ source: benchmarks-website/server/tests/web_ui.rs expression: body --- -bench.vortex.dev
Random Access1 chart

Random Access Performance

#1vortex-file-compressed100.50 us1.00x
#2parquet201.00 us2.00x
Random access time | Ratio to fastest (lower is better)

taxi

Show
Y
Compression1 chart

Compression Throughput vs Parquet

Write Speed (Compression)2.00x
📤Scan Speed (Decompression)2.00x
Inverse geomean of Vortex/Parquet ratios (higher is better)

tpch-lineitem

Show
Y
Compression Size1 chart

Compression Size Summary

⬇️Min Size Ratio0.50x
📊Mean Size Ratio0.50x
⬆️Max Size Ratio0.50x
Geomean of Vortex/Parquet size ratios (lower is better)

tpch-lineitem

Show
Y
TPC-H (NVMe) (SF=1)2 charts

Performance Summary

#1datafusion:vortex-file-compressed1.11x1.80 ms
#2duckdb:parquet1.60x900.00 us
Geomean of query time ratio to fastest (lower is better)

Q1

Show
Y

Q2

Show
Y
cohere-large-10m / partitioned1 chart

threshold=0.75

Show
Y
+bench.vortex.dev
Random Access1 chart

Random Access Performance

#1vortex-file-compressed100.50 us1.00x
#2parquet201.00 us2.00x
Random access time | Ratio to fastest (lower is better)

taxi

Show
Y
Compression1 chart

Compression Throughput vs Parquet

Write Speed (Compression)2.00x
📤Scan Speed (Decompression)2.00x
Inverse geomean of Vortex/Parquet ratios (higher is better)

tpch-lineitem

Show
Y
Compression Size1 chart

Compression Size Summary

⬇️Min Size Ratio0.50x
📊Mean Size Ratio0.50x
⬆️Max Size Ratio0.50x
Geomean of Vortex/Parquet size ratios (lower is better)

tpch-lineitem

Show
Y
TPC-H (NVMe) (SF=1)2 charts

Performance Summary

#1datafusion:vortex-file-compressed1.11x1.80 ms
#2duckdb:parquet1.60x900.00 us
Geomean of query time ratio to fastest (lower is better)

Q1

Show
Y

Q2

Show
Y
cohere-large-10m / partitioned1 chart

threshold=0.75

Show
Y
diff --git a/benchmarks-website/server/tests/web_ui.rs b/benchmarks-website/server/tests/web_ui.rs index ab3f752be59..a014d52156f 100644 --- a/benchmarks-website/server/tests/web_ui.rs +++ b/benchmarks-website/server/tests/web_ui.rs @@ -458,11 +458,23 @@ async fn chart_card_carries_per_chart_toolbar() -> Result<()> { let card_count = body.matches(r#"
0, "landing page must render chart cards"); assert_eq!( toolbar_count, card_count, "every chart-card must contain a toolbar--card ({card_count} cards / {toolbar_count} toolbars)" ); + assert_eq!( + strip_count, card_count, + "every chart-card must carry a range-strip below the canvas \ + ({card_count} cards / {strip_count} strips)" + ); + assert!( + body.contains(r#"data-role="range-window""#) + && body.contains(r#"data-role="range-handle-left""#) + && body.contains(r#"data-role="range-handle-right""#), + "range-strip must include a draggable window and two resize handles" + ); assert!( !body.contains(r#"data-mode="#), "abs/rel mode buttons should not render" From 05af25fd684ccfd899fc9c370a1b10cc5bc5fb95 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 29 Apr 2026 16:38:42 +0000 Subject: [PATCH 25/26] feat(benchmarks-website): global engine/format filter bar MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a sticky filter bar at the top of the landing page with rows of toggle chips for engines (datafusion, duckdb, …) and formats (vortex-file-compressed, parquet, …). Clicking a chip hides every series whose engine or format doesn't match across every chart at once. Per-card legend toggles still work and are tracked as overrides — once you click a series's legend on a card, the global filter no longer touches that series on that card. The chip universe is sourced from a `SELECT DISTINCT` over the fact tables, so adding a new engine or format in ingest grows the bar with no code change. Filter state round-trips through `?engine=…&format=…`. The landing page reads the params on load, the client `history.replaceState`s on every chip click, and the permalink pages (`/chart/{slug}`, `/group/{slug}`) embed the same JSON state so a shared deep link applies the filter on hydration even though they don't render the bar themselves. Wire shape: each `ChartResponse` now carries an optional `series_meta` map keyed by series name with `{engine?, format?}` tags so the client has the metadata it needs to drive bulk hide/show without parsing series labels heuristically. Series without an engine tag (compression times, random access, vector search) are unaffected by the engine filter, and similarly for the format filter — a "duckdb only" toggle shouldn't nuke charts that have no engine dimension. Snapshot tests for the bar markup; smoke tested in a real browser (playwright + chromium): chip clicks hide the right datasets across every chart, URL updates, override survives a global re-toggle, and a refresh restores the filter. Signed-off-by: Claude ui(benchmarks-website): move filter bar into navbar dropdown; toggle chips independently Drop the standalone filter bar that sat below the header and put the chips inside a "Filters" dropdown anchored to the sticky navbar, so adjusting visibility no longer requires scrolling back to the top of the page. The trigger button shows a small badge counting how many chips are currently off; the panel opens/closes on click and dismisses on click-outside or Escape. Permalink pages render the same dropdown in their navbar (previously they had no UI for it, only honouring URL state). Toggle semantics changed to be per-chip independent. Previously the first chip click in a row pivoted from "all visible" to "only this one"; now each chip flips just its own active state. The "all" chip is a one-shot reset that forces every chip in that row back to active — it never holds an active state itself. 
Internal model: `globalFilter.{engines,formats}` now tracks the
active (visible) set rather than an allowlist that's empty when no
filter is applied. The universe is read from the rendered chip DOM so
the client doesn't have to mirror the server enums. The URL stays as
an allowlist (`?engine=duckdb` = "show only duckdb") for stability,
and we omit the param whenever the active set equals the universe so
the no-filter URL is clean.

Override fix: the legend onClick now flips both `dataset.hidden` and
`setDatasetVisibility` so subsequent global filter passes (which write
to `dataset.hidden`) don't drift from the legend's overrides.

cargo test, clippy, fmt clean. Browser smoke (playwright + chromium):
clicking duckdb hides only the duckdb series; "all" restores every
chip; legend override on a card sticks across further chip changes;
click-outside closes the panel; URL updates as expected.

Signed-off-by: Claude
---
 benchmarks-website/server/src/api.rs          |  96 +++++-
 benchmarks-website/server/src/html.rs         | 217 ++++++++++++-
 .../server/static/chart-init.js               | 289 +++++++++++++++++-
 benchmarks-website/server/static/style.css    |  90 ++++++
 .../tests/snapshots/chart_page_query.snap     |   2 +-
 .../tests/snapshots/group_page_query.snap     |   2 +-
 .../server/tests/snapshots/landing_page.snap  |   2 +-
 .../snapshots/landing_page_filter_bar.snap    |   5 +
 benchmarks-website/server/tests/web_ui.rs     | 254 +++++++++++++++
 9 files changed, 937 insertions(+), 20 deletions(-)
 create mode 100644 benchmarks-website/server/tests/snapshots/landing_page_filter_bar.snap

diff --git a/benchmarks-website/server/src/api.rs b/benchmarks-website/server/src/api.rs
index 544adef9968..6fa10d04f48 100644
--- a/benchmarks-website/server/src/api.rs
+++ b/benchmarks-website/server/src/api.rs
@@ -282,6 +282,35 @@ pub struct ChartResponse {
     pub unit: &'static str,
     pub commits: Vec,
     pub series: serde_json::Map<String, Value>,
+    /// Per-series engine/format classification, used by the global filter
+    /// bar to hide/show whole engines or formats across every chart at once.
+    /// Keyed by series name; values are populated only for series whose name
+    /// encodes an engine and/or format. Series without a classification (e.g.
+    /// vector-search flavors) are simply absent from this map.
+    #[serde(skip_serializing_if = "BTreeMap::is_empty")]
+    pub series_meta: BTreeMap<String, SeriesTag>,
+}
+
+/// Engine/format tag for one series. Both fields are optional because not
+/// every fact table records both dimensions: `query_measurements` carries
+/// engine + format, while `compression_*` and `random_access_times` only
+/// carry format. Vector-search series have neither and are omitted from the
+/// map entirely.
+#[derive(Debug, Default, Serialize)]
+pub struct SeriesTag {
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub engine: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub format: Option<String>,
+}
+
+/// Universe of engine + format chips the global filter bar can toggle.
+/// Returned as a separate, cheap-to-compute summary so the landing page can
+/// render the bar without iterating every chart payload.
+#[derive(Debug, Default, Serialize)]
+pub struct FilterUniverse {
+    pub engines: Vec<String>,
+    pub formats: Vec<String>,
+}
 
 #[derive(Debug, Serialize)]
@@ -1067,6 +1096,44 @@ fn geo_mean(values: &[f64]) -> Option<f64> {
     (n > 0).then(|| (sum_ln / n as f64).exp())
 }
 
+/// Collect the set of distinct engines and formats observed across the fact
+/// tables.
Used by the landing page to seed the global filter bar's chip
+/// universe, so adding a new engine or format in ingest automatically
+/// surfaces a chip without a code change.
+///
+/// Engines come from `query_measurements` only — the other fact tables don't
+/// record an engine. Formats are unioned across `query_measurements`,
+/// `compression_times`, `compression_sizes`, and `random_access_times`;
+/// `vector_search_runs` is intentionally excluded because its `flavor`
+/// column is not a format in the same sense the chip filter is matching on.
+pub fn collect_filter_universe(conn: &Connection) -> Result<FilterUniverse> {
+    let mut engines: BTreeSet<String> = BTreeSet::new();
+    let mut formats: BTreeSet<String> = BTreeSet::new();
+
+    let mut stmt =
+        conn.prepare("SELECT DISTINCT engine FROM query_measurements WHERE engine IS NOT NULL")?;
+    for row in stmt.query_map([], |r| r.get::<_, String>(0))? {
+        engines.insert(row?);
+    }
+
+    for sql in [
+        "SELECT DISTINCT format FROM query_measurements WHERE format IS NOT NULL",
+        "SELECT DISTINCT format FROM compression_times WHERE format IS NOT NULL",
+        "SELECT DISTINCT format FROM compression_sizes WHERE format IS NOT NULL",
+        "SELECT DISTINCT format FROM random_access_times WHERE format IS NOT NULL",
+    ] {
+        let mut stmt = conn.prepare(sql)?;
+        for row in stmt.query_map([], |r| r.get::<_, String>(0))? {
+            formats.insert(row?);
+        }
+    }
+
+    Ok(FilterUniverse {
+        engines: engines.into_iter().collect(),
+        formats: formats.into_iter().collect(),
+    })
+}
+
 /// Build the JSON payload for one chart by key. This is the shared
 /// implementation behind `GET /api/chart/{slug}`, the inline `

unit: ns · 2 series · 3 commits

Show
Y
+tpch sf=1 Q1 [nvme] — bench.vortex.dev

unit: ns · 2 series · 3 commits

Show
Y
diff --git a/benchmarks-website/server/tests/snapshots/group_page_query.snap b/benchmarks-website/server/tests/snapshots/group_page_query.snap index deb003ce2e0..7bdf3a8847f 100644 --- a/benchmarks-website/server/tests/snapshots/group_page_query.snap +++ b/benchmarks-website/server/tests/snapshots/group_page_query.snap @@ -2,4 +2,4 @@ source: benchmarks-website/server/tests/web_ui.rs expression: body --- -TPC-H (NVMe) (SF=1) — bench.vortex.dev

2 charts

Performance Summary

#1datafusion:vortex-file-compressed1.11x1.80 ms
#2duckdb:parquet1.60x900.00 us
Geomean of query time ratio to fastest (lower is better)

Q1

Show
Y

Q2

Show
Y
+TPC-H (NVMe) (SF=1) — bench.vortex.dev

2 charts

Performance Summary

#1datafusion:vortex-file-compressed1.11x1.80 ms
#2duckdb:parquet1.60x900.00 us
Geomean of query time ratio to fastest (lower is better)

Q1

Show
Y

Q2

Show
Y
diff --git a/benchmarks-website/server/tests/snapshots/landing_page.snap b/benchmarks-website/server/tests/snapshots/landing_page.snap index c2619d39dd8..d42416fde8e 100644 --- a/benchmarks-website/server/tests/snapshots/landing_page.snap +++ b/benchmarks-website/server/tests/snapshots/landing_page.snap @@ -2,4 +2,4 @@ source: benchmarks-website/server/tests/web_ui.rs expression: body --- -bench.vortex.dev
Random Access1 chart

Random Access Performance

#1vortex-file-compressed100.50 us1.00x
#2parquet201.00 us2.00x
Random access time | Ratio to fastest (lower is better)

taxi

Show
Y
Compression1 chart

Compression Throughput vs Parquet

Write Speed (Compression)2.00x
📤Scan Speed (Decompression)2.00x
Inverse geomean of Vortex/Parquet ratios (higher is better)

tpch-lineitem

Show
Y
Compression Size1 chart

Compression Size Summary

⬇️Min Size Ratio0.50x
📊Mean Size Ratio0.50x
⬆️Max Size Ratio0.50x
Geomean of Vortex/Parquet size ratios (lower is better)

tpch-lineitem

Show
Y
TPC-H (NVMe) (SF=1)2 charts

Performance Summary

#1datafusion:vortex-file-compressed1.11x1.80 ms
#2duckdb:parquet1.60x900.00 us
Geomean of query time ratio to fastest (lower is better)

Q1

Show
Y

Q2

Show
Y
cohere-large-10m / partitioned1 chart

threshold=0.75

Show
Y
+bench.vortex.dev
Random Access1 chart

Random Access Performance

#1vortex-file-compressed100.50 us1.00x
#2parquet201.00 us2.00x
Random access time | Ratio to fastest (lower is better)

taxi

Show
Y
Compression1 chart

Compression Throughput vs Parquet

Write Speed (Compression)2.00x
📤Scan Speed (Decompression)2.00x
Inverse geomean of Vortex/Parquet ratios (higher is better)

tpch-lineitem

Show
Y
Compression Size1 chart

Compression Size Summary

⬇️Min Size Ratio0.50x
📊Mean Size Ratio0.50x
⬆️Max Size Ratio0.50x
Geomean of Vortex/Parquet size ratios (lower is better)

tpch-lineitem

Show
Y
TPC-H (NVMe) (SF=1)2 charts

Performance Summary

#1datafusion:vortex-file-compressed1.11x1.80 ms
#2duckdb:parquet1.60x900.00 us
Geomean of query time ratio to fastest (lower is better)

Q1

Show
Y

Q2

Show
Y
cohere-large-10m / partitioned1 chart

threshold=0.75

Show
Y
diff --git a/benchmarks-website/server/tests/snapshots/landing_page_filter_bar.snap b/benchmarks-website/server/tests/snapshots/landing_page_filter_bar.snap new file mode 100644 index 00000000000..1a995f8a01f --- /dev/null +++ b/benchmarks-website/server/tests/snapshots/landing_page_filter_bar.snap @@ -0,0 +1,5 @@ +--- +source: benchmarks-website/server/tests/web_ui.rs +expression: filter_bar_section(&body) +--- +
diff --git a/benchmarks-website/server/tests/web_ui.rs b/benchmarks-website/server/tests/web_ui.rs index a014d52156f..a0a95f1ec92 100644 --- a/benchmarks-website/server/tests/web_ui.rs +++ b/benchmarks-website/server/tests/web_ui.rs @@ -898,6 +898,260 @@ async fn empty_landing_page_renders() -> Result<()> { Ok(()) } +/// Landing page renders the global filter dropdown inside the sticky +/// header, with chip rows for engine and format sourced from the seeded +/// data — no hard-coding. +#[tokio::test] +async fn landing_page_renders_global_filter_bar() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let client = reqwest::Client::new(); + let body = client.get(server.url("/")).send().await?.text().await?; + + // The dropdown lives inside the sticky header so it stays on-screen + // while the user scrolls. + let header_chunk = body + .split(r#"class="sticky-header""#) + .nth(1) + .and_then(|s| s.split("").next()) + .context("sticky header chunk")?; + assert!( + header_chunk.contains(r#"data-role="global-filter-bar""#), + "filter dropdown must live inside the sticky header" + ); + assert!(header_chunk.contains(r#"data-role="filter-trigger""#)); + assert!(header_chunk.contains(r#"data-role="filter-panel""#)); + assert!(header_chunk.contains(r#"data-filter="engine""#)); + assert!(header_chunk.contains(r#"data-filter="format""#)); + // Engines + formats from the seed fixture must appear as chips. + assert!(body.contains(r#"data-value="datafusion""#)); + assert!(body.contains(r#"data-value="duckdb""#)); + assert!(body.contains(r#"data-value="vortex-file-compressed""#)); + assert!(body.contains(r#"data-value="parquet""#)); + // Both rows have an "all" reset chip. + assert!(body.matches(r#"data-value="*""#).count() >= 2); + // The "all" chip is now a one-shot reset and is never rendered active — + // active chips reflect the visible engine/format set. + assert!( + !body.contains(r#"class="filter-chip filter-chip--all filter-chip--active""#), + "the 'all' chip should never start active" + ); + // No filter applied by default → every specific chip is active. + let engine_section = filter_section(&body, "engine"); + for engine in ["datafusion", "duckdb"] { + assert!( + extract_chip(&engine_section, engine).contains("filter-chip--active"), + "engine chip {engine} should be active when no filter is applied" + ); + } + // No badge on the trigger when nothing is hidden. + assert!( + !body.contains(r#"data-role="filter-badge""#), + "filter badge should be absent when no chips are off" + ); + // Embedded filter state JSON for the client to pick up. + assert!(body.contains(r#"id="bench-filter-state""#)); + + insta_settings().bind(|| { + insta::assert_snapshot!("landing_page_filter_bar", filter_bar_section(&body)); + }); + Ok(()) +} + +/// Landing page honours `?engine=`/`?format=` and reflects them as the +/// active chip set + initial filter-state JSON, so a refresh preserves view. +#[tokio::test] +async fn landing_page_honours_filter_query_params() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let client = reqwest::Client::new(); + let body = client + .get(server.url("/?engine=duckdb&format=vortex-file-compressed")) + .send() + .await? 
+ .text() + .await?; + + assert!( + body.contains(r#"{"engines":["duckdb"],"formats":["vortex-file-compressed"]}"#), + "filter state JSON should reflect query params" + ); + let engine_section = filter_section(&body, "engine"); + assert!( + engine_section.contains(r#"data-value="duckdb""#) + && extract_chip(&engine_section, "duckdb").contains("filter-chip--active"), + "duckdb chip should be active" + ); + assert!( + !extract_chip(&engine_section, "datafusion").contains("filter-chip--active"), + "datafusion chip should NOT be active when engine=duckdb" + ); + assert!( + !extract_chip(&engine_section, "*").contains("filter-chip--active"), + "the 'all' chip is a reset, never active" + ); + // Trigger should show a badge counting the off chips (1 engine + 1 format). + assert!( + body.contains(r#"data-role="filter-badge""#), + "trigger should render a badge when chips are filtered off" + ); + Ok(()) +} + +/// Permalink pages render the same filter dropdown in the navbar (so the +/// user can adjust visibility from any page) and embed the filter-state +/// JSON so chart-init.js applies the filter on hydration. +#[tokio::test] +async fn permalink_pages_embed_filter_state() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let client = reqwest::Client::new(); + let chart_slug = pick_chart_slug(&server, |s| s.starts_with("TPC-H")).await?; + let group_slug = pick_group_slug(&server, |s| s.starts_with("TPC-H")).await?; + + let chart_body = client + .get(server.url(&format!("/chart/{chart_slug}?engine=duckdb&format=parquet"))) + .send() + .await? + .text() + .await?; + assert!( + chart_body.contains(r#"id="bench-filter-state""#), + "chart permalink must embed filter state" + ); + assert!( + chart_body.contains(r#"{"engines":["duckdb"],"formats":["parquet"]}"#), + "chart permalink must echo the query-param filter state" + ); + + let group_body = client + .get(server.url(&format!("/group/{group_slug}?engine=duckdb"))) + .send() + .await? + .text() + .await?; + assert!( + group_body.contains(r#"{"engines":["duckdb"],"formats":[]}"#), + "group permalink must echo the query-param filter state" + ); + Ok(()) +} + +/// Chart payload exposes per-series engine/format tags so the global filter +/// has the metadata it needs to drive bulk hide/show. +#[tokio::test] +async fn chart_payload_includes_series_meta() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let slug = pick_chart_slug(&server, |s| s.starts_with("TPC-H")).await?; + let client = reqwest::Client::new(); + let body: Value = client + .get(server.url(&format!("/api/chart/{slug}"))) + .send() + .await? + .json() + .await?; + + let meta = body["series_meta"] + .as_object() + .context("series_meta must be present for query measurements")?; + let row = meta + .get("datafusion:vortex-file-compressed") + .context("expected series tag")?; + assert_eq!(row["engine"].as_str(), Some("datafusion")); + assert_eq!(row["format"].as_str(), Some("vortex-file-compressed")); + + // Compression-time series carry a format tag but no engine. + let comp_slug = pick_chart_slug(&server, |s| s == "Compression").await?; + let comp_body: Value = client + .get(server.url(&format!("/api/chart/{comp_slug}"))) + .send() + .await? 
+/// Pull just the `<div … data-role="global-filter-bar">…</div>` substring of the
+/// filter dropdown — its trigger button and the chip panel. Keeps the
+/// snapshot focused on the chip markup and stable against changes elsewhere
+/// on the page.
+fn filter_bar_section(body: &str) -> String {
+    // Locate the wrapper by its data-role marker, then walk back to the
+    // `<div` that opens it. (The exact attribute order in the markup is
+    // not asserted here.)
+    let Some(marker) = body.find(r#"data-role="global-filter-bar""#) else {
+        return String::new();
+    };
+    let start = body[..marker].rfind("<div").unwrap_or(marker);
+    let tail = &body[start..];
+    // The dropdown is `<div …> … </div>` with nested divs inside.
+    // We need to find the matching `</div>` for the outer wrapper. The
+    // simplest robust approach is to scan and balance.
+    let bytes = tail.as_bytes();
+    let mut depth = 0usize;
+    let mut i = 0usize;
+    while i < bytes.len() {
+        if bytes[i] == b'<' {
+            if tail[i..].starts_with("<div") {
+                depth += 1;
+                i += "<div".len();
+                continue;
+            }
+            if tail[i..].starts_with("</div>") {
+                depth -= 1;
+                if depth == 0 {
+                    return tail[..i + "</div>".len()].to_string();
+                }
+                i += "</div>".len();
+                continue;
+            }
+        }
+        i += 1;
+    }
+    tail.to_string()
+}
+
+/// Pull the `<div …>…</div>` row containing chips for one
+/// dimension (`"engine"` or `"format"`).
+fn filter_section(body: &str, dim: &str) -> String {
+    let bar = filter_bar_section(body);
+    let needle = format!(r#"data-filter="{dim}""#);
+    let Some(_) = bar.find(&needle) else {
+        return String::new();
+    };
+    // Walk back to the enclosing `<div …>` and forward to its `</div>`.
+    let row_open = "<div";
+    let row_close = "</div>";
+    bar.split(row_open)
+        .find(|chunk| chunk.contains(&needle))
+        .and_then(|chunk| chunk.split(row_close).next())
+        .map(str::to_string)
+        .unwrap_or_default()
+}
+
+/// Pull a single chip's opening tag for assertions.
+fn extract_chip(section: &str, value: &str) -> String {
+    let needle = format!(r#"data-value="{value}""#);
+    let Some(idx) = section.find(&needle) else {
+        return String::new();
+    };
+    let head = &section[..idx];
+    // Walk back to the `<` that opens the chip tag, then forward past `>`.
+    let chip_start = head.rfind('<').unwrap_or(0);
+    let tail = &section[chip_start..];
+    let chip_end = tail.find('>').map(|p| p + 1).unwrap_or(tail.len());
+    tail[..chip_end].to_string()
+}
+
 #[tokio::test]
 async fn static_assets_are_served() -> Result<()> {
     let server = Server::start().await?;

From 7113963e2f096d697dca57428628dd03fac7077e Mon Sep 17 00:00:00 2001
From: Connor Tsui <87130162+connortsui20@users.noreply.github.com>
Date: Wed, 29 Apr 2026 17:04:51 -0400
Subject: [PATCH 26/26] UI refactors of benchmarks v3 (#7723)

## Summary

Fixes the UI of the benchmarks v3 website.

- no longer max of 1000 commits
- LTTB dynamic downsampling on the client side
- a bunch of other stuff

## Testing

More snapshot testing.

---------

Signed-off-by: Claude
Signed-off-by: Connor Tsui
Co-authored-by: Claude
---
 benchmarks-website/server/src/api.rs           |  31 +-
 benchmarks-website/server/src/html.rs          | 105 ++--
 .../server/static/chart-init.js                | 488 +++++++++++++++---
 benchmarks-website/server/static/style.css     |  27 +
 .../tests/snapshots/chart_page_query.snap      |   2 +-
 .../tests/snapshots/group_page_query.snap      |   2 +-
 .../server/tests/snapshots/landing_page.snap   |   2 +-
 benchmarks-website/server/tests/web_ui.rs      | 193 ++++++-
 8 files changed, 713 insertions(+), 137 deletions(-)

diff --git a/benchmarks-website/server/src/api.rs b/benchmarks-website/server/src/api.rs
index 6fa10d04f48..42c3ab6c83c 100644
--- a/benchmarks-website/server/src/api.rs
+++ b/benchmarks-website/server/src/api.rs
@@ -31,10 +31,10 @@ use crate::error::ApiError;
 use crate::slug::ChartKey;
 use crate::slug::GroupKey;
 
-/// Default cap on the number of commits returned per chart.
+/// Default cap on the number of commits returned per chart when no `?n=` is
+/// supplied. The HTML routes override this with their own per-page defaults
+/// (see [`crate::html`]).
 pub const DEFAULT_COMMIT_WINDOW: u32 = 100;
-/// Hard server-side ceiling on `?n=NNN`.
-pub const MAX_COMMIT_WINDOW: u32 = 1000;
 
 /// Canonical group ordering, ported from the v2 site's hard-coded list at
 /// `origin/ct/vfvb:benchmarks-website/index.html`. Group names not in this
@@ -90,7 +90,11 @@ impl Default for CommitWindow {
 impl CommitWindow {
     /// Parse the `?n=...` query string parameter. `None` and malformed values
     /// fall back to [`CommitWindow::default`]. `"all"` (any case) means
-    /// unbounded. Numeric values are clamped to `[1, MAX_COMMIT_WINDOW]`.
+    /// unbounded. Numeric values are floored to `1` so `?n=0` becomes
+    /// `?n=1`; there is no upper bound — large histories are kept as-is.
+    /// Any further reduction in rendered point count happens client-side
+    /// (see `static/chart-init.js` for the LTTB pass on the visible
+    /// commit range).
     pub fn parse(raw: Option<&str>) -> Self {
         let Some(s) = raw else {
             return Self::default();
@@ -102,7 +106,7 @@ impl CommitWindow {
         trimmed
             .parse::<u32>()
             .ok()
-            .map(|v| v.clamp(1, MAX_COMMIT_WINDOW))
+            .map(|v| v.max(1))
             .and_then(NonZeroU32::new)
             .map(Self::Last)
             .unwrap_or_default()
@@ -276,7 +280,7 @@ pub struct ChartLink {
     pub slug: String,
 }
 
-#[derive(Debug, Serialize)]
+#[derive(Debug, Clone, Serialize)]
 pub struct ChartResponse {
     pub display_name: String,
     pub unit: &'static str,
@@ -296,7 +300,7 @@
 /// engine + format, while `compression_*` and `random_access_times` only
 /// carry format. Vector-search series have neither and are omitted from the
 /// map entirely.
-#[derive(Debug, Default, Serialize)]
+#[derive(Debug, Default, Clone, Serialize)]
 pub struct SeriesTag {
     #[serde(skip_serializing_if = "Option::is_none")]
     pub engine: Option<String>,
@@ -313,7 +317,7 @@ pub struct FilterUniverse {
     pub formats: Vec<String>,
 }
 
-#[derive(Debug, Serialize)]
+#[derive(Debug, Clone, Serialize)]
 pub struct CommitPoint {
     pub sha: String,
     pub timestamp: String,
@@ -1644,13 +1648,18 @@
     }
 
     #[test]
-    fn commit_window_parse_clamps() {
+    fn commit_window_parse_floors_zero_but_keeps_large_values() {
+        // Large values are kept as-is — full history is no longer clamped
+        // server-side. Visual downsampling happens client-side in
+        // `static/chart-init.js`, on the currently visible commit range.
         let CommitWindow::Last(n) = CommitWindow::parse(Some("99999")) else {
             panic!()
         };
-        assert_eq!(n.get(), MAX_COMMIT_WINDOW);
+        assert_eq!(n.get(), 99_999);
+
+        // 0 floors to 1 since the underlying type is `NonZeroU32`.
         let CommitWindow::Last(n) = CommitWindow::parse(Some("0")) else {
-            panic!("clamp of 0 should round to 1")
+            panic!("floor of 0 should round to 1")
         };
         assert_eq!(n.get(), 1);
     }
diff --git a/benchmarks-website/server/src/html.rs b/benchmarks-website/server/src/html.rs
index d9408e1e901..9eebedc2e07 100644
--- a/benchmarks-website/server/src/html.rs
+++ b/benchmarks-website/server/src/html.rs
@@ -4,25 +4,33 @@
 //! HTML routes for the bench.vortex.dev v3 web UI.
 //!
 //! Three pages, all backed by the same per-chart UX:
-//! - `GET /` — landing page. Every group is a collapsible `<details>`. The
-//!   first group is open by default and its charts pre-inline their JSON
-//!   payload for a fast first paint; closed groups carry only the chart-card
-//!   shell and their payloads are fetched on first toggle (`details.open`).
+//! - `GET /` — landing page. Every group is a collapsible `<details>`,
+//!   all collapsed by default; the user picks which to expand. The
+//!   *first* group's chart payloads are still pre-inlined in the HTML
+//!   so opening it skips the JS fetch round-trip; every other group
+//!   ships only chart-card shells and is fetched on first toggle.
 //! - `GET /chart/{slug}` — single chart page; permalink for sharing.
 //! - `GET /group/{slug}` — every chart in one group on a single page.
 //!
 //! Each chart card owns its own compact toolbar (scope slider + Y-axis). There
 //! is no page-level toolbar — every chart is independent. Scope is
-//! **zoom-as-scope**: each chart fetches up to [`api::MAX_COMMIT_WINDOW`]
-//! commits once, then the toolbar manipulates `chart.options.scales.x.min`/
-//! `max` to set the visible window. No refetches on scope change.
+//! **zoom-as-scope**: each chart fetches a generous window once, then the
+//! toolbar manipulates `chart.options.scales.x.min`/`max` to set the visible
+//! window. No refetches on scope change.
 //!
-//! URL query params (`?n=`) are accepted as power-user overrides on the
-//! initial fetch but are not written back from the toolbar. Per-chart UI
-//! state is intentionally not persisted in the URL — the user feedback
-//! emphasised that this UX should feel local-and-immediate, not "share a
-//! perfect view via URL". Permalinks (`/chart/{slug}`, `/group/{slug}`) are
-//! the sharing mechanism, not query strings.
+//! Every HTML route defaults to the unbounded commit window
+//! ([`CommitWindow::All`]) so users can pan/zoom all the way back to the
+//! very first commit. The chart payload is sent **raw** — any visual
+//! downsampling happens client-side in `chart-init.js`, applied only to
+//! the currently visible commit range. The common case (a chart zoomed in
+//! to the last ~100 commits) renders raw with no LTTB at all.
+//!
+//! URL query param `?n=` is accepted as a power-user override on the
+//! initial fetch but is not written back from the toolbar. Per-chart UI
+//! state is intentionally not persisted in the URL — the user feedback
+//! emphasised that this UX should feel local-and-immediate, not "share a
+//! perfect view via URL". Permalinks (`/chart/{slug}`, `/group/{slug}`)
+//! are the sharing mechanism, not query strings.
 //!
 //! Slugs are opaque strings the server received from `/api/groups`; the
 //! handler echoes them straight into [`crate::slug::ChartKey::from_slug`]
@@ -32,8 +40,6 @@
 //! script) are served from `/static/...` via [`include_bytes!`] so the
 //! binary is fully self-contained.
 
-use std::num::NonZeroU32;
-
 use anyhow::Result;
 use axum::Router;
 use axum::extract::Path;
@@ -62,10 +68,11 @@ use crate::db;
 use crate::slug::ChartKey;
 use crate::slug::GroupKey;
 
-/// How many commits each chart pre-fetches. The toolbar's scope slider zooms
-/// into smaller windows of this slice; we never refetch on scope change.
-/// Capped at the API ceiling so a future bigger ceiling is picked up here too.
-const PER_CHART_FETCH_N: u32 = api::MAX_COMMIT_WINDOW;
+// All HTML routes default to the unbounded commit window. The wire payload
+// is the raw `(commits, series)` data; visual downsampling (LTTB on the
+// currently visible commit range) happens client-side in
+// `static/chart-init.js`. `?n=` remains a power-user override on the
+// commit window itself (not on the rendered point count).
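The actual LTTB pass ships as JavaScript in `chart-init.js`, whose body is not reproduced in this patch digest. As a rough illustration of the technique only (not the real implementation), Largest-Triangle-Three-Buckets keeps the first and last points, splits the interior into equal buckets, and from each bucket keeps the point forming the largest triangle with the previously kept point and the next bucket's average. A self-contained Rust sketch, with all names invented here:

```rust
/// Reduce `pts` (x ascending) to `threshold` points while preserving
/// visual extremes. Illustrative sketch only; the production version is
/// the JS in static/chart-init.js, run on the visible commit range.
fn lttb(pts: &[(f64, f64)], threshold: usize) -> Vec<(f64, f64)> {
    if threshold < 3 || threshold >= pts.len() {
        return pts.to_vec(); // zoomed-in common case: render raw
    }
    let mut out = Vec::with_capacity(threshold);
    out.push(pts[0]); // always keep the first point
    // Interior points get (threshold - 2) buckets of equal width.
    let every = (pts.len() - 2) as f64 / (threshold - 2) as f64;
    let mut a = 0usize; // index of the last point we kept
    for i in 0..threshold - 2 {
        let start = (i as f64 * every) as usize + 1;
        let next_start = ((i + 1) as f64 * every) as usize + 1;
        let next_end = (((i + 2) as f64 * every) as usize + 1).min(pts.len());
        let (ax, ay) = pts[a];
        // The *next* bucket's average is the triangle's third vertex.
        let next = &pts[next_start..next_end];
        let (avg_x, avg_y) = if next.is_empty() {
            pts[pts.len() - 1] // degenerate last bucket: use the final point
        } else {
            let n = next.len() as f64;
            let (sx, sy) = next
                .iter()
                .fold((0.0, 0.0), |(sx, sy), &(x, y)| (sx + x, sy + y));
            (sx / n, sy / n)
        };
        // Keep the current-bucket point with the largest triangle area.
        let mut best = start;
        let mut best_area = -1.0_f64;
        for (j, &(x, y)) in pts[start..next_start].iter().enumerate() {
            let area = ((ax - avg_x) * (y - ay) - (ax - x) * (avg_y - ay)).abs();
            if area > best_area {
                best_area = area;
                best = start + j;
            }
        }
        out.push(pts[best]);
        a = best;
    }
    out.push(pts[pts.len() - 1]); // always keep the last point
    out
}
```

Because the guard returns the raw slice whenever it already fits the budget, a chart zoomed to its last ~100 commits pays nothing, which is exactly the behaviour the module docs above describe.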
 const CHART_JS: &[u8] = include_bytes!("../static/chart.umd.js");
 const CHART_ZOOM_JS: &[u8] = include_bytes!("../static/chartjs-plugin-zoom.umd.min.js");
 const CHART_INIT_JS: &[u8] = include_bytes!("../static/chart-init.js");
 const STYLE_CSS: &[u8] = include_bytes!("../static/style.css");
 const VORTEX_BLACK_SVG: &[u8] = include_bytes!("../../public/vortex_black_nobg.svg");
 const VORTEX_WHITE_SVG: &[u8] = include_bytes!("../../public/vortex_white_nobg.svg");
-const STATIC_ASSET_VERSION: &str = "bench-v3-ui-10";
+const STATIC_ASSET_VERSION: &str = "bench-v3-ui-15";
 
 /// HTML routes mounted under `/`.
 pub fn router() -> Router {
@@ -92,15 +99,14 @@
     .route("/vortex_white_nobg.svg", get(serve_vortex_white_svg))
 }
 
-/// Query string for HTML routes. `?n=` overrides the per-chart fetch size;
+/// Query string for HTML routes. `?n=` overrides the commit window;
 /// `?engine=` and `?format=` carry the global filter bar's selection so a
 /// shared link or refresh preserves which engines/formats are visible. The
 /// per-chart toolbar (Y axis, scope slider) remains local-only — its state
 /// is intentionally not in the URL.
 #[derive(Debug, Default, Deserialize)]
 pub struct UiQuery {
-    /// Override for the per-chart fetch size. Defaults to `PER_CHART_FETCH_N`.
-    /// Accepts `25|50|100|250|all`.
+    /// Override for the per-chart fetch size. Accepts `25|50|100|250|all`.
     pub n: Option<String>,
     /// Comma-separated list of engines to keep visible across every chart.
     /// Empty / unset means no engine filter is active. Unknown engines are
    /// […]
     pub engine: Option<String>,
     /// Comma-separated list of formats to keep visible; same semantics.
     pub format: Option<String>,
 }
 
@@ -113,14 +119,15 @@
 impl UiQuery {
-    /// Resolve the [`CommitWindow`] for the initial fetch. When `?n=` is
-    /// unset, falls back to [`PER_CHART_FETCH_N`].
+    /// Resolve the [`CommitWindow`] for HTML routes. Defaults to
+    /// [`CommitWindow::All`] so users can pan/zoom all the way back to
+    /// the very first commit on every chart, including the first
+    /// (inline-payload) group on the landing page. Visual downsampling
+    /// happens client-side on the visible commit range only.
     fn fetch_window(&self) -> CommitWindow {
         match self.n.as_deref() {
             Some(_) => CommitWindow::parse(self.n.as_deref()),
-            None => {
-                CommitWindow::Last(NonZeroU32::new(PER_CHART_FETCH_N).expect("non-zero default"))
-            }
+            None => CommitWindow::All,
         }
     }
 
@@ -190,8 +197,9 @@ async fn landing(…)
 /// One group's worth of data for the landing page.
 ///
-/// The first group (in canonical order) ships with `charts` populated so the
-/// open-by-default `<details>` paints immediately. Subsequent groups ship
+/// The first group (in canonical order) ships with `charts` populated so
+/// the moment the user expands it the chart hydrates from the inline
+/// JSON without a network round-trip. Every other group ships
 /// with `charts` empty and only their chart-card shells — payloads are
 /// fetched client-side on first `details.toggle` to keep the cold landing
 /// HTML small.
@@ -220,8 +228,9 @@ fn collect_landing_groups(conn: &Connection, window: &CommitWindow) -> Result<…>
[… remainder of the html.rs hunks, plus the chart-init.js and style.css diffs, did not survive extraction …]
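Pulling the api.rs and html.rs changes together, the `?n=` contract is now: no upper clamp, floor at 1, `all` (any case) unbounded, junk falls back to the default window. A quick usage sketch, assuming `CommitWindow` is in scope and inspecting `Last(n)` by pattern match as the updated unit test does (the enum may not implement `PartialEq`):

```rust
// No more 1000-commit ceiling: large ?n= values survive intact.
let CommitWindow::Last(n) = CommitWindow::parse(Some("99999")) else {
    panic!("numeric ?n= parses to Last");
};
assert_eq!(n.get(), 99_999);

// ?n=0 floors to 1 because the payload is a NonZeroU32.
let CommitWindow::Last(n) = CommitWindow::parse(Some("0")) else {
    panic!("?n=0 floors to 1");
};
assert_eq!(n.get(), 1);

// "all" is case-insensitive and means unbounded.
assert!(matches!(CommitWindow::parse(Some("ALL")), CommitWindow::All));

// Malformed input falls back to the default window (Last of the
// DEFAULT_COMMIT_WINDOW), mirroring the ?n=banana HTTP test below.
assert!(matches!(
    CommitWindow::parse(Some("banana")),
    CommitWindow::Last(_)
));
```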
[The three snapshot bodies below are single-line HTML; only their text content survived extraction. "[…]" marks elided markup.]

diff --git a/benchmarks-website/server/tests/snapshots/chart_page_query.snap b/benchmarks-website/server/tests/snapshots/chart_page_query.snap
--- a/benchmarks-website/server/tests/snapshots/chart_page_query.snap
+++ b/benchmarks-website/server/tests/snapshots/chart_page_query.snap
@@ -2,4 +2,4 @@
 source: benchmarks-website/server/tests/web_ui.rs
 expression: body
 ---
-tpch sf=1 Q1 [nvme] — bench.vortex.dev […] unit: ns · 2 series · 3 commits […] Show […] Y […]
+tpch sf=1 Q1 [nvme] — bench.vortex.dev […] unit: ns · 2 series · 3 commits […] Show […] Y […]
diff --git a/benchmarks-website/server/tests/snapshots/group_page_query.snap b/benchmarks-website/server/tests/snapshots/group_page_query.snap
index 7bdf3a8847f..9e4a3d2f5bc 100644
--- a/benchmarks-website/server/tests/snapshots/group_page_query.snap
+++ b/benchmarks-website/server/tests/snapshots/group_page_query.snap
@@ -2,4 +2,4 @@
 source: benchmarks-website/server/tests/web_ui.rs
 expression: body
 ---
-TPC-H (NVMe) (SF=1) — bench.vortex.dev […] 2 charts […] Performance Summary […] #1 datafusion:vortex-file-compressed 1.11x 1.80 ms […] #2 duckdb:parquet 1.60x 900.00 us […] Geomean of query time ratio to fastest (lower is better) […] Q1 […] Show […] Y […] Q2 […] Show […] Y […]
+TPC-H (NVMe) (SF=1) — bench.vortex.dev […] 2 charts […] Performance Summary […] #1 datafusion:vortex-file-compressed 1.11x 1.80 ms […] #2 duckdb:parquet 1.60x 900.00 us […] Geomean of query time ratio to fastest (lower is better) […] Q1 […] Show […] Y […] Q2 […] Show […] Y […]
diff --git a/benchmarks-website/server/tests/snapshots/landing_page.snap b/benchmarks-website/server/tests/snapshots/landing_page.snap
index d42416fde8e..8c5a360d8d5 100644
--- a/benchmarks-website/server/tests/snapshots/landing_page.snap
+++ b/benchmarks-website/server/tests/snapshots/landing_page.snap
@@ -2,4 +2,4 @@
 source: benchmarks-website/server/tests/web_ui.rs
 expression: body
 ---
-bench.vortex.dev […] Random Access 1 chart […] Random Access Performance […] #1 vortex-file-compressed 100.50 us 1.00x […] #2 parquet 201.00 us 2.00x […] Random access time | Ratio to fastest (lower is better) […] taxi […] Show […] Y […] Compression 1 chart […] Compression Throughput vs Parquet […] Write Speed (Compression) 2.00x […] 📤 Scan Speed (Decompression) 2.00x […] Inverse geomean of Vortex/Parquet ratios (higher is better) […] tpch-lineitem […] Show […] Y […] Compression Size 1 chart […] Compression Size Summary […] ⬇️ Min Size Ratio 0.50x […] 📊 Mean Size Ratio 0.50x […] ⬆️ Max Size Ratio 0.50x […] Geomean of Vortex/Parquet size ratios (lower is better) […] tpch-lineitem […] Show […] Y […] TPC-H (NVMe) (SF=1) 2 charts […] Performance Summary […] #1 datafusion:vortex-file-compressed 1.11x 1.80 ms […] #2 duckdb:parquet 1.60x 900.00 us […] Geomean of query time ratio to fastest (lower is better) […] Q1 […] Show […] Y […] Q2 […] Show […] Y […] cohere-large-10m / partitioned 1 chart […] threshold=0.75 […] Show […] Y […]
+bench.vortex.dev […] Random Access 1 chart […] Random Access Performance […] #1 vortex-file-compressed 100.50 us 1.00x […] #2 parquet 201.00 us 2.00x […] Random access time | Ratio to fastest (lower is better) […] taxi […] Show […] Y […] Compression 1 chart […] Compression Throughput vs Parquet […] Write Speed (Compression) 2.00x […] 📤 Scan Speed (Decompression) 2.00x […] Inverse geomean of Vortex/Parquet ratios (higher is better) […] tpch-lineitem […] Show […] Y […] Compression Size 1 chart […] Compression Size Summary […] ⬇️ Min Size Ratio 0.50x […] 📊 Mean Size Ratio 0.50x […] ⬆️ Max Size Ratio 0.50x […] Geomean of Vortex/Parquet size ratios (lower is better) […] tpch-lineitem […] Show […] Y […] TPC-H (NVMe) (SF=1) 2 charts […] Performance Summary […] #1 datafusion:vortex-file-compressed 1.11x 1.80 ms […] #2 duckdb:parquet 1.60x 900.00 us […] Geomean of query time ratio to fastest (lower is better) […] Q1 […] Show […] Y […] Q2 […] Show […] Y […] cohere-large-10m / partitioned 1 chart […] threshold=0.75 […] Show […] Y […]
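These `.snap` files are insta snapshots of full page bodies, which is why each is a single long HTML line. The tests keep them stable by snapshotting only a focused markup slice rather than the whole page. The pattern, assembled from the fragments that appear verbatim in the test diffs (the settings beyond the snapshot path are whatever `insta_settings()` configures upstream):

```rust
use insta::Settings;

// Keep .snap files in the tests/snapshots directory next to the tests.
fn insta_settings() -> Settings {
    let mut s = Settings::clone_current();
    s.set_snapshot_path("snapshots");
    s
}

// Snapshot only the filter-bar slice, not the full body, so unrelated
// HTML churn elsewhere on the page doesn't invalidate the snapshot.
insta_settings().bind(|| {
    insta::assert_snapshot!("landing_page_filter_bar", filter_bar_section(&body));
});
```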
diff --git a/benchmarks-website/server/tests/web_ui.rs b/benchmarks-website/server/tests/web_ui.rs
index a0a95f1ec92..e09249ef950 100644
--- a/benchmarks-website/server/tests/web_ui.rs
+++ b/benchmarks-website/server/tests/web_ui.rs
@@ -240,6 +240,90 @@ async fn seed(server: &Server) -> Result<()> {
     Ok(())
 }
 
+/// Slim ingest envelope carrying just a `random_access_time` pair so we can
+/// drive a long-history fixture cheaply (the full envelope is ~12 records;
+/// this is two). Used by the downsample tests.
+fn ra_envelope_for(sha: &str, ts: &str, msg: &str, bias: i64) -> Value {
+    json!({
+        "run_meta": {
+            "benchmark_id": "downsample-fixture",
+            "schema_version": 1,
+            "started_at": ts
+        },
+        "commit": {
+            "sha": sha,
+            "timestamp": ts,
+            "message": msg,
+            "author_name": "Test Author",
+            "author_email": "author@example.com",
+            "committer_name": "Test Committer",
+            "committer_email": "committer@example.com",
+            "tree_sha": "fedcba9876543210fedcba9876543210fedcba98",
+            "url": format!("https://github.com/vortex-data/vortex/commit/{sha}")
+        },
+        "records": [
+            {
+                "kind": "random_access_time",
+                "commit_sha": sha,
+                "dataset": "taxi",
+                "format": "vortex-file-compressed",
+                "value_ns": 500 + bias,
+                "all_runtimes_ns": [500 + bias]
+            },
+            {
+                "kind": "random_access_time",
+                "commit_sha": sha,
+                "dataset": "taxi",
+                "format": "parquet",
+                "value_ns": 1_000 + (2 * bias),
+                "all_runtimes_ns": [1_000 + (2 * bias)]
+            }
+        ]
+    })
+}
+
+/// Seed a `Random Access` chart with `n` synthetic commits so the
+/// downsampler has something to chew on. SHAs are deterministic
+/// `{i:040x}`; timestamps are 1 minute apart starting 2025-01-01 so the
+/// commits sort stably.
+async fn seed_long_history(server: &Server, n: usize) -> Result<()> {
+    let client = reqwest::Client::new();
+    for i in 0..n {
+        let sha = format!("{i:040x}");
+        let minutes = i;
+        let ts = format!(
+            "2025-01-01T{:02}:{:02}:00Z",
+            (minutes / 60) % 24,
+            minutes % 60
+        );
+        // Sinusoidal bias so the series has interior peaks LTTB will retain.
+        let bias = ((i as f64).sin() * 1_000.0) as i64 + i as i64 * 10;
+        let resp = client
+            .post(server.url("/api/ingest"))
+            .bearer_auth(TOKEN)
+            .json(&ra_envelope_for(&sha, &ts, "synthetic", bias))
+            .send()
+            .await?;
+        anyhow::ensure!(
+            resp.status().is_success(),
+            "long-history ingest #{i} failed: {}",
+            resp.status()
+        );
+    }
+    Ok(())
+}
+
+/// Pull the inline `<script id="chart-data-{idx}">` JSON payload out of an
+/// HTML body. Returns `None` if the script tag isn't present.
+fn extract_chart_data(body: &str, idx: usize) -> Option<Value> {
+    let needle = format!(r#"id="chart-data-{idx}""#);
+    let start = body.find(&needle)?;
+    // Skip past the opening tag, then take everything up to the closing tag.
+    let json_start = body[start..].find('>')? + start + 1;
+    let json_end = body[json_start..].find("</script>")? + json_start;
+    serde_json::from_str(&body[json_start..json_end]).ok()
+}
+
 fn insta_settings() -> insta::Settings {
     let mut s = insta::Settings::clone_current();
     s.set_snapshot_path("snapshots");
@@ -374,11 +458,12 @@ async fn landing_page_snapshot() -> Result<()> {
     Ok(())
 }
 
-/// The first group disclosure is rendered with the `open` attribute; every
-/// other group lacks it, so the user sees only the first group's charts on
-/// first paint.
+/// All group disclosures render closed by default — the user picks which
+/// to expand. The first group's chart payloads are still inlined in the
+/// HTML (so opening it skips the JS fetch), but the disclosure itself
+/// stays collapsed until clicked.
 #[tokio::test]
-async fn details_first_group_open_others_closed() -> Result<()> {
+async fn details_all_groups_closed_by_default() -> Result<()> {
     let server = Server::start().await?;
     seed(&server).await?;
 
@@ -393,10 +478,15 @@
         })
         .collect();
     assert!(!opens.is_empty(), "landing page must render <details>");
-    assert!(opens[0], "first group must be open");
-    for (i, is_open) in opens.iter().enumerate().skip(1) {
+    for (i, is_open) in opens.iter().enumerate() {
         assert!(!is_open, "group #{i} must be closed by default");
     }
+    // The first group's chart payload should still be inlined — fast
+    // hydration on toggle without a network round-trip.
+    assert!(
+        body.contains(r#"id="chart-data-0""#),
+        "first group's chart payload should be inlined for fast on-toggle hydration",
+    );
     Ok(())
 }
 
@@ -795,7 +885,8 @@ async fn chart_page_window_caps_commits() -> Result<()> {
 
     let slug = pick_chart_slug(&server, |s| s.starts_with("TPC-H")).await?;
     let client = reqwest::Client::new();
-    // Without ?n, default is the 1000-commit per-chart cap — fixture has 3.
+    // Without `?n`, the API default is `Last(DEFAULT_COMMIT_WINDOW)`. The
+    // fixture has 3 commits which fits comfortably.
     let full: Value = client
         .get(server.url(&format!("/api/chart/{slug}")))
         .send()
@@ -815,7 +906,7 @@
     let one_count = one["commits"].as_array().map(|a| a.len()).unwrap_or(0);
     assert_eq!(one_count, 1, "?n=1 should keep exactly one commit");
 
-    // ?n=all bypasses the cap.
+    // ?n=all returns the unbounded view (the per-chart hard cap is gone).
     let all: Value = client
         .get(server.url(&format!("/api/chart/{slug}?n=all")))
         .send()
@@ -825,6 +916,19 @@
     let all_count = all["commits"].as_array().map(|a| a.len()).unwrap_or(0);
     assert_eq!(all_count, full_count, "?n=all should match unbounded view");
 
+    // Even very large `?n` survives without being clamped.
+    let huge: Value = client
+        .get(server.url(&format!("/api/chart/{slug}?n=99999")))
+        .send()
+        .await?
+        .json()
+        .await?;
+    let huge_count = huge["commits"].as_array().map(|a| a.len()).unwrap_or(0);
+    assert_eq!(
+        huge_count, full_count,
+        "?n=99999 should no longer be clamped to 1000"
+    );
+
     // Malformed ?n gracefully falls back to default.
     let bad = client
         .get(server.url(&format!("/api/chart/{slug}?n=banana")))
@@ -834,6 +938,79 @@
     Ok(())
 }
 
+/// `/chart/{slug}` and `/group/{slug}` permalinks default to the unbounded
+/// commit window, and the inlined JSON payload contains the full raw
+/// history (no server-side downsampling). Visual downsampling now lives in
+/// `chart-init.js` and runs on the *visible* commit range only.
+#[tokio::test]
+async fn permalink_pages_inline_full_raw_history() -> Result<()> {
+    let server = Server::start().await?;
+    seed_long_history(&server, 200).await?;
+
+    let chart_slug = pick_chart_slug(&server, |s| s == "Random Access").await?;
+    let group_slug = pick_group_slug(&server, |s| s == "Random Access").await?;
+    let client = reqwest::Client::new();
+
+    let chart_body = client
+        .get(server.url(&format!("/chart/{chart_slug}")))
+        .send()
+        .await?
+        .text()
+        .await?;
+    let chart_payload =
+        extract_chart_data(&chart_body, 0).context("chart inline payload present")?;
+    assert_eq!(
+        chart_payload["commits"]
+            .as_array()
+            .context("commits is array")?
+            .len(),
+        200,
+        "/chart permalink should inline the full raw history",
+    );
+
+    let group_body = client
+        .get(server.url(&format!("/group/{group_slug}")))
+        .send()
+        .await?
+        .text()
+        .await?;
+    let group_payload =
+        extract_chart_data(&group_body, 0).context("group inline payload present")?;
+    assert_eq!(
+        group_payload["commits"]
+            .as_array()
+            .context("commits is array")?
+            .len(),
+        200,
+        "/group permalink should inline the full raw history",
+    );
+
+    Ok(())
+}
+
+/// The wire payload no longer carries a `raw_commit_count` field — visual
+/// downsampling moved to the client, so the server has no opinion on
+/// rendered point count.
+#[tokio::test]
+async fn chart_payload_does_not_carry_raw_commit_count() -> Result<()> {
+    let server = Server::start().await?;
+    seed_long_history(&server, 50).await?;
+
+    let slug = pick_chart_slug(&server, |s| s == "Random Access").await?;
+    let client = reqwest::Client::new();
+    let body: Value = client
+        .get(server.url(&format!("/api/chart/{slug}")))
+        .send()
+        .await?
+        .json()
+        .await?;
+    assert!(
+        body.get("raw_commit_count").is_none(),
+        "raw_commit_count should not appear on the wire; got {body:?}"
+    );
+    Ok(())
+}
+
 #[tokio::test]
 async fn chart_page_round_trips_every_slug() -> Result<()> {
     let server = Server::start().await?;
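The two tests above pin the division of labour: the server inlines the full raw history and stays silent about rendered point count; the client decides, per visible window, whether any reduction is needed. Sketched in Rust purely as an illustration of the JS logic in `chart-init.js` (the function name and the 250-point budget are made up here):

```rust
/// Slice the raw commit series to the visible x-window and downsample
/// only when the slice exceeds the render budget. Illustrative only.
fn points_to_render(raw: &[(f64, f64)], min_x: f64, max_x: f64) -> Vec<(f64, f64)> {
    const MAX_POINTS: usize = 250; // hypothetical render budget

    let visible: Vec<(f64, f64)> = raw
        .iter()
        .copied()
        .filter(|&(x, _)| x >= min_x && x <= max_x)
        .collect();

    if visible.len() <= MAX_POINTS {
        visible // common case: zoomed to ~100 commits, render raw
    } else {
        lttb(&visible, MAX_POINTS) // see the LTTB sketch earlier
    }
}
```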