diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 0b3874d0c68..4e745acc91f 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -92,7 +92,7 @@ jobs: VORTEX_EXPERIMENTAL_PATCHED_ARRAY: "1" FLAT_LAYOUT_INLINE_ARRAY_NODE: "1" run: | - bash scripts/bench-taskset.sh target/release_debug/${{ matrix.benchmark.id }} --formats ${{ matrix.benchmark.formats }} -d gh-json -o results.json + bash scripts/bench-taskset.sh target/release_debug/${{ matrix.benchmark.id }} --formats ${{ matrix.benchmark.formats }} -d gh-json -o results.json --gh-json-v3 results.v3.jsonl - name: Setup AWS CLI uses: aws-actions/configure-aws-credentials@v6 @@ -105,6 +105,19 @@ jobs: run: | bash scripts/cat-s3.sh vortex-ci-benchmark-results data.json.gz results.json + - name: Ingest results to v3 server + if: vars.V3_INGEST_URL != '' + continue-on-error: true + shell: bash + env: + INGEST_BEARER_TOKEN: ${{ secrets.INGEST_BEARER_TOKEN }} + run: | + python3 scripts/post-ingest.py results.v3.jsonl \ + --server "${{ vars.V3_INGEST_URL }}" \ + --commit-sha "${{ github.sha }}" \ + --benchmark-id "${{ matrix.benchmark.id }}" \ + --repo-url "${{ github.server_url }}/${{ github.repository }}" + - name: Alert incident.io if: failure() uses: ./.github/actions/alert-incident-io diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c6b146d35b2..08e3be97996 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -358,7 +358,8 @@ jobs: if: matrix.os == 'windows-x64' run: | cargo nextest run --cargo-profile ci --locked --workspace --all-features --no-fail-fast ` - --exclude vortex-bench --exclude vortex-python --exclude vortex-duckdb ` + --exclude vortex-bench --exclude vortex-bench-server ` + --exclude vortex-python --exclude vortex-duckdb ` --exclude vortex-fuzz --exclude vortex-cuda --exclude vortex-nvcomp ` --exclude vortex-cub --exclude vortex-test-e2e-cuda --exclude duckdb-bench ` --exclude lance-bench --exclude datafusion-bench 
--exclude random-access-bench ` diff --git a/.github/workflows/publish-benchmarks-website.yml b/.github/workflows/publish-bench-server.yml similarity index 58% rename from .github/workflows/publish-benchmarks-website.yml rename to .github/workflows/publish-bench-server.yml index e7eeefb8ecc..0bfcb6d3293 100644 --- a/.github/workflows/publish-benchmarks-website.yml +++ b/.github/workflows/publish-bench-server.yml @@ -1,18 +1,23 @@ -name: Publish Benchmarks Website +name: Publish Bench Server on: push: branches: [develop] paths: - - "benchmarks-website/**" + - "benchmarks-website/server/**" + - "vortex-bench/**" + - "Cargo.lock" + - ".github/workflows/publish-bench-server.yml" + workflow_dispatch: jobs: publish: runs-on: ubuntu-latest - timeout-minutes: 10 + timeout-minutes: 30 permissions: contents: read packages: write + id-token: write steps: - uses: actions/checkout@v6 @@ -32,7 +37,10 @@ jobs: - name: Build and push uses: docker/build-push-action@v7 with: - context: ./benchmarks-website + context: . 
+ file: ./benchmarks-website/server/Dockerfile platforms: linux/arm64 push: true - tags: ghcr.io/${{ github.repository }}/benchmarks-website:latest + tags: | + ghcr.io/${{ github.repository }}/vortex-bench-server:latest + ghcr.io/${{ github.repository }}/vortex-bench-server:${{ github.sha }} diff --git a/.github/workflows/sql-benchmarks.yml b/.github/workflows/sql-benchmarks.yml index 8dcb56bceda..d8eef8cd246 100644 --- a/.github/workflows/sql-benchmarks.yml +++ b/.github/workflows/sql-benchmarks.yml @@ -376,6 +376,7 @@ jobs: bash scripts/bench-taskset.sh uv run --project bench-orchestrator vx-bench run "${{ matrix.subcommand }}" \ --targets-json '${{ steps.targets.outputs.targets_json }}' \ --output results.json \ + --gh-json-v3 results.v3.jsonl \ --no-build \ --runner "ec2_${{ inputs.machine_type }}" \ ${{ matrix.iterations && format('--iterations {0}', matrix.iterations) || '' }} \ @@ -395,6 +396,7 @@ jobs: bash scripts/bench-taskset.sh uv run --project bench-orchestrator vx-bench run "${{ matrix.subcommand }}" \ --targets-json '${{ steps.targets.outputs.targets_json }}' \ --output results.json \ + --gh-json-v3 results.v3.jsonl \ --no-build \ --runner "ec2_${{ inputs.machine_type }}" \ ${{ matrix.iterations && format('--iterations {0}', matrix.iterations) || '' }} \ @@ -499,6 +501,19 @@ jobs: run: | bash scripts/cat-s3.sh vortex-ci-benchmark-results data.json.gz results.json + - name: Ingest results to v3 server + if: inputs.mode == 'develop' && vars.V3_INGEST_URL != '' + continue-on-error: true + shell: bash + env: + INGEST_BEARER_TOKEN: ${{ secrets.INGEST_BEARER_TOKEN }} + run: | + python3 scripts/post-ingest.py results.v3.jsonl \ + --server "${{ vars.V3_INGEST_URL }}" \ + --commit-sha "${{ github.sha }}" \ + --benchmark-id "${{ matrix.id }}" \ + --repo-url "${{ github.server_url }}/${{ github.repository }}" + - name: Upload File Sizes if: inputs.mode == 'develop' && matrix.remote_storage == null shell: bash diff --git 
a/.github/workflows/v3-commit-metadata.yml b/.github/workflows/v3-commit-metadata.yml new file mode 100644 index 00000000000..8f18be93e53 --- /dev/null +++ b/.github/workflows/v3-commit-metadata.yml @@ -0,0 +1,35 @@ +# Posts a v3 ingest envelope with no records on every push to develop, so the +# `commits` dim stays populated even when no benchmark ran. + +name: v3 commit metadata + +on: + push: + branches: [develop] + workflow_dispatch: + +permissions: + contents: read + +jobs: + commit-metadata: + runs-on: ubuntu-latest + timeout-minutes: 10 + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 2 + + - name: Ingest commit metadata to v3 server + if: vars.V3_INGEST_URL != '' + continue-on-error: true + shell: bash + env: + INGEST_BEARER_TOKEN: ${{ secrets.INGEST_BEARER_TOKEN }} + run: | + echo -n > empty.jsonl + python3 scripts/post-ingest.py empty.jsonl \ + --server "${{ vars.V3_INGEST_URL }}" \ + --commit-sha "${{ github.sha }}" \ + --benchmark-id "commit-metadata" \ + --repo-url "${{ github.server_url }}/${{ github.repository }}" diff --git a/.gitignore b/.gitignore index 7fa79fb2162..bcc8ef746ee 100644 --- a/.gitignore +++ b/.gitignore @@ -242,3 +242,6 @@ trace*.pb # pytest-benchmark output vortex-python/.benchmarks/ +# For local benchmarks website server and things like the WAL +**.duckdb* +.bench-env diff --git a/Cargo.lock b/Cargo.lock index 92b8f535503..fbf7c8dbcfd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -197,6 +197,9 @@ name = "arbitrary" version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" +dependencies = [ + "derive_arbitrary", +] [[package]] name = "arc-swap" @@ -687,9 +690,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.41" +version = "0.4.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0f9ee0f6e02ffd7ad5816e9464499fba7b3effd01123b515c41d1697c43dad1" +checksum = 
"e79b3f8a79cccc2898f31920fc69f304859b3bd567490f75ebf51ae1c792a9ac" dependencies = [ "compression-codecs", "compression-core", @@ -900,6 +903,58 @@ dependencies = [ "fs_extra", ] +[[package]] +name = "axum" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31b698c5f9a010f6573133b09e0de5408834d0c82f8d7475a89fc1867a71cd90" +dependencies = [ + "axum-core", + "bytes", + "form_urlencoded", + "futures-util", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "serde_core", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "axum-core" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1" +dependencies = [ + "bytes", + "futures-core", + "http", + "http-body", + "http-body-util", + "mime", + "pin-project-lite", + "sync_wrapper", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = "base16ct" version = "1.0.0" @@ -1025,9 +1080,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.8.4" +version = "1.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d2d5991425dfd0785aed03aedcf0b321d61975c9b5b3689c774a2610ae0b51e" +checksum = "0aa83c34e62843d924f905e0f5c866eb1dd6545fc4d719e803d9ba6030371fce" dependencies = [ "arrayref", "arrayvec", @@ -1298,9 +1353,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.60" +version = "1.2.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43c5703da9466b66a946814e1adf53ea2c90f10063b86290cc9eb67ce3478a20" +checksum = "d16d90359e986641506914ba71350897565610e87ce0ad9e6f28569db3dd5c6d" dependencies = [ "find-msvc-tools", "jobserver", @@ -1314,12 +1369,6 @@ 
version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0" -[[package]] -name = "cesu8" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" - [[package]] name = "cexpr" version = "0.6.0" @@ -1606,10 +1655,11 @@ dependencies = [ [[package]] name = "comfy-table" -version = "7.2.2" +version = "7.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "958c5d6ecf1f214b4c2bbbbf6ab9523a864bd136dcf71a7e8904799acfe1ad47" +checksum = "4a65ebfec4fb190b6f90e944a817d60499ee0744e582530e2c9900a22e591d9a" dependencies = [ + "crossterm 0.28.1", "unicode-segmentation", "unicode-width 0.2.2", ] @@ -1652,9 +1702,9 @@ dependencies = [ [[package]] name = "compression-codecs" -version = "0.4.37" +version = "0.4.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb7b51a7d9c967fc26773061ba86150f19c50c0d65c887cb1fbe295fd16619b7" +checksum = "ce2548391e9c1929c21bf6aa2680af86fe4c1b33e6cea9ac1cfeec0bd11218cf" dependencies = [ "bzip2", "compression-core", @@ -1667,9 +1717,9 @@ dependencies = [ [[package]] name = "compression-core" -version = "0.4.31" +version = "0.4.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" +checksum = "cc14f565cf027a105f7a44ccf9e5b424348421a1d8952a8fc9d499d313107789" [[package]] name = "concurrent-queue" @@ -1910,6 +1960,19 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crossterm" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6" +dependencies = [ + 
"bitflags", + "crossterm_winapi", + "parking_lot", + "rustix 0.38.44", + "winapi", +] + [[package]] name = "crossterm" version = "0.29.0" @@ -3550,6 +3613,17 @@ dependencies = [ "serde_core", ] +[[package]] +name = "derive_arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "derive_more" version = "2.1.1" @@ -3660,6 +3734,25 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab23e69df104e2fd85ee63a533a22d2132ef5975dc6b36f9f3e5a7305e4a8ed7" +[[package]] +name = "duckdb" +version = "1.10502.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fdc796383b176dd5a45353fbb5e64583c0ee4da12cb62c9e510b785324b2488" +dependencies = [ + "arrow 58.1.0", + "cast", + "comfy-table", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink", + "libduckdb-sys", + "num", + "num-integer", + "rust_decimal", + "strum 0.27.2", +] + [[package]] name = "duckdb-bench" version = "0.1.0" @@ -3861,6 +3954,18 @@ dependencies = [ "ext-trait", ] +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + [[package]] name = "fast-float2" version = "0.2.3" @@ -4003,9 +4108,9 @@ checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" [[package]] name = "fsst" -version = "4.0.0" +version = "4.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2195cc7f87e84bd695586137de99605e7e9579b26ec5e01b82960ddb4d0922f2" +checksum = "2b3a6f3550e61b999febd7168d462db953948eff4fc3448276b3d10d10324dbb" dependencies = [ "arrow-array 57.3.0", "rand 0.9.4", @@ -4356,6 +4461,15 @@ dependencies = [ "foldhash 0.2.0", ] +[[package]] +name = "hashlink" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" +dependencies = [ + "hashbrown 0.15.5", +] + [[package]] name = "heck" version = "0.5.0" @@ -4428,6 +4542,12 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + [[package]] name = "humansize" version = "2.1.3" @@ -4445,9 +4565,9 @@ checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" [[package]] name = "hybrid-array" -version = "0.4.10" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3944cf8cf766b40e2a1a333ee5e9b563f854d5fa49d6a8ca2764e97c6eddb214" +checksum = "08d46837a0ed51fe95bd3b05de33cd64a1ee88fc797477ca48446872504507c5" dependencies = [ "typenum", ] @@ -4466,6 +4586,7 @@ dependencies = [ "http", "http-body", "httparse", + "httpdate", "itoa", "pin-project-lite", "smallvec", @@ -4487,6 +4608,7 @@ dependencies = [ "tokio", "tokio-rustls", "tower-service", + "webpki-roots", ] [[package]] @@ -4852,9 +4974,9 @@ dependencies = [ [[package]] name = "jiff" -version = "0.2.23" +version = "0.2.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a3546dc96b6d42c5f24902af9e2538e82e39ad350b0c766eb3fbf2d8f3d8359" +checksum = "f00b5dbd620d61dfdcb6007c9c1f6054ebd75319f163d886a9055cec1155073d" dependencies = [ 
"jiff-static", "jiff-tzdb-platform", @@ -4867,9 +4989,9 @@ dependencies = [ [[package]] name = "jiff-static" -version = "0.2.23" +version = "0.2.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a8c8b344124222efd714b73bb41f8b5120b27a7cc1c75593a6ff768d9d05aa4" +checksum = "e000de030ff8022ea1da3f466fbb0f3a809f5e51ed31f6dd931c35181ad8e6d7" dependencies = [ "proc-macro2", "quote", @@ -4891,22 +5013,6 @@ dependencies = [ "jiff-tzdb", ] -[[package]] -name = "jni" -version = "0.21.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a87aa2bb7d2af34197c04845522473242e1aa17c12f4935d5856491a7fb8c97" -dependencies = [ - "cesu8", - "cfg-if", - "combine", - "jni-sys 0.3.1", - "log", - "thiserror 1.0.69", - "walkdir", - "windows-sys 0.45.0", -] - [[package]] name = "jni" version = "0.22.4" @@ -4917,7 +5023,7 @@ dependencies = [ "combine", "java-locator", "jni-macros", - "jni-sys 0.4.1", + "jni-sys", "libloading", "log", "simd_cesu8", @@ -4939,15 +5045,6 @@ dependencies = [ "syn 2.0.117", ] -[[package]] -name = "jni-sys" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41a652e1f9b6e0275df1f15b32661cf0d4b78d4d87ddec5e0c3c20f097433258" -dependencies = [ - "jni-sys 0.4.1", -] - [[package]] name = "jni-sys" version = "0.4.1" @@ -5047,9 +5144,9 @@ checksum = "a4933f3f57a8e9d9da04db23fb153356ecaf00cbd14aee46279c33dc80925c37" [[package]] name = "lance" -version = "4.0.0" +version = "4.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efe6c3ddd79cdfd2b7e1c23cafae52806906bc40fbd97de9e8cf2f8c7a75fc04" +checksum = "f63e285ceee2b4ca8eb3a8742266cc1ac8161599767a8ecb4d8c2f9fd43d8b29" dependencies = [ "arrow 57.3.0", "arrow-arith 57.3.0", @@ -5113,9 +5210,9 @@ dependencies = [ [[package]] name = "lance-arrow" -version = "4.0.0" +version = "4.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5d9f5d95bdda2a2b790f1fb8028b5b6dcf661abeb3133a8bca0f3d24b054af87" +checksum = "5c55e62fc04422ef4cd4af6f863ada32641ae23124f9b2e9c567a40d617e8c97" dependencies = [ "arrow-array 57.3.0", "arrow-buffer 57.3.0", @@ -5153,9 +5250,9 @@ dependencies = [ [[package]] name = "lance-bitpacking" -version = "4.0.0" +version = "4.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f827d6ab9f8f337a9509d5ad66a12f3314db8713868260521c344ef6135eb4e4" +checksum = "a48d232a2908645af0040f96c60a6387fea2df75e762d7033e93e17bb420c6a1" dependencies = [ "arrayref", "paste", @@ -5164,9 +5261,9 @@ dependencies = [ [[package]] name = "lance-core" -version = "4.0.0" +version = "4.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f1e25df6a79bf72ee6bcde0851f19b1cd36c5848c1b7db83340882d3c9fdecb" +checksum = "ce071baaff88fcdcf67f1dd0af54e17656f52ae75aaeb75f25f9cf4da29241f2" dependencies = [ "arrow-array 57.3.0", "arrow-buffer 57.3.0", @@ -5203,9 +5300,9 @@ dependencies = [ [[package]] name = "lance-datafusion" -version = "4.0.0" +version = "4.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93146de8ae720cb90edef81c2f2d0a1b065fc2f23ecff2419546f389b0fa70a4" +checksum = "11ebc97ee94fa8e1af6fd0520066c7e7e0eab38a100e750ba9aabad644c5aa57" dependencies = [ "arrow 57.3.0", "arrow-array 57.3.0", @@ -5235,9 +5332,9 @@ dependencies = [ [[package]] name = "lance-datagen" -version = "4.0.0" +version = "4.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccec8ce4d8e0a87a99c431dab2364398029f2ffb649c1a693c60c79e05ed30dd" +checksum = "9b90dbb2829875b3a3d00f88fd3a3e39a9e4c7d34c266f67da6550fcda54c76e" dependencies = [ "arrow 57.3.0", "arrow-array 57.3.0", @@ -5255,9 +5352,9 @@ dependencies = [ [[package]] name = "lance-encoding" -version = "4.0.0" +version = "4.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5c1aec0bbbac6bce829bc10f1ba066258126100596c375fb71908ecf11c2c2a5" +checksum = "65ec429cc2e18ad1b7e43cc7ec57a2f2e49229cfbd934da45e619751a886b8cd" dependencies = [ "arrow-arith 57.3.0", "arrow-array 57.3.0", @@ -5294,9 +5391,9 @@ dependencies = [ [[package]] name = "lance-file" -version = "4.0.0" +version = "4.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14a8c548804f5b17486dc2d3282356ed1957095a852780283bc401fdd69e9075" +checksum = "418afe3f82487615fa09222b95a4b5853103f3f0425996d24a537ca750381f83" dependencies = [ "arrow-arith 57.3.0", "arrow-array 57.3.0", @@ -5328,9 +5425,9 @@ dependencies = [ [[package]] name = "lance-index" -version = "4.0.0" +version = "4.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2da212f0090ea59f79ac3686660f596520c167fe1cb5f408900cf71d215f0e03" +checksum = "936b3deeb6ee075646d18f27b01cf2d2e846c3f5f6c5fa45b30aa41dd5b4c4e2" dependencies = [ "arrow 57.3.0", "arrow-arith 57.3.0", @@ -5394,9 +5491,9 @@ dependencies = [ [[package]] name = "lance-io" -version = "4.0.0" +version = "4.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41d958eb4b56f03bbe0f5f85eb2b4e9657882812297b6f711f201ffc995f259f" +checksum = "4103e4cebe146af15bfb198c8142d6ea37d5b25fa04158bf2d9be4597bf174d3" dependencies = [ "arrow 57.3.0", "arrow-arith 57.3.0", @@ -5433,9 +5530,9 @@ dependencies = [ [[package]] name = "lance-linalg" -version = "4.0.0" +version = "4.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0285b70da35def7ed95e150fae1d5308089554e1290470403ed3c50cb235bc5e" +checksum = "c00c7ad71eca93635404519e77add6689947c9342134bb2133578f81249bf809" dependencies = [ "arrow-array 57.3.0", "arrow-buffer 57.3.0", @@ -5451,9 +5548,9 @@ dependencies = [ [[package]] name = "lance-namespace" -version = "4.0.0" +version = "4.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5f78e2a828b654e062a495462c6e3eb4fcf0e7e907d761b8f217fc09ccd3ceac" +checksum = "e0c59a574e72a4b72da8096bcaaa1b1e5b44f6a83da164cc714c286fab30c369" dependencies = [ "arrow 57.3.0", "async-trait", @@ -5479,9 +5576,9 @@ dependencies = [ [[package]] name = "lance-table" -version = "4.0.0" +version = "4.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3df9c4adca3eb2074b3850432a9fb34248a3d90c3d6427d158b13ff9355664ee" +checksum = "943b9c503f23ebab9e0dbee356f528bc4cbcafded87a6848451f205b0bb473d7" dependencies = [ "arrow 57.3.0", "arrow-array 57.3.0", @@ -5634,9 +5731,26 @@ checksum = "b3a6a8c165077efc8f3a971534c50ea6a1a18b329ef4a66e897a7e3a1494565f" [[package]] name = "libc" -version = "0.2.185" +version = "0.2.186" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52ff2c0fe9bc6cb6b14a0592c2ff4fa9ceb83eea9db979b0487cd054946a2b8f" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "libduckdb-sys" +version = "1.10502.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d7401630ae2abcff642f7156294289e50f2d222e061c026ad797b01bf20c215" +dependencies = [ + "cc", + "flate2", + "pkg-config", + "reqwest 0.12.28", + "serde", + "serde_json", + "tar", + "vcpkg", + "zip 6.0.0", +] [[package]] name = "libfuzzer-sys" @@ -5888,6 +6002,12 @@ dependencies = [ "regex-automata", ] +[[package]] +name = "matchit" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" + [[package]] name = "matrixmultiply" version = "0.3.10" @@ -5901,6 +6021,30 @@ dependencies = [ "thread-tree", ] +[[package]] +name = "maud" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8156733e27020ea5c684db5beac5d1d611e1272ab17901a49466294b84fc217e" +dependencies = [ + "axum-core", + "http", + "itoa", + "maud_macros", +] + 
+[[package]] +name = "maud_macros" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7261b00f3952f617899bc012e3dbd56e4f0110a038175929fa5d18e5a19913ca" +dependencies = [ + "proc-macro2", + "proc-macro2-diagnostics", + "quote", + "syn 2.0.117", +] + [[package]] name = "md-5" version = "0.10.6" @@ -6211,6 +6355,20 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + [[package]] name = "num-bigint" version = "0.4.6" @@ -6246,6 +6404,28 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -7054,6 +7234,18 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "proc-macro2-diagnostics" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", + "version_check", +] + [[package]] name = "prost" version = "0.12.6" @@ -7585,7 +7777,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "577c9b9f652b4c121fb25c6a391dd06406d3b092ba68827e6d2f09550edc54b3" dependencies = [ 
"cfg-if", - "crossterm", + "crossterm 0.29.0", "instability", "ratatui-core", ] @@ -7824,13 +8016,14 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams 0.4.2", "web-sys", + "webpki-roots", ] [[package]] name = "reqwest" -version = "0.13.2" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab3f43e3283ab1488b624b44b0e988d0acea0b3214e694730a055cb6b2efa801" +checksum = "62e0021ea2c22aed41653bc7e1419abb2c97e038ff2c33d0e1309e49a97deec0" dependencies = [ "base64", "bytes", @@ -7854,6 +8047,8 @@ dependencies = [ "rustls", "rustls-pki-types", "rustls-platform-verifier", + "serde", + "serde_json", "sync_wrapper", "tokio", "tokio-rustls", @@ -7913,9 +8108,9 @@ dependencies = [ [[package]] name = "roaring" -version = "0.11.3" +version = "0.11.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ba9ce64a8f45d7fc86358410bb1a82e8c987504c0d4900e9141d69a9f26c885" +checksum = "1dedc5658c6ecb3bdb5ef5f3295bb9253f42dcf3fd1402c03f6b1f7659c3c4a9" dependencies = [ "bytemuck", "byteorder", @@ -8047,9 +8242,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.38" +version = "0.23.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69f9466fb2c14ea04357e91413efb882e2a6d4a406e625449bc0a5d360d53a21" +checksum = "7c2c118cb077cca2822033836dfb1b975355dfb784b5e8da48f7b6c5db74e60e" dependencies = [ "aws-lc-rs", "once_cell", @@ -8074,9 +8269,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.14.0" +version = "1.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" +checksum = "30a7197ae7eb376e574fe940d068c30fe0462554a3ddbe4eca7838e049c937a9" dependencies = [ "web-time", "zeroize", @@ -8084,13 +8279,13 @@ dependencies = [ [[package]] name = "rustls-platform-verifier" -version = "0.6.2" +version = "0.7.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d99feebc72bae7ab76ba994bb5e121b8d83d910ca40b36e0921f53becc41784" +checksum = "26d1e2536ce4f35f4846aa13bff16bd0ff40157cdb14cc056c7b14ba41233ba0" dependencies = [ "core-foundation 0.10.1", "core-foundation-sys", - "jni 0.21.1", + "jni", "log", "once_cell", "rustls", @@ -8329,6 +8524,17 @@ dependencies = [ "zmij", ] +[[package]] +name = "serde_path_to_error" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" +dependencies = [ + "itoa", + "serde", + "serde_core", +] + [[package]] name = "serde_repr" version = "0.1.20" @@ -9228,7 +9434,7 @@ dependencies = [ "chrono", "num_cpus", "ping", - "reqwest 0.13.2", + "reqwest 0.13.3", "sysinfo", "test-with-derive", "uzers", @@ -9249,7 +9455,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "reqwest 0.13.2", + "reqwest 0.13.3", "syn 2.0.117", "sysinfo", "uzers", @@ -9590,6 +9796,7 @@ dependencies = [ "tokio", "tower-layer", "tower-service", + "tracing", ] [[package]] @@ -9613,6 +9820,7 @@ dependencies = [ "tower", "tower-layer", "tower-service", + "tracing", ] [[package]] @@ -9948,6 +10156,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "vector-search-bench" version = "0.1.0" @@ -10138,6 +10352,7 @@ dependencies = [ "glob", "humansize", "indicatif", + "insta", "itertools 0.14.0", "mimalloc", "noodles-bgzf", @@ -10146,7 +10361,7 @@ dependencies = [ "parquet 58.1.0", "rand 0.10.1", "regex", - "reqwest 0.13.2", + "reqwest 0.13.3", "serde", "serde_json", "sysinfo", @@ -10166,6 +10381,53 @@ dependencies = [ 
"vortex-tensor", ] +[[package]] +name = "vortex-bench-migrate" +version = "0.1.0-alpha.0" +dependencies = [ + "anyhow", + "arrow-array 58.1.0", + "arrow-buffer 58.1.0", + "arrow-schema 58.1.0", + "clap", + "duckdb", + "flate2", + "reqwest 0.13.3", + "rstest", + "serde", + "serde_json", + "tempfile", + "tokio", + "tracing", + "tracing-subscriber", + "vortex-bench-server", + "vortex-utils", +] + +[[package]] +name = "vortex-bench-server" +version = "0.1.0-alpha.0" +dependencies = [ + "anyhow", + "axum", + "base64", + "duckdb", + "insta", + "maud", + "reqwest 0.13.3", + "serde", + "serde_json", + "subtle", + "tempfile", + "thiserror 2.0.18", + "tokio", + "tower", + "tower-http", + "tracing", + "tracing-subscriber", + "twox-hash", +] + [[package]] name = "vortex-btrblocks" version = "0.1.0" @@ -10242,7 +10504,7 @@ dependencies = [ "clap", "futures", "parquet 58.1.0", - "reqwest 0.13.2", + "reqwest 0.13.3", "serde", "serde_json", "sha2 0.11.0", @@ -10424,7 +10686,7 @@ dependencies = [ "object_store 0.13.2", "parking_lot", "paste", - "reqwest 0.13.2", + "reqwest 0.13.3", "rstest", "tempfile", "tracing", @@ -10434,7 +10696,7 @@ dependencies = [ "vortex-runend", "vortex-sequence", "vortex-utils", - "zip", + "zip 8.6.0", ] [[package]] @@ -10646,7 +10908,7 @@ dependencies = [ "arrow-array 58.1.0", "arrow-schema 58.1.0", "futures", - "jni 0.22.4", + "jni", "object_store 0.13.2", "parking_lot", "thiserror 2.0.18", @@ -10726,7 +10988,7 @@ dependencies = [ "bindgen", "libloading", "liblzma", - "reqwest 0.13.2", + "reqwest 0.13.3", "tar", "vortex-cuda-macros", ] @@ -10937,7 +11199,7 @@ dependencies = [ "arrow-schema 58.1.0", "clap", "console_error_panic_hook", - "crossterm", + "crossterm 0.29.0", "datafusion 53.1.0", "env_logger", "flatbuffers", @@ -11070,6 +11332,7 @@ dependencies = [ "cfg-if", "once_cell", "rustversion", + "serde", "wasm-bindgen-macro", "wasm-bindgen-shared", ] @@ -11205,6 +11468,15 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "webpki-roots" 
+version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52f5ee44c96cf55f1b349600768e3ece3a8f26010c05265ab73f945bb1a2eb9d" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "which" version = "8.0.2" @@ -11357,15 +11629,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "windows-sys" -version = "0.45.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" -dependencies = [ - "windows-targets 0.42.2", -] - [[package]] name = "windows-sys" version = "0.52.0" @@ -11402,21 +11665,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "windows-targets" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" -dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", -] - [[package]] name = "windows-targets" version = "0.52.6" @@ -11459,12 +11707,6 @@ dependencies = [ "windows-link", ] -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" - [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" @@ -11477,12 +11719,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" -[[package]] -name = "windows_aarch64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" - [[package]] name = "windows_aarch64_msvc" version = "0.52.6" @@ -11495,12 
+11731,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" -[[package]] -name = "windows_i686_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" - [[package]] name = "windows_i686_gnu" version = "0.52.6" @@ -11525,12 +11755,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" -[[package]] -name = "windows_i686_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" - [[package]] name = "windows_i686_msvc" version = "0.52.6" @@ -11543,12 +11767,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" -[[package]] -name = "windows_x86_64_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" - [[package]] name = "windows_x86_64_gnu" version = "0.52.6" @@ -11561,12 +11779,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" - [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" @@ -11579,12 +11791,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" 
-[[package]] -name = "windows_x86_64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" - [[package]] name = "windows_x86_64_msvc" version = "0.52.6" @@ -11882,6 +12088,20 @@ dependencies = [ "num-traits", ] +[[package]] +name = "zip" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb2a05c7c36fde6c09b08576c9f7fb4cda705990f73b58fe011abf7dfb24168b" +dependencies = [ + "arbitrary", + "crc32fast", + "flate2", + "indexmap", + "memchr", + "zopfli", +] + [[package]] name = "zip" version = "8.6.0" diff --git a/Cargo.toml b/Cargo.toml index 4ddcfbe3d43..d56fc893658 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -60,6 +60,9 @@ members = [ "benchmarks/duckdb-bench", "benchmarks/random-access-bench", "benchmarks/vector-search-bench", + # Benchmarks website v3 (alpha) - leaf binary, not part of vortex-* API + "benchmarks-website/server", + "benchmarks-website/migrate", ] exclude = ["java/testfiles", "wasm-test"] resolver = "2" diff --git a/REUSE.toml b/REUSE.toml index 161f6e3086a..8e406c95c90 100644 --- a/REUSE.toml +++ b/REUSE.toml @@ -36,7 +36,7 @@ SPDX-FileCopyrightText = "Copyright the Vortex contributors" SPDX-License-Identifier = "CC-BY-4.0" [[annotations]] -path = ["**/.gitignore", ".gitmodules", ".python-version", "**/*.lock", "**/*.lockfile", "**/*.toml", "**/*.json", ".idea/**", ".github/**", "codecov.yml", "java/gradle/wrapper/gradle-wrapper.properties"] +path = ["**/.gitignore", ".gitmodules", ".python-version", "**/*.lock", "**/*.lockfile", "**/*.toml", "**/*.json", ".idea/**", ".github/**", "codecov.yml", "java/gradle/wrapper/gradle-wrapper.properties", "**.duckdb*"] precedence = "override" SPDX-FileCopyrightText = "Copyright the Vortex contributors" SPDX-License-Identifier = "Apache-2.0" diff --git a/_typos.toml b/_typos.toml index 62c3b0d6358..2b9bc571e76 100644 --- a/_typos.toml +++ b/_typos.toml 
@@ -8,7 +8,7 @@ extend-ignore-re = [ ] [files] -extend-exclude = ["/vortex-bench/**", "/docs/references.bib", "benchmarks/**", "vortex-sqllogictest/slt/**", "encodings/fsst/src/dfa/tests.rs", "encodings/fsst/src/dfa/flat_contains.rs"] +extend-exclude = ["/vortex-bench/**", "/docs/references.bib", "benchmarks/**", "vortex-sqllogictest/slt/**", "encodings/fsst/src/dfa/tests.rs", "encodings/fsst/src/dfa/flat_contains.rs", "benchmarks-website/server/static/**"] [type.py] extend-ignore-identifiers-re = [ diff --git a/bench-orchestrator/bench_orchestrator/cli.py b/bench-orchestrator/bench_orchestrator/cli.py index d497d85ed13..6c200015182 100644 --- a/bench-orchestrator/bench_orchestrator/cli.py +++ b/bench-orchestrator/bench_orchestrator/cli.py @@ -210,6 +210,10 @@ def run( Path | None, typer.Option("--output", help="Optional path for compatibility JSONL output"), ] = None, + gh_json_v3: Annotated[ + Path | None, + typer.Option("--gh-json-v3", help="Optional path for v3 JSONL records emitted by the benchmark binary"), + ] = None, options: Annotated[list[str] | None, typer.Option("--opt", help="Engine or benchmark specific options")] = None, ) -> None: """Run benchmarks with specified configuration.""" @@ -294,6 +298,7 @@ def run( sample_rate=sample_rate, tracing=tracing, runner=runner, + gh_json_v3=gh_json_v3, on_result=lambda line, store_writer=ctx.write_raw_json, compatibility=compatibility_file: ( write_result_line( line, diff --git a/bench-orchestrator/bench_orchestrator/runner/executor.py b/bench-orchestrator/bench_orchestrator/runner/executor.py index b895afdc2e1..32ed9c91132 100644 --- a/bench-orchestrator/bench_orchestrator/runner/executor.py +++ b/bench-orchestrator/bench_orchestrator/runner/executor.py @@ -40,6 +40,7 @@ def build_command( sample_rate: int | None = None, tracing: bool = False, runner: str | None = None, + gh_json_v3: Path | None = None, ) -> list[str]: """Build the command used to execute a benchmark binary.""" cmd = [ @@ -67,6 +68,8 @@ def 
build_command( cmd.append("--tracing") if runner: cmd.extend(["--runner", runner]) + if gh_json_v3 is not None: + cmd.extend(["--gh-json-v3", str(gh_json_v3)]) if options: for key, value in options.items(): cmd.extend(["--opt", f"{key}={value}"]) @@ -98,6 +101,7 @@ def run( sample_rate: int | None = None, tracing: bool = False, runner: str | None = None, + gh_json_v3: Path | None = None, on_result: Callable[[str], None] | None = None, ) -> list[str]: """ @@ -128,6 +132,7 @@ def run( sample_rate=sample_rate, tracing=tracing, runner=runner, + gh_json_v3=gh_json_v3, ) if self.verbose: diff --git a/bench-orchestrator/tests/test_executor.py b/bench-orchestrator/tests/test_executor.py index ade3dde1a67..dd3253a22ff 100644 --- a/bench-orchestrator/tests/test_executor.py +++ b/bench-orchestrator/tests/test_executor.py @@ -48,6 +48,31 @@ def test_build_command_omits_formats_for_lance_backend() -> None: assert "1,3" in cmd +def test_build_command_includes_gh_json_v3_when_set() -> None: + executor = BenchmarkExecutor(Path("/tmp/duckdb-bench"), Engine.DUCKDB) + + cmd = executor.build_command( + benchmark=Benchmark.TPCH, + formats=[Format.PARQUET], + gh_json_v3=Path("results.v3.jsonl"), + ) + + assert "--gh-json-v3" in cmd + flag_idx = cmd.index("--gh-json-v3") + assert cmd[flag_idx + 1] == "results.v3.jsonl" + + +def test_build_command_omits_gh_json_v3_when_unset() -> None: + executor = BenchmarkExecutor(Path("/tmp/duckdb-bench"), Engine.DUCKDB) + + cmd = executor.build_command( + benchmark=Benchmark.TPCH, + formats=[Format.PARQUET], + ) + + assert "--gh-json-v3" not in cmd + + def test_run_streams_logs_without_counting_them(tmp_path: Path) -> None: script = tmp_path / "fake-bench.py" script.write_text( diff --git a/benchmarks-website/Dockerfile b/benchmarks-website/Dockerfile deleted file mode 100644 index 1f87a7148b5..00000000000 --- a/benchmarks-website/Dockerfile +++ /dev/null @@ -1,16 +0,0 @@ -FROM node:24-alpine AS build -WORKDIR /app -COPY package.json package-lock.json 
./ -RUN npm ci -COPY . . -RUN npm run build - -FROM node:24-alpine -WORKDIR /app -COPY package.json package-lock.json ./ -RUN npm ci --omit=dev -COPY --from=build /app/dist ./dist -COPY server.js . -COPY src/config.js ./src/config.js -EXPOSE 3000 -CMD ["node", "server.js"] diff --git a/benchmarks-website/docker-compose.yml b/benchmarks-website/docker-compose.yml deleted file mode 100644 index 4c2e9682329..00000000000 --- a/benchmarks-website/docker-compose.yml +++ /dev/null @@ -1,15 +0,0 @@ -services: - benchmarks-website: - image: ghcr.io/vortex-data/vortex/benchmarks-website:latest - ports: - - "80:3000" - restart: unless-stopped - - watchtower: - image: containrrr/watchtower - volumes: - - /var/run/docker.sock:/var/run/docker.sock - environment: - - WATCHTOWER_POLL_INTERVAL=60 - - WATCHTOWER_CLEANUP=true - restart: unless-stopped diff --git a/benchmarks-website/ec2-init.txt b/benchmarks-website/ec2-init.txt deleted file mode 100644 index 1c2459b3bee..00000000000 --- a/benchmarks-website/ec2-init.txt +++ /dev/null @@ -1,17 +0,0 @@ - 1. Install Docker - # Amazon Linux 2023 - sudo yum install -y docker - sudo systemctl enable --now docker - sudo usermod -aG docker $USER - newgrp docker - - 2. Install Docker Compose plugin - sudo mkdir -p /usr/local/lib/docker/cli-plugins - sudo curl -SL https://github.com/docker/compose/releases/latest/download/docker-compose-linux-aarch64 -o /usr/local/lib/docker/cli-plugins/docker-compose - sudo chmod +x /usr/local/lib/docker/cli-plugins/docker-compose - - 3. Set up and start the app - sudo mkdir -p /opt/benchmarks-website - sudo cp docker-compose.yml /opt/benchmarks-website/ - cd /opt/benchmarks-website - docker compose up -d \ No newline at end of file diff --git a/benchmarks-website/index.html b/benchmarks-website/index.html deleted file mode 100644 index e475f3ad254..00000000000 --- a/benchmarks-website/index.html +++ /dev/null @@ -1,36 +0,0 @@ - - - - - - Vortex Benchmarks - - - - - - - - - - - - - - - - - -
- - - diff --git a/benchmarks-website/migrate/Cargo.toml b/benchmarks-website/migrate/Cargo.toml new file mode 100644 index 00000000000..45a752df397 --- /dev/null +++ b/benchmarks-website/migrate/Cargo.toml @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright the Vortex contributors + +[package] +name = "vortex-bench-migrate" +version = "0.1.0-alpha.0" +edition = "2024" +rust-version = "1.91.0" +license = "Apache-2.0" +description = "One-shot historical migrator from the v2 benchmarks S3 dataset to a v3 DuckDB file" +publish = false + +[[bin]] +name = "vortex-bench-migrate" +path = "src/main.rs" + +# Throwaway binary, not part of the vortex-* public API surface. +# Errors use anyhow, and the crate is intentionally outside the +# workspace public-api lockfile set. + +[dependencies] +anyhow = { workspace = true } +arrow-array = { workspace = true } +arrow-buffer = { workspace = true } +arrow-schema = { workspace = true } +clap = { workspace = true, features = ["derive"] } +# track vortex-duckdb's bundled engine version (build.rs) +duckdb = { version = "1.10502", features = ["bundled", "appender-arrow"] } +flate2 = "1.1" +reqwest = { workspace = true, features = ["json"] } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } +tracing = { workspace = true, features = ["std"] } +tracing-subscriber = { workspace = true, features = ["env-filter", "fmt"] } +vortex-bench-server = { path = "../server" } +vortex-utils = { workspace = true } + +[dev-dependencies] +rstest = { workspace = true } +tempfile = { workspace = true } diff --git a/benchmarks-website/migrate/src/classifier.rs b/benchmarks-website/migrate/src/classifier.rs new file mode 100644 index 00000000000..dfbdb75705b --- /dev/null +++ b/benchmarks-website/migrate/src/classifier.rs @@ -0,0 +1,818 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: 
Copyright the Vortex contributors + +//! Bug-for-bug port of v2's `getGroup`, `formatQuery`, and +//! `normalizeChartName` from the v2 Express server, plus the +//! mapping from v2 group + name pattern to a v3 fact-table bin. +//! +//! The v2 classifier was the source of truth for what historical +//! records mean. It groups records by name prefix into one of: +//! "Random Access", "Compression", "Compression Size", or one of the +//! SQL query suites (with optional fan-out by storage and scale +//! factor for TPC-H/TPC-DS). This module reproduces that logic and +//! then hops to a v3 fact-table bin, since v3 stores dim values as +//! columns instead of name fragments. +//! +//! Engine and format strings stored in v3 columns are pulled from the +//! raw, pre-rename v2 record name. v2's `ENGINE_RENAMES` was a v2 +//! read-time UI concern (e.g. `vortex-file-compressed` rendered as +//! `vortex` and `parquet-tokio-local-disk` rendered as `parquet-nvme`). +//! v3 stores canonical `Format::name()` strings to match what the v3 +//! live emitter writes, so historical and live records share series. + +use crate::v2::V2Record; +use crate::v2::dataset_scale_factor; + +/// Static port of v2's `QUERY_SUITES`. 
+pub const QUERY_SUITES: &[QuerySuite] = &[ + QuerySuite { + prefix: "clickbench", + display_name: "Clickbench", + query_prefix: "CLICKBENCH", + dataset_key: None, + fan_out: false, + skip: false, + }, + QuerySuite { + prefix: "statpopgen", + display_name: "Statistical and Population Genetics", + query_prefix: "STATPOPGEN", + dataset_key: None, + fan_out: false, + skip: false, + }, + QuerySuite { + prefix: "polarsignals", + display_name: "PolarSignals Profiling", + query_prefix: "POLARSIGNALS", + dataset_key: None, + fan_out: false, + skip: false, + }, + QuerySuite { + prefix: "gharchive", + display_name: "GhArchive", + query_prefix: "GHARCHIVE", + dataset_key: None, + fan_out: false, + skip: false, + }, + QuerySuite { + prefix: "tpch", + display_name: "TPC-H", + query_prefix: "TPC-H", + dataset_key: Some("tpch"), + fan_out: true, + skip: false, + }, + QuerySuite { + prefix: "tpcds", + display_name: "TPC-DS", + query_prefix: "TPC-DS", + dataset_key: Some("tpcds"), + fan_out: true, + skip: false, + }, + QuerySuite { + prefix: "fineweb", + display_name: "Fineweb", + query_prefix: "FINEWEB", + dataset_key: None, + fan_out: false, + skip: false, + }, +]; + +/// Static port of v2's `ENGINE_RENAMES`. Applied to the "series" half +/// of a benchmark name (the part after the first `/`) before splitting +/// on `:` into engine/format. Order doesn't matter — keys are unique. 
+const ENGINE_RENAMES: &[(&str, &str)] = &[ + ("datafusion:vortex-file-compressed", "datafusion:vortex"), + ("datafusion:parquet", "datafusion:parquet"), + ("datafusion:arrow", "datafusion:in-memory-arrow"), + ("datafusion:lance", "datafusion:lance"), + ("datafusion:vortex-compact", "datafusion:vortex-compact"), + ("duckdb:vortex-file-compressed", "duckdb:vortex"), + ("duckdb:parquet", "duckdb:parquet"), + ("duckdb:duckdb", "duckdb:duckdb"), + ("duckdb:vortex-compact", "duckdb:vortex-compact"), + ("vortex-tokio-local-disk", "vortex-nvme"), + ("vortex-compact-tokio-local-disk", "vortex-compact-nvme"), + ("lance-tokio-local-disk", "lance-nvme"), + ("parquet-tokio-local-disk", "parquet-nvme"), + ("lance", "lance"), +]; + +/// One entry of `QUERY_SUITES`. +#[derive(Debug, Clone, Copy)] +pub struct QuerySuite { + pub prefix: &'static str, + pub display_name: &'static str, + pub query_prefix: &'static str, + pub dataset_key: Option<&'static str>, + pub fan_out: bool, + pub skip: bool, +} + +/// Group a v2 record falls into. Mirrors v2's `getGroup`, +/// including the fan-out group naming for TPC-H/TPC-DS. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum V2Group { + RandomAccess, + Compression, + CompressionSize, + Query { + suite_index: usize, + /// `Some` for fan-out suites only. + storage: Option, + /// `Some` for fan-out suites only. + scale_factor: Option, + }, +} + +impl V2Group { + /// Display name as v2 served it from `/api/metadata`. 
+ pub fn display_name(&self) -> String { + match self { + V2Group::RandomAccess => "Random Access".into(), + V2Group::Compression => "Compression".into(), + V2Group::CompressionSize => "Compression Size".into(), + V2Group::Query { + suite_index, + storage, + scale_factor, + } => { + let suite = &QUERY_SUITES[*suite_index]; + if let (Some(storage), Some(sf)) = (storage, scale_factor) { + format!("{} ({}) (SF={})", suite.display_name, storage, sf) + } else { + suite.display_name.to_string() + } + } + } + } +} + +/// Apply v2's `ENGINE_RENAMES`. Reproduces the JS `rename`: +/// `RENAMES[s.toLowerCase()] || RENAMES[s] || s`. +pub fn rename_engine(s: &str) -> String { + let lower = s.to_lowercase(); + for (k, v) in ENGINE_RENAMES { + if *k == lower { + return (*v).to_string(); + } + } + for (k, v) in ENGINE_RENAMES { + if *k == s { + return (*v).to_string(); + } + } + s.to_string() +} + +/// Faithful port of v2's `formatQuery`: maps `clickbench_q07` → +/// `"CLICKBENCH Q7"`. Returns the original (uppercased, +/// `-` and `_` replaced with spaces) when no suite matches. +pub fn format_query(q: &str) -> String { + let lower = q.to_lowercase(); + for suite in QUERY_SUITES { + if suite.skip { + continue; + } + let prefix = suite.prefix; + if let Some(rest) = lower.strip_prefix(prefix) + && let Some(idx) = parse_query_index(rest) + { + return format!("{} Q{}", suite.query_prefix, idx); + } + } + let mut out = q.to_uppercase(); + out = out.replace(['_', '-'], " "); + out +} + +/// Parse the `_q07` / ` q7` / `q42` tail used by `format_query`. +/// Returns the integer query index if the tail matches the v2 regex +/// `^[_ ]?q(\d+)`. 
+fn parse_query_index(rest: &str) -> Option { + let after_sep = rest + .strip_prefix('_') + .or_else(|| rest.strip_prefix(' ')) + .unwrap_or(rest); + let after_q = after_sep + .strip_prefix('q') + .or_else(|| after_sep.strip_prefix('Q'))?; + let digits: String = after_q.chars().take_while(|c| c.is_ascii_digit()).collect(); + if digits.is_empty() { + return None; + } + digits.parse().ok() +} + +/// Faithful port of v2's `normalizeChartName`. +pub fn normalize_chart_name(group: &V2Group, chart_name: &str) -> String { + if matches!(group, V2Group::CompressionSize) && chart_name == "VORTEX FILE COMPRESSED SIZE" { + return "VORTEX SIZE".into(); + } + chart_name.to_string() +} + +/// Port of v2's `getGroup`. Returns `None` for skipped suites +/// (e.g. `fineweb`) or names that match nothing. +pub fn get_group(record: &V2Record) -> Option { + let lower = record.name.to_lowercase(); + + if lower.starts_with("random-access/") || lower.starts_with("random access/") { + return Some(V2Group::RandomAccess); + } + + if lower.starts_with("vortex size/") + || lower.starts_with("vortex-file-compressed size/") + || lower.starts_with("parquet size/") + || lower.starts_with("parquet-zstd size/") + || lower.starts_with("lance size/") + || lower.contains(":raw size/") + || lower.contains(":parquet-zstd size/") + || lower.contains(":lance size/") + { + return Some(V2Group::CompressionSize); + } + + if lower.starts_with("compress time/") + || lower.starts_with("decompress time/") + || lower.starts_with("parquet_rs-zstd compress") + || lower.starts_with("parquet_rs-zstd decompress") + || lower.starts_with("lance compress") + || lower.starts_with("lance decompress") + || lower.starts_with("vortex:lance ratio") + || lower.starts_with("vortex:parquet-zstd ratio") + // Typo'd v2 emitter wrote `parquet-zst` (no `d`) for some + // ratio records; match both spellings so they classify as + // derived ratios instead of falling through to Unknown. 
+ || lower.starts_with("vortex:parquet-zst ratio") + || lower.starts_with("vortex:raw ratio") + { + return Some(V2Group::Compression); + } + + for (i, suite) in QUERY_SUITES.iter().enumerate() { + let prefix_q = format!("{}_q", suite.prefix); + let prefix_slash = format!("{}/", suite.prefix); + if !lower.starts_with(&prefix_q) && !lower.starts_with(&prefix_slash) { + continue; + } + if suite.skip { + return None; + } + if !suite.fan_out { + return Some(V2Group::Query { + suite_index: i, + storage: None, + scale_factor: None, + }); + } + let storage = match record.storage.as_deref().map(str::to_uppercase).as_deref() { + Some("S3") => "S3", + _ => "NVMe", + }; + let dataset_key = suite.dataset_key.unwrap_or(suite.prefix); + let raw_sf = record + .dataset + .as_ref() + .and_then(|d| dataset_scale_factor(d, dataset_key)); + let sf = raw_sf + .as_deref() + .and_then(|s| s.parse::().ok()) + .map(|f| f.round() as i64) + .unwrap_or(1); + return Some(V2Group::Query { + suite_index: i, + storage: Some(storage.into()), + scale_factor: Some(sf.to_string()), + }); + } + + None +} + +/// Group + chart + series breakdown for a v2 record, using the same +/// rules the v2 server applies in `refresh()`. Equivalent to v2's +/// `(group, chartName, seriesName)` triple after rename / skip rules. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct V2Classification { + pub group: V2Group, + pub chart: String, + pub series: String, +} + +/// Apply the same chart / series naming v2's `refresh()` does, plus +/// the throughput / `PARQUET-UNC` skip rules. 
+pub fn classify_v2(record: &V2Record) -> Option { + if record.name.contains(" throughput") { + return None; + } + let group = get_group(record)?; + let parts: Vec<&str> = record.name.split('/').collect(); + let (chart, series) = match (&group, parts.len()) { + (V2Group::RandomAccess, 4) => { + let chart = format!("{}/{}", parts[1], parts[2]) + .to_uppercase() + .replace(['_', '-'], " "); + let series = rename_engine(if parts[3].is_empty() { + "default" + } else { + parts[3] + }); + (chart, series) + } + (V2Group::RandomAccess, 2) => ( + "RANDOM ACCESS".to_string(), + rename_engine(if parts[1].is_empty() { + "default" + } else { + parts[1] + }), + ), + (V2Group::RandomAccess, _) => return None, + _ => { + let series_raw = if parts.len() >= 2 && !parts[1].is_empty() { + parts[1] + } else { + "default" + }; + let series = rename_engine(series_raw); + let chart = format_query(parts[0]); + (chart, series) + } + }; + let chart = normalize_chart_name(&group, &chart); + if chart.contains("PARQUET-UNC") { + return None; + } + Some(V2Classification { + group, + chart, + series, + }) +} + +/// Mapping target: which v3 fact table a v2 record lands in, plus the +/// dim values that table needs. +#[derive(Debug, Clone, PartialEq)] +pub enum V3Bin { + Query { + dataset: String, + dataset_variant: Option, + scale_factor: Option, + query_idx: i32, + storage: String, + engine: String, + format: String, + }, + CompressionTime { + dataset: String, + dataset_variant: Option, + format: String, + op: String, + }, + CompressionSize { + dataset: String, + dataset_variant: Option, + format: String, + }, + RandomAccess { + dataset: String, + format: String, + }, +} + +/// Top-level entry point. Combines `classify_v2` with the v3 fact-table +/// mapping. Returns `None` for records that: +/// +/// - Don't match any v2 group (uncategorized prefix). +/// - Are explicitly skipped by v2 (throughput, PARQUET-UNC, fineweb). 
+/// - Are computed-at-read-time ratios that v3 derives from +/// `compression_sizes` (`vortex:parquet-zstd ratio …`, +/// `vortex:lance ratio …`, `vortex:raw ratio …`, +/// `vortex:* size/…`). +pub fn classify(record: &V2Record) -> Option { + let cls = classify_v2(record)?; + match &cls.group { + V2Group::RandomAccess => bin_random_access(&cls, record), + V2Group::Compression => bin_compression_time(&cls, record), + V2Group::CompressionSize => bin_compression_size(&cls, record), + V2Group::Query { .. } => bin_query(&cls, record), + } +} + +/// Reason the classifier dropped a record. Intentional skips (v2 +/// patterns v3 deliberately doesn't store) are NOT errors; they don't +/// count against the uncategorized gate. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Skip { + /// `vortex:* ratio …` and `vortex:* size` — derived in v3 from + /// `compression_sizes` joined to itself. + DerivedRatio, + /// `throughput` records — v2 derived these from latencies. + Throughput, + /// A v2 query suite marked `skip: true` in QUERY_SUITES. + SkippedSuite, + /// random-access record with an unsupported part count. + UnsupportedShape, + /// Record had no `value` field. + NoValue, + /// Dim outside the v3 emitter's allowlist (e.g. `parquet-zstd`, + /// historical-only suites no longer in CI). + Deprecated, + /// v2 memory measurements (`*_memory/*` records). Carry top-level + /// `peak_physical_memory` / `peak_virtual_memory` / + /// `physical_memory_delta` / `virtual_memory_delta` fields that + /// `V2Record` doesn't deserialize. Not migrated for alpha; merging + /// into the corresponding QueryMeasurement row is future work. + HistoricalMemory, +} + +/// Engines the v3 emitter produces today. Anything else is historical +/// and gets bucketed as `Skip::Deprecated`. +/// +/// ORCHESTRATOR NOTE: confirm against `vortex-bench`'s `Engine` enum +/// before handing off; edit if the live set differs. 
+const V3_ENGINES: &[&str] = &["datafusion", "duckdb", "vortex", "arrow"]; + +/// Formats the v3 emitter produces today (`Format::name()` values). +/// +/// ORCHESTRATOR NOTE: confirm against `vortex-bench/src/lib.rs` +/// `Format::name()` before handing off. +const V3_FORMATS: &[&str] = &[ + "vortex-file-compressed", + "vortex-compact", + "parquet", + "lance", + "csv", + "arrow", + "duckdb", +]; + +/// Query suites the v3 CI runs today. Suites outside this list still +/// classify (so historical analyses stay coherent) but get bucketed +/// as `Skip::Deprecated` so they don't render as orphan charts in v3. +/// +/// `fineweb` is included because `.github/workflows/sql-benchmarks.yml` +/// still has `fineweb` and `fineweb-s3` matrix entries. `gharchive` +/// stays excluded — it's defined in `vortex-bench` but no current +/// workflow runs it. +const V3_QUERY_SUITES: &[&str] = &[ + "clickbench", + "tpch", + "tpcds", + "statpopgen", + "polarsignals", + "fineweb", +]; + +/// Returns true if every dim that v3 stores as a column is on the +/// emitter's current allowlist. Dim values outside the allowlist mean +/// historical-only formats / engines that the v3 UI has nothing to +/// render against. +fn is_v3_dim(bin: &V3Bin) -> bool { + match bin { + V3Bin::Query { engine, format, .. } => { + V3_ENGINES.contains(&engine.as_str()) && V3_FORMATS.contains(&format.as_str()) + } + V3Bin::CompressionTime { format, .. } + | V3Bin::CompressionSize { format, .. } + | V3Bin::RandomAccess { format, .. } => V3_FORMATS.contains(&format.as_str()), + } +} + +/// Outcome of running the classifier on a v2 record. Distinguishes +/// "we know we don't want this" (`Skip`) from "we don't recognize this" +/// (`Unknown`); the migrator's 5% gate fires only on the latter. +#[derive(Debug, Clone)] +pub enum Outcome { + Bin(V3Bin), + Skip(Skip), + Unknown, +} + +/// Like [`classify`], but reports *why* a record was dropped. 
Intended +/// for the migrator so the 5% uncategorized gate doesn't trip on +/// records v2 deliberately doesn't render (ratios, throughput, +/// skipped suites). +pub fn classify_outcome(record: &V2Record) -> Outcome { + if record.name.contains(" throughput") { + return Outcome::Skip(Skip::Throughput); + } + // v2 memory records: e.g. "clickbench_q07_memory/datafusion:parquet". + // Match the `_memory/` infix BEFORE the engine/format split, so they + // route to a known Skip variant instead of slipping through to + // Outcome::Unknown and tripping the 5% gate. + let lower = record.name.to_lowercase(); + if let Some((head, _)) = lower.split_once('/') + && head.ends_with("_memory") + { + return Outcome::Skip(Skip::HistoricalMemory); + } + let Some(group) = get_group(record) else { + return Outcome::Unknown; + }; + if let V2Group::Query { suite_index, .. } = &group + && QUERY_SUITES[*suite_index].skip + { + return Outcome::Skip(Skip::SkippedSuite); + } + let Some(cls) = classify_v2(record) else { + // get_group succeeded but classify_v2 didn't — shape mismatch. + return Outcome::Skip(Skip::UnsupportedShape); + }; + let derived = match &cls.group { + V2Group::Compression => { + let lc = cls.chart.to_lowercase(); + lc.contains("ratio") || lc.contains(':') + } + V2Group::CompressionSize => cls.chart.to_lowercase().contains(':'), + _ => false, + }; + if derived { + return Outcome::Skip(Skip::DerivedRatio); + } + let bin = match &cls.group { + V2Group::RandomAccess => bin_random_access(&cls, record), + V2Group::Compression => bin_compression_time(&cls, record), + V2Group::CompressionSize => bin_compression_size(&cls, record), + V2Group::Query { .. } => bin_query(&cls, record), + }; + let Some(bin) = bin else { + return Outcome::Unknown; + }; + if !is_v3_dim(&bin) { + return Outcome::Skip(Skip::Deprecated); + } + if let V2Group::Query { suite_index, .. 
} = &group + && !V3_QUERY_SUITES.contains(&QUERY_SUITES[*suite_index].prefix) + { + return Outcome::Skip(Skip::Deprecated); + } + Outcome::Bin(bin) +} + +fn bin_random_access(cls: &V2Classification, record: &V2Record) -> Option { + // v2 chart name shape: "RANDOM ACCESS" or "DATASET/PATTERN" (uppercase). + // We store it as the v3 dataset value verbatim, lowercased so + // `/api/groups` returns canonical lowercase names. + let dataset = cls.chart.to_lowercase(); + if dataset.is_empty() { + return None; + } + // Pull format from the raw, pre-rename v2 name so v3 stores the + // canonical `Format::name()` string (matching what the v3 live + // emitter writes). Raw shape is + // `random-access///-tokio-local-disk` + // (4-part) or `random-access/-tokio-local-disk` (2-part + // legacy). After stripping the `-tokio-local-disk` suffix, map the + // v2 random-access ext label (`vortex`, from `Format::ext()`) to + // the canonical name (`vortex-file-compressed`, from + // `Format::name()`). `parquet` and `lance` match between ext and + // name. The `vortex` ext is shared by both `OnDiskVortex` (name + // `vortex-file-compressed`) and `VortexCompact` (name + // `vortex-compact`), but v2's random-access bench only emitted + // `OnDiskVortex`, so mapping to `vortex-file-compressed` is + // correct for all historical data. 
+ let parts: Vec<&str> = record.name.split('/').collect(); + let raw = match parts.len() { + 4 => parts[3], + 2 => parts[1], + _ => return None, + }; + if raw.is_empty() || raw == "default" { + return None; + } + let stripped = raw.strip_suffix("-tokio-local-disk").unwrap_or(raw); + let format = match stripped { + "vortex" => "vortex-file-compressed".to_string(), + other => other.to_lowercase(), + }; + Some(V3Bin::RandomAccess { dataset, format }) +} + +fn bin_compression_time(cls: &V2Classification, _record: &V2Record) -> Option { + // v2 compression chart names look like (after format_query): + // "COMPRESS TIME" [vortex/encode] + // "DECOMPRESS TIME" [vortex/decode] + // "PARQUET RS ZSTD COMPRESS TIME" [parquet/encode] + // "PARQUET RS ZSTD DECOMPRESS TIME" [parquet/decode] + // "LANCE COMPRESS TIME" [lance/encode] + // "LANCE DECOMPRESS TIME" [lance/decode] + // "VORTEX:LANCE RATIO COMPRESS TIME" [drop] + // "VORTEX:PARQUET-ZSTD RATIO COMPRESS TIME" [drop] + // "VORTEX:RAW RATIO COMPRESS TIME" [drop] + let lc = cls.chart.to_lowercase(); + if lc.contains("ratio") || lc.contains(':') { + // Ratios are computed at read time from compression_sizes. 
+ return None; + } + let (format, op) = if lc.starts_with("compress time") { + ("vortex-file-compressed", "encode") + } else if lc.starts_with("decompress time") { + ("vortex-file-compressed", "decode") + } else if lc.starts_with("parquet rs zstd compress time") { + ("parquet", "encode") + } else if lc.starts_with("parquet rs zstd decompress time") { + ("parquet", "decode") + } else if lc.starts_with("lance compress time") { + ("lance", "encode") + } else if lc.starts_with("lance decompress time") { + ("lance", "decode") + } else { + return None; + }; + let dataset = cls.series.to_lowercase(); + if dataset.is_empty() || dataset == "default" { + return None; + } + Some(V3Bin::CompressionTime { + dataset, + dataset_variant: None, + format: format.to_string(), + op: op.to_string(), + }) +} + +fn bin_compression_size(cls: &V2Classification, record: &V2Record) -> Option { + let lc = cls.chart.to_lowercase(); + // Ratios like "VORTEX:PARQUET ZSTD SIZE" / "VORTEX:LANCE SIZE" / + // "VORTEX:RAW SIZE" are derived from compression_sizes at read + // time, not stored. + if lc.contains(':') { + return None; + } + // `parquet-zstd size` shares a leading "parquet" with `parquet size`, + // so check the more specific prefix first. `format_query` upper-cases + // and replaces `-`/`_` with spaces, so the chart we match against is + // `"PARQUET ZSTD SIZE"` (no hyphen) — same convention as the existing + // `"parquet rs zstd compress time"` branches above. 
+ let format = if lc.starts_with("vortex size") { + "vortex-file-compressed" + } else if lc.starts_with("parquet zstd size") { + "parquet-zstd" + } else if lc.starts_with("parquet size") { + "parquet" + } else if lc.starts_with("lance size") { + "lance" + } else { + return None; + }; + let dataset = cls.series.to_lowercase(); + if dataset.is_empty() || dataset == "default" { + return None; + } + // Mirror the file-sizes ingest path's dataset_variant derivation + // (see `migrate::migrate_file_sizes`): pull the SF out of the v2 + // record's `dataset` object when present, drop empty / "1.0". + // Without this both code paths produce the same `mid` only by + // accident, so SF=10 file-sizes rows wouldn't merge with the + // matching data.json.gz "vortex size/tpch" rows. + let dataset_variant = record + .dataset + .as_ref() + .and_then(|d| crate::v2::dataset_scale_factor(d, dataset.as_str())) + .filter(|s| !s.is_empty() && s.as_str() != "1.0"); + Some(V3Bin::CompressionSize { + dataset, + dataset_variant, + format: format.to_string(), + }) +} + +fn bin_query(cls: &V2Classification, record: &V2Record) -> Option { + let V2Group::Query { + suite_index, + storage, + scale_factor, + } = &cls.group + else { + return None; + }; + let suite = &QUERY_SUITES[*suite_index]; + + // Pull the query index from the *raw* name's first part instead of + // the formatted chart, so we don't have to round-trip "Q07". + let raw_first = record.name.split('/').next().unwrap_or(""); + let query_idx = parse_query_index_from_first(raw_first)?; + + // Pull engine:format from the raw, pre-rename second segment so v3 + // stores canonical `Format::name()` strings (e.g. + // `vortex-file-compressed`) that match what the v3 live emitter + // writes. `cls.series` has been through v2's `ENGINE_RENAMES` for + // UI display and is not appropriate for v3 columns. + // + // Older v2 records emitted display-case engines (e.g. `DataFusion`, + // `DuckDB`); newer ones emit lowercase. 
Lowercase here so dedup + // collapses both spellings into a single canonical row. + let raw_series = record.name.split('/').nth(1)?; + let (engine, format) = split_engine_format(raw_series)?; + let engine = engine.to_lowercase(); + let format = format.to_lowercase(); + + let storage_v3 = match storage.as_deref() { + Some("S3") => "s3".to_string(), + Some("NVMe") => "nvme".to_string(), + _ => "nvme".to_string(), + }; + + // ClickBench's "flavor" lives in dataset_variant per benchmark-mapping.md + // - we don't have it from a v2 name string, so we leave it None. + Some(V3Bin::Query { + dataset: suite.prefix.to_string(), + dataset_variant: None, + scale_factor: scale_factor.clone(), + query_idx, + storage: storage_v3, + engine, + format, + }) +} + +/// Pull the integer query index out of the leading name part, which is +/// always `_q` or ` q` for SQL query records. +fn parse_query_index_from_first(first: &str) -> Option { + let lower = first.to_lowercase(); + for suite in QUERY_SUITES { + if let Some(rest) = lower.strip_prefix(suite.prefix) + && let Some(idx) = parse_query_index(rest) + { + return Some(idx as i32); + } + } + None +} + +/// Split a renamed series like `datafusion:parquet` into +/// `(engine, format)`. Returns `None` for series with no `:` since +/// v3 requires both columns. 
+fn split_engine_format(series: &str) -> Option<(String, String)> { + let mut split = series.splitn(2, ':'); + let engine = split.next()?.trim().to_string(); + let format = split.next()?.trim().to_string(); + if engine.is_empty() || format.is_empty() { + return None; + } + Some((engine, format)) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn record(name: &str) -> V2Record { + V2Record { + name: name.to_string(), + commit_id: Some("deadbeef".into()), + unit: None, + value: None, + storage: None, + dataset: None, + all_runtimes: None, + env_triple: None, + } + } + + #[test] + fn format_query_round_trips() { + assert_eq!(format_query("clickbench_q07"), "CLICKBENCH Q7"); + assert_eq!(format_query("tpch_q01"), "TPC-H Q1"); + assert_eq!(format_query("tpcds_q42"), "TPC-DS Q42"); + assert_eq!(format_query("statpopgen_q3"), "STATPOPGEN Q3"); + assert_eq!(format_query("foo bar"), "FOO BAR"); + } + + #[test] + fn rename_engine_canonicalizes_disk_names() { + assert_eq!(rename_engine("vortex-tokio-local-disk"), "vortex-nvme"); + assert_eq!( + rename_engine("datafusion:vortex-file-compressed"), + "datafusion:vortex" + ); + assert_eq!(rename_engine("unknown-engine"), "unknown-engine"); + } + + #[test] + fn parse_query_index_handles_separators() { + assert_eq!(parse_query_index("_q07"), Some(7)); + assert_eq!(parse_query_index(" q7"), Some(7)); + assert_eq!(parse_query_index("q42"), Some(42)); + assert_eq!(parse_query_index("xq7"), None); + } + + #[test] + fn random_access_bins_dataset_pattern() { + let bin = classify(&record("random-access/taxi/take/parquet")).unwrap(); + assert_eq!( + bin, + V3Bin::RandomAccess { + dataset: "taxi/take".into(), + format: "parquet".into(), + } + ); + } +} diff --git a/benchmarks-website/migrate/src/commits.rs b/benchmarks-website/migrate/src/commits.rs new file mode 100644 index 00000000000..28d63a5bd19 --- /dev/null +++ b/benchmarks-website/migrate/src/commits.rs @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: Apache-2.0 +// 
SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! Commit upserts. Adapts a [`crate::v2::V2Commit`] into the v3
+//! `commits` row shape (a [`vortex_bench_server::records::CommitInfo`]).
+
+use anyhow::Context as _;
+use anyhow::Result;
+use duckdb::Transaction;
+use duckdb::params;
+
+use crate::v2::V2Commit;
+
+/// Insert a v3 `commits` row for one v2 commit. Missing fields are
+/// filled with the empty string, matching the v3 schema's `NOT NULL`
+/// constraints; the call site logs a warning for each fallback so
+/// the operator can spot bad inputs.
+pub fn upsert_commit(tx: &Transaction<'_>, commit: &V2Commit) -> Result<UpsertOutcome> {
+    let mut warnings = Vec::new();
+    let timestamp = require_field(&commit.timestamp, "timestamp", &commit.id, &mut warnings);
+    let message = require_field(&commit.message, "message", &commit.id, &mut warnings);
+    let author_name = require_field(
+        &commit.author.as_ref().and_then(|p| p.name.clone()),
+        "author.name",
+        &commit.id,
+        &mut warnings,
+    );
+    let author_email = require_field(
+        &commit.author.as_ref().and_then(|p| p.email.clone()),
+        "author.email",
+        &commit.id,
+        &mut warnings,
+    );
+    let committer_name = require_field(
+        &commit.committer.as_ref().and_then(|p| p.name.clone()),
+        "committer.name",
+        &commit.id,
+        &mut warnings,
+    );
+    let committer_email = require_field(
+        &commit.committer.as_ref().and_then(|p| p.email.clone()),
+        "committer.email",
+        &commit.id,
+        &mut warnings,
+    );
+    let tree_sha = require_field(&commit.tree_id, "tree_id", &commit.id, &mut warnings);
+    let url = require_field(&commit.url, "url", &commit.id, &mut warnings);
+
+    tx.execute(
+        r#"
+        INSERT INTO commits (
+            commit_sha, timestamp, message, author_name, author_email,
+            committer_name, committer_email, tree_sha, url
+        ) VALUES (?, CAST(? AS TIMESTAMPTZ), ?, ?, ?, ?, ?, ?, ?)
+        ON CONFLICT (commit_sha) DO UPDATE SET
+            timestamp = excluded.timestamp,
+            message = excluded.message,
+            author_name = excluded.author_name,
+            author_email = excluded.author_email,
+            committer_name = excluded.committer_name,
+            committer_email = excluded.committer_email,
+            tree_sha = excluded.tree_sha,
+            url = excluded.url
+        "#,
+        params![
+            commit.id,
+            timestamp,
+            message,
+            author_name,
+            author_email,
+            committer_name,
+            committer_email,
+            tree_sha,
+            url,
+        ],
+    )
+    .with_context(|| format!("upserting commit {}", commit.id))?;
+    Ok(UpsertOutcome { warnings })
+}
+
+fn require_field(
+    field: &Option<String>,
+    name: &str,
+    sha: &str,
+    warnings: &mut Vec<String>,
+) -> String {
+    match field {
+        Some(s) => s.clone(),
+        None => {
+            warnings.push(format!("commit {sha} missing {name}"));
+            String::new()
+        }
+    }
+}
+
+/// Per-call warning bag returned to the caller for logging.
+#[derive(Debug, Default)]
+pub struct UpsertOutcome {
+    pub warnings: Vec<String>,
+}
diff --git a/benchmarks-website/migrate/src/lib.rs b/benchmarks-website/migrate/src/lib.rs
new file mode 100644
index 00000000000..f02db73b4b7
--- /dev/null
+++ b/benchmarks-website/migrate/src/lib.rs
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! One-shot historical migrator from v2's S3-hosted benchmark dataset
+//! to a v3 DuckDB file.
+//!
+//! The v2 dataset is JSONL of bare benchmark records keyed by name string.
+//! v3 uses five typed fact tables with explicit dim columns. This crate
+//! ports v2's `getGroup` classifier from the v2 Express server
+//! bug-for-bug so that historical rows survive the migration with the
+//! same group / chart / series structure as the live v2 server.
+//!
+//! The migrator is throwaway: once v3 cuts over, both the binary and
+//! the classifier go away.
+ +pub mod classifier; +pub mod commits; +pub mod migrate; +pub mod source; +pub mod v2; +pub mod verify; diff --git a/benchmarks-website/migrate/src/main.rs b/benchmarks-website/migrate/src/main.rs new file mode 100644 index 00000000000..366834ed441 --- /dev/null +++ b/benchmarks-website/migrate/src/main.rs @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! `vortex-bench-migrate` CLI: a one-shot historical migrator from +//! v2's S3 dataset into a v3 DuckDB file, plus a structural diff +//! against the live v2 `/api/metadata` endpoint for spotting +//! classifier regressions. + +use std::path::PathBuf; +use std::process::ExitCode; + +use anyhow::Context as _; +use anyhow::Result; +use clap::Parser; +use clap::Subcommand; +use clap::ValueEnum; +use tracing_subscriber::EnvFilter; +use vortex_bench_migrate::migrate; +use vortex_bench_migrate::source::Source; +use vortex_bench_migrate::verify; + +/// One-shot historical migrator from v2's S3 dataset to v3 DuckDB. +#[derive(Debug, Parser)] +#[command(name = "vortex-bench-migrate", version, about)] +struct Cli { + #[command(subcommand)] + command: Command, +} + +#[derive(Debug, Subcommand)] +enum Command { + /// Read v2's data.json.gz / commits.json / file-sizes-*.json.gz + /// and write a fully populated v3 DuckDB at `--output`. + Run { + /// Path to write the v3 DuckDB to. Created if absent. + #[arg(long)] + output: PathBuf, + /// Where to fetch v2 dumps from. + #[arg(long, value_enum, default_value_t = SourceKind::PublicS3)] + source: SourceKind, + /// For `--source=local`, the directory containing + /// `data.json.gz`, `commits.json`, and `file-sizes-*.json.gz`. + #[arg(long, required_if_eq("source", "local"))] + source_dir: Option, + }, + /// Diff a migrated DuckDB against the live v2 `/api/metadata` + /// endpoint. Exits 0 if every v2 group is present in v3, 1 + /// otherwise so this can gate a CI step. 
+ Verify { + /// HTTPS root of a running v2 server (e.g. `https://bench.vortex.dev`). + #[arg(long)] + against: String, + /// Path to the migrated v3 DuckDB. + #[arg(long)] + duckdb: PathBuf, + }, +} + +#[derive(Debug, Clone, Copy, ValueEnum)] +enum SourceKind { + PublicS3, + Local, +} + +fn main() -> ExitCode { + if let Err(err) = run() { + eprintln!("error: {err:#}"); + return ExitCode::from(2); + } + ExitCode::SUCCESS +} + +fn run() -> Result<()> { + tracing_subscriber::fmt() + .with_env_filter( + EnvFilter::try_from_env("VORTEX_BENCH_LOG").unwrap_or_else(|_| EnvFilter::new("info")), + ) + .init(); + + let cli = Cli::parse(); + match cli.command { + Command::Run { + output, + source, + source_dir, + } => { + let source = match source { + SourceKind::PublicS3 => Source::PublicS3, + SourceKind::Local => { + Source::Local(source_dir.context("--source=local requires --source-dir")?) + } + }; + let summary = migrate::run(&source, &output)?; + print!("{summary}"); + if summary.uncategorized_fraction() > 0.05 { + anyhow::bail!( + "uncategorized records ({:.2}%) exceed the 5% gate; \ + stop and report unmatched prefixes (see summary above) \ + before proceeding", + 100.0 * summary.uncategorized_fraction() + ); + } + Ok(()) + } + Command::Verify { against, duckdb } => { + let report = verify::run(&against, &duckdb)?; + print!("{report}"); + if !report.v2_groups_covered() { + std::process::exit(1); + } + Ok(()) + } + } +} diff --git a/benchmarks-website/migrate/src/migrate.rs b/benchmarks-website/migrate/src/migrate.rs new file mode 100644 index 00000000000..7b3b32bb51c --- /dev/null +++ b/benchmarks-website/migrate/src/migrate.rs @@ -0,0 +1,836 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! End-to-end migration of one v2 dataset into a v3 DuckDB file. +//! +//! Streams `data.json.gz` line-by-line, runs each record through the +//! 
[`classifier`], and writes one row per record into the appropriate v3 fact table. +//! Every row's `measurement_id` is computed via the server's `measurement_id_*` functions so the +//! result is byte-compatible with what fresh `/api/ingest` would have produced. +//! +//! Bulk-load shape: rows are accumulated in memory as parallel column +//! vectors, deduplicated by `measurement_id`, then flushed to DuckDB +//! via `Appender::append_record_batch` as one Arrow `RecordBatch` per +//! fact table. + +use std::collections::BTreeMap; +use std::io::BufRead; +use std::path::Path; +use std::sync::Arc; +use std::time::Duration; +use std::time::Instant; + +use anyhow::Context as _; +use anyhow::Result; +use arrow_array::ArrayRef; +use arrow_array::Int32Array; +use arrow_array::Int64Array; +use arrow_array::ListArray; +use arrow_array::RecordBatch; +use arrow_array::StringArray; +use arrow_buffer::OffsetBuffer; +use arrow_schema::DataType; +use arrow_schema::Field; +use arrow_schema::Schema; +use duckdb::Connection; +use tracing::info; +use tracing::warn; +use vortex_bench_server::db::measurement_id_compression_size; +use vortex_bench_server::db::measurement_id_compression_time; +use vortex_bench_server::db::measurement_id_query; +use vortex_bench_server::db::measurement_id_random_access; +use vortex_bench_server::records::CompressionSize; +use vortex_bench_server::records::CompressionTime; +use vortex_bench_server::records::QueryMeasurement; +use vortex_bench_server::records::RandomAccessTime; +use vortex_bench_server::schema::SCHEMA_DDL; +use vortex_utils::aliases::hash_map::HashMap; + +use crate::classifier; +use crate::classifier::V3Bin; +use crate::commits::upsert_commit; +use crate::source::Source; +use crate::v2::V2Commit; +use crate::v2::V2FileSize; +use crate::v2::V2Record; +use crate::v2::index_commits; +use crate::v2::runtime_as_i64; +use crate::v2::value_as_f64; + +/// Per-table insert counts, plus skip / missing counts. 
+#[derive(Debug, Default, Clone)] +pub struct MigrationSummary { + pub records_read: u64, + pub query_inserted: u64, + pub compression_time_inserted: u64, + pub compression_size_inserted: u64, + pub random_access_inserted: u64, + pub file_size_inserted: u64, + pub uncategorized: u64, + pub uncategorized_prefixes: BTreeMap, + pub missing_commit: u64, + pub commit_warnings: u64, + pub skipped_no_value: u64, + pub skipped_intentional: u64, + pub commits_inserted: u64, + pub deduped: u64, + /// Number of records dropped by dedup whose `value_ns` (or + /// `value_bytes` for compression_sizes' replace path) differed + /// from the kept row's. Non-zero is a smell worth investigating. + pub deduped_with_conflict: u64, +} + +impl MigrationSummary { + /// Total `data.json.gz` records that landed in some v3 fact table. + pub fn total_inserted(&self) -> u64 { + self.query_inserted + + self.compression_time_inserted + + self.compression_size_inserted + + self.random_access_inserted + } + + /// Fraction of records that were uncategorized. The orchestrator + /// stops if this exceeds the documented 5% threshold. + pub fn uncategorized_fraction(&self) -> f64 { + if self.records_read == 0 { + return 0.0; + } + self.uncategorized as f64 / self.records_read as f64 + } +} + +/// Open or create a DuckDB at `path` and apply the v3 schema. The +/// migrator is a one-shot fresh load; the bulk-append flush is pure +/// insert (no `ON CONFLICT`), so any stale rows in `path` would clash +/// with the next run on the same primary keys. Delete both the +/// database file and its WAL companion up front so every run starts +/// from a known-empty state. 
+pub fn open_target_db(path: &Path) -> Result { + remove_if_exists(path)?; + let wal = wal_path(path); + remove_if_exists(&wal)?; + let conn = + Connection::open(path).with_context(|| format!("opening DuckDB at {}", path.display()))?; + conn.execute_batch(SCHEMA_DDL) + .context("applying v3 schema DDL")?; + Ok(conn) +} + +fn remove_if_exists(path: &Path) -> Result<()> { + match std::fs::remove_file(path) { + Ok(()) => { + info!(path = %path.display(), "removed pre-existing target file"); + Ok(()) + } + Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()), + Err(e) => Err(e).with_context(|| format!("removing {}", path.display())), + } +} + +/// DuckDB writes its write-ahead log next to the database file with a +/// `.wal` suffix appended (e.g. `v3.duckdb` -> `v3.duckdb.wal`). +fn wal_path(path: &Path) -> std::path::PathBuf { + let mut name = path.as_os_str().to_owned(); + name.push(".wal"); + std::path::PathBuf::from(name) +} + +/// Run the whole migration: commits, data.json.gz, and every +/// file-sizes-*.json.gz under the source. +pub fn run(source: &Source, target: &Path) -> Result { + let mut conn = open_target_db(target)?; + let mut summary = MigrationSummary::default(); + + info!(source = %source.describe(), "Reading commits.json"); + let commits = read_commits(source)?; + info!(commits = commits.len(), "Loaded commits"); + summary.commits_inserted = upsert_all_commits(&mut conn, &commits, &mut summary)?; + + let mut q = QueryAccum::default(); + let mut ct = CompressionTimeAccum::default(); + let mut cs = CompressionSizeAccum::default(); + let mut ra = RandomAccessAccum::default(); + + info!("Migrating data.json.gz"); + migrate_data_jsonl( + source, + &commits, + &mut summary, + &mut q, + &mut ct, + &mut cs, + &mut ra, + )?; + info!(records = summary.records_read, "data.json.gz done"); + + for name in source.list_file_sizes()? 
{ + info!(name = %name, "Migrating file-sizes"); + if let Err(e) = migrate_file_sizes(source, &name, &commits, &mut summary, &mut cs) { + warn!("file-sizes file {name} failed: {e:#}"); + } + } + + info!("Flushing accumulators to DuckDB"); + summary.query_inserted = q.measurement_id.len() as u64; + summary.compression_time_inserted = ct.measurement_id.len() as u64; + summary.random_access_inserted = ra.measurement_id.len() as u64; + summary.compression_size_inserted = cs.rows.len() as u64; + + flush(&conn, "query_measurements", build_query_batch(q)?)?; + flush( + &conn, + "compression_times", + build_compression_time_batch(ct)?, + )?; + flush(&conn, "random_access_times", build_random_access_batch(ra)?)?; + flush( + &conn, + "compression_sizes", + build_compression_size_batch(cs)?, + )?; + + Ok(summary) +} + +fn read_commits(source: &Source) -> Result> { + let reader = source.open_commits_jsonl()?; + let mut commits: Vec = Vec::new(); + for line in reader.lines() { + let line = line?; + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + match serde_json::from_str::(trimmed) { + Ok(c) => commits.push(c), + Err(e) => warn!("skipping malformed commits.json line: {e}"), + } + } + Ok(index_commits(commits)) +} + +fn upsert_all_commits( + conn: &mut Connection, + commits: &BTreeMap, + summary: &mut MigrationSummary, +) -> Result { + let tx = conn.transaction().context("begin commits transaction")?; + let mut count = 0u64; + for commit in commits.values() { + let outcome = upsert_commit(&tx, commit)?; + for w in outcome.warnings { + warn!("{w}"); + summary.commit_warnings += 1; + } + count += 1; + } + tx.commit().context("commit commits transaction")?; + Ok(count) +} + +/// Stream `data.json.gz` and push classified records into the +/// per-table accumulators. Dedup happens inside each accumulator's +/// `push` method by `measurement_id`. 
+fn migrate_data_jsonl( + source: &Source, + commits: &BTreeMap, + summary: &mut MigrationSummary, + q: &mut QueryAccum, + ct: &mut CompressionTimeAccum, + cs: &mut CompressionSizeAccum, + ra: &mut RandomAccessAccum, +) -> Result<()> { + let reader = source.open_data_jsonl()?; + let started = Instant::now(); + let mut last_log = Instant::now(); + for line in reader.lines() { + let line = line?; + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + summary.records_read += 1; + let record: V2Record = match serde_json::from_str(trimmed) { + Ok(r) => r, + Err(e) => { + warn!("skipping malformed data.json line: {e}"); + continue; + } + }; + apply_v2_record(&record, commits, summary, q, ct, cs, ra); + if last_log.elapsed() >= Duration::from_secs(5) { + let elapsed = started.elapsed().as_secs_f64(); + let rate = summary.records_read as f64 / elapsed.max(0.001); + info!( + records = summary.records_read, + rate = format!("{rate:.0}/s"), + query = q.measurement_id.len(), + compression_time = ct.measurement_id.len(), + compression_size = cs.rows.len(), + random_access = ra.measurement_id.len(), + "migration progress", + ); + last_log = Instant::now(); + } + } + Ok(()) +} + +fn apply_v2_record( + record: &V2Record, + commits: &BTreeMap, + summary: &mut MigrationSummary, + q: &mut QueryAccum, + ct: &mut CompressionTimeAccum, + cs: &mut CompressionSizeAccum, + ra: &mut RandomAccessAccum, +) { + let Some(sha) = record.commit_id.clone() else { + summary.missing_commit += 1; + return; + }; + if !commits.contains_key(&sha) { + summary.missing_commit += 1; + return; + } + + let bin = match classifier::classify_outcome(record) { + classifier::Outcome::Bin(b) => b, + classifier::Outcome::Skip(_) => { + summary.skipped_intentional += 1; + return; + } + classifier::Outcome::Unknown => { + summary.uncategorized += 1; + let prefix = record.name.split('/').next().unwrap_or("").to_string(); + *summary.uncategorized_prefixes.entry(prefix).or_insert(0) += 1; + return; + } + 
}; + + let env_triple = record.env_triple.as_ref().and_then(|t| t.to_triple()); + let runtimes = record + .all_runtimes + .as_ref() + .map(|v| v.iter().filter_map(runtime_as_i64).collect::>()) + .unwrap_or_default(); + let value_f64 = match record.value.as_ref().and_then(value_as_f64) { + Some(v) => v, + None => { + summary.skipped_no_value += 1; + return; + } + }; + + match bin { + V3Bin::Query { + dataset, + dataset_variant, + scale_factor, + query_idx, + storage, + engine, + format, + } => { + let qm = QueryMeasurement { + commit_sha: sha, + dataset, + dataset_variant, + scale_factor, + query_idx, + storage, + engine, + format, + value_ns: value_f64 as i64, + all_runtimes_ns: runtimes, + peak_physical: None, + peak_virtual: None, + physical_delta: None, + virtual_delta: None, + env_triple, + }; + let mid = measurement_id_query(&qm); + q.push(mid, qm, summary); + } + V3Bin::CompressionTime { + dataset, + dataset_variant, + format, + op, + } => { + let ctr = CompressionTime { + commit_sha: sha, + dataset, + dataset_variant, + format, + op, + value_ns: value_f64 as i64, + all_runtimes_ns: runtimes, + env_triple, + }; + let mid = measurement_id_compression_time(&ctr); + ct.push(mid, ctr, summary); + } + V3Bin::CompressionSize { + dataset, + dataset_variant, + format, + } => { + let csr = CompressionSize { + commit_sha: sha, + dataset, + dataset_variant, + format, + value_bytes: value_f64 as i64, + }; + let mid = measurement_id_compression_size(&csr); + cs.push_replace(mid, csr, summary); + } + V3Bin::RandomAccess { dataset, format } => { + let rar = RandomAccessTime { + commit_sha: sha, + dataset, + format, + value_ns: value_f64 as i64, + all_runtimes_ns: runtimes, + env_triple, + }; + let mid = measurement_id_random_access(&rar); + ra.push(mid, rar, summary); + } + } +} + +fn migrate_file_sizes( + source: &Source, + name: &str, + commits: &BTreeMap, + summary: &mut MigrationSummary, + cs: &mut CompressionSizeAccum, +) -> Result<()> { + let reader = 
source.open_file_sizes(name)?; + let dataset_fallback = name + .strip_prefix("file-sizes-") + .and_then(|s| s.strip_suffix(".json.gz")) + .unwrap_or(name) + .to_string(); + let started = Instant::now(); + let mut last_log = Instant::now(); + for line in reader.lines() { + let line = line?; + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + let sz: V2FileSize = match serde_json::from_str(trimmed) { + Ok(r) => r, + Err(e) => { + warn!("skipping malformed {name} line: {e}"); + continue; + } + }; + if !commits.contains_key(&sz.commit_id) { + summary.missing_commit += 1; + continue; + } + let dataset = if sz.benchmark.is_empty() { + dataset_fallback.clone() + } else { + sz.benchmark.clone() + }; + let dataset_variant = sz + .scale_factor + .as_ref() + .filter(|s| !s.is_empty() && s.as_str() != "1.0") + .cloned(); + let csr = CompressionSize { + commit_sha: sz.commit_id.clone(), + dataset, + dataset_variant, + format: sz.format.clone(), + value_bytes: sz.size_bytes, + }; + let mid = measurement_id_compression_size(&csr); + cs.push_sum(mid, csr); + summary.file_size_inserted += 1; + if last_log.elapsed() >= Duration::from_secs(5) { + let elapsed = started.elapsed().as_secs_f64(); + let rate = summary.file_size_inserted as f64 / elapsed.max(0.001); + info!( + name = %name, + file_sizes = summary.file_size_inserted, + rate = format!("{rate:.0}/s"), + "file-sizes progress", + ); + last_log = Instant::now(); + } + } + Ok(()) +} + +/// Append an Arrow `RecordBatch` to a DuckDB table via `Appender`. 
+fn flush(conn: &Connection, table: &str, batch: RecordBatch) -> Result<()> { + let mut app = conn + .appender(table) + .with_context(|| format!("opening appender for {table}"))?; + app.append_record_batch(batch) + .with_context(|| format!("appending record batch to {table}"))?; + drop(app); + Ok(()) +} + +#[derive(Default)] +struct QueryAccum { + measurement_id: Vec, + commit_sha: Vec, + dataset: Vec, + dataset_variant: Vec>, + scale_factor: Vec>, + query_idx: Vec, + storage: Vec, + engine: Vec, + format: Vec, + value_ns: Vec, + all_runtimes_ns: Vec>, + peak_physical: Vec>, + peak_virtual: Vec>, + physical_delta: Vec>, + virtual_delta: Vec>, + env_triple: Vec>, + /// `mid` -> index in the parallel column vecs. Lets us look up the + /// kept row's `value_ns` on collision so we can flag conflicts. + seen: HashMap, +} + +impl QueryAccum { + fn push(&mut self, mid: i64, r: QueryMeasurement, summary: &mut MigrationSummary) { + if let Some(&idx) = self.seen.get(&mid) { + summary.deduped += 1; + if self.value_ns[idx] != r.value_ns { + summary.deduped_with_conflict += 1; + } + return; + } + let idx = self.measurement_id.len(); + self.seen.insert(mid, idx); + self.measurement_id.push(mid); + self.commit_sha.push(r.commit_sha); + self.dataset.push(r.dataset); + self.dataset_variant.push(r.dataset_variant); + self.scale_factor.push(r.scale_factor); + self.query_idx.push(r.query_idx); + self.storage.push(r.storage); + self.engine.push(r.engine); + self.format.push(r.format); + self.value_ns.push(r.value_ns); + self.all_runtimes_ns.push(r.all_runtimes_ns); + self.peak_physical.push(r.peak_physical); + self.peak_virtual.push(r.peak_virtual); + self.physical_delta.push(r.physical_delta); + self.virtual_delta.push(r.virtual_delta); + self.env_triple.push(r.env_triple); + } +} + +#[derive(Default)] +struct CompressionTimeAccum { + measurement_id: Vec, + commit_sha: Vec, + dataset: Vec, + dataset_variant: Vec>, + format: Vec, + op: Vec, + value_ns: Vec, + all_runtimes_ns: Vec>, + 
env_triple: Vec>, + seen: HashMap, +} + +impl CompressionTimeAccum { + fn push(&mut self, mid: i64, r: CompressionTime, summary: &mut MigrationSummary) { + if let Some(&idx) = self.seen.get(&mid) { + summary.deduped += 1; + if self.value_ns[idx] != r.value_ns { + summary.deduped_with_conflict += 1; + } + return; + } + let idx = self.measurement_id.len(); + self.seen.insert(mid, idx); + self.measurement_id.push(mid); + self.commit_sha.push(r.commit_sha); + self.dataset.push(r.dataset); + self.dataset_variant.push(r.dataset_variant); + self.format.push(r.format); + self.op.push(r.op); + self.value_ns.push(r.value_ns); + self.all_runtimes_ns.push(r.all_runtimes_ns); + self.env_triple.push(r.env_triple); + } +} + +#[derive(Default)] +struct RandomAccessAccum { + measurement_id: Vec, + commit_sha: Vec, + dataset: Vec, + format: Vec, + value_ns: Vec, + all_runtimes_ns: Vec>, + env_triple: Vec>, + seen: HashMap, +} + +impl RandomAccessAccum { + fn push(&mut self, mid: i64, r: RandomAccessTime, summary: &mut MigrationSummary) { + if let Some(&idx) = self.seen.get(&mid) { + summary.deduped += 1; + if self.value_ns[idx] != r.value_ns { + summary.deduped_with_conflict += 1; + } + return; + } + let idx = self.measurement_id.len(); + self.seen.insert(mid, idx); + self.measurement_id.push(mid); + self.commit_sha.push(r.commit_sha); + self.dataset.push(r.dataset); + self.format.push(r.format); + self.value_ns.push(r.value_ns); + self.all_runtimes_ns.push(r.all_runtimes_ns); + self.env_triple.push(r.env_triple); + } +} + +/// `compression_sizes` is fed by both data.json.gz (replace-on-collision) +/// and file-sizes-*.json.gz (sum-on-collision). Stored as a map; converted +/// to a `RecordBatch` at flush time. +#[derive(Default)] +struct CompressionSizeAccum { + rows: HashMap, +} + +impl CompressionSizeAccum { + /// data.json.gz path: latest write wins, mirroring the prior + /// `ON CONFLICT DO UPDATE SET value_bytes = excluded.value_bytes`. 
+ /// Bumps `deduped_with_conflict` when an existing row's + /// `value_bytes` differs from the incoming row's, so silent + /// value-corruption is observable. + fn push_replace(&mut self, mid: i64, r: CompressionSize, summary: &mut MigrationSummary) { + if let Some(existing) = self.rows.get(&mid) + && existing.value_bytes != r.value_bytes + { + summary.deduped_with_conflict += 1; + } + self.rows.insert(mid, r); + } + + /// file-sizes-*.json.gz path: per-file rows aggregate into one + /// `(commit, dataset, dataset_variant, format)` row by summing, + /// mirroring the prior `value_bytes = compression_sizes.value_bytes + /// + excluded.value_bytes`. + fn push_sum(&mut self, mid: i64, r: CompressionSize) { + let add = r.value_bytes; + self.rows + .entry(mid) + .and_modify(|x| x.value_bytes += add) + .or_insert(r); + } +} + +fn build_query_batch(a: QueryAccum) -> Result { + let schema = Arc::new(Schema::new(vec![ + Field::new("measurement_id", DataType::Int64, false), + Field::new("commit_sha", DataType::Utf8, false), + Field::new("dataset", DataType::Utf8, false), + Field::new("dataset_variant", DataType::Utf8, true), + Field::new("scale_factor", DataType::Utf8, true), + Field::new("query_idx", DataType::Int32, false), + Field::new("storage", DataType::Utf8, false), + Field::new("engine", DataType::Utf8, false), + Field::new("format", DataType::Utf8, false), + Field::new("value_ns", DataType::Int64, false), + Field::new( + "all_runtimes_ns", + DataType::List(Arc::new(Field::new("item", DataType::Int64, false))), + false, + ), + Field::new("peak_physical", DataType::Int64, true), + Field::new("peak_virtual", DataType::Int64, true), + Field::new("physical_delta", DataType::Int64, true), + Field::new("virtual_delta", DataType::Int64, true), + Field::new("env_triple", DataType::Utf8, true), + ])); + let cols: Vec = vec![ + Arc::new(Int64Array::from(a.measurement_id)), + Arc::new(StringArray::from(a.commit_sha)), + Arc::new(StringArray::from(a.dataset)), + 
Arc::new(StringArray::from(a.dataset_variant)), + Arc::new(StringArray::from(a.scale_factor)), + Arc::new(Int32Array::from(a.query_idx)), + Arc::new(StringArray::from(a.storage)), + Arc::new(StringArray::from(a.engine)), + Arc::new(StringArray::from(a.format)), + Arc::new(Int64Array::from(a.value_ns)), + Arc::new(build_list_int64(a.all_runtimes_ns)), + Arc::new(Int64Array::from(a.peak_physical)), + Arc::new(Int64Array::from(a.peak_virtual)), + Arc::new(Int64Array::from(a.physical_delta)), + Arc::new(Int64Array::from(a.virtual_delta)), + Arc::new(StringArray::from(a.env_triple)), + ]; + Ok(RecordBatch::try_new(schema, cols)?) +} + +fn build_compression_time_batch(a: CompressionTimeAccum) -> Result { + let schema = Arc::new(Schema::new(vec![ + Field::new("measurement_id", DataType::Int64, false), + Field::new("commit_sha", DataType::Utf8, false), + Field::new("dataset", DataType::Utf8, false), + Field::new("dataset_variant", DataType::Utf8, true), + Field::new("format", DataType::Utf8, false), + Field::new("op", DataType::Utf8, false), + Field::new("value_ns", DataType::Int64, false), + Field::new( + "all_runtimes_ns", + DataType::List(Arc::new(Field::new("item", DataType::Int64, false))), + false, + ), + Field::new("env_triple", DataType::Utf8, true), + ])); + let cols: Vec = vec![ + Arc::new(Int64Array::from(a.measurement_id)), + Arc::new(StringArray::from(a.commit_sha)), + Arc::new(StringArray::from(a.dataset)), + Arc::new(StringArray::from(a.dataset_variant)), + Arc::new(StringArray::from(a.format)), + Arc::new(StringArray::from(a.op)), + Arc::new(Int64Array::from(a.value_ns)), + Arc::new(build_list_int64(a.all_runtimes_ns)), + Arc::new(StringArray::from(a.env_triple)), + ]; + Ok(RecordBatch::try_new(schema, cols)?) 
+} + +fn build_random_access_batch(a: RandomAccessAccum) -> Result { + let schema = Arc::new(Schema::new(vec![ + Field::new("measurement_id", DataType::Int64, false), + Field::new("commit_sha", DataType::Utf8, false), + Field::new("dataset", DataType::Utf8, false), + Field::new("format", DataType::Utf8, false), + Field::new("value_ns", DataType::Int64, false), + Field::new( + "all_runtimes_ns", + DataType::List(Arc::new(Field::new("item", DataType::Int64, false))), + false, + ), + Field::new("env_triple", DataType::Utf8, true), + ])); + let cols: Vec = vec![ + Arc::new(Int64Array::from(a.measurement_id)), + Arc::new(StringArray::from(a.commit_sha)), + Arc::new(StringArray::from(a.dataset)), + Arc::new(StringArray::from(a.format)), + Arc::new(Int64Array::from(a.value_ns)), + Arc::new(build_list_int64(a.all_runtimes_ns)), + Arc::new(StringArray::from(a.env_triple)), + ]; + Ok(RecordBatch::try_new(schema, cols)?) +} + +fn build_compression_size_batch(a: CompressionSizeAccum) -> Result { + let n = a.rows.len(); + let mut measurement_id = Vec::with_capacity(n); + let mut commit_sha = Vec::with_capacity(n); + let mut dataset = Vec::with_capacity(n); + let mut dataset_variant = Vec::with_capacity(n); + let mut format = Vec::with_capacity(n); + let mut value_bytes = Vec::with_capacity(n); + for (mid, cs) in a.rows { + measurement_id.push(mid); + commit_sha.push(cs.commit_sha); + dataset.push(cs.dataset); + dataset_variant.push(cs.dataset_variant); + format.push(cs.format); + value_bytes.push(cs.value_bytes); + } + let schema = Arc::new(Schema::new(vec![ + Field::new("measurement_id", DataType::Int64, false), + Field::new("commit_sha", DataType::Utf8, false), + Field::new("dataset", DataType::Utf8, false), + Field::new("dataset_variant", DataType::Utf8, true), + Field::new("format", DataType::Utf8, false), + Field::new("value_bytes", DataType::Int64, false), + ])); + let cols: Vec = vec![ + Arc::new(Int64Array::from(measurement_id)), + 
Arc::new(StringArray::from(commit_sha)), + Arc::new(StringArray::from(dataset)), + Arc::new(StringArray::from(dataset_variant)), + Arc::new(StringArray::from(format)), + Arc::new(Int64Array::from(value_bytes)), + ]; + Ok(RecordBatch::try_new(schema, cols)?) +} + +/// Build a non-nullable `List` Arrow array from one inner Vec +/// per row. The outer list is non-null; inner i64 values are non-null. +fn build_list_int64(values: Vec>) -> ListArray { + let mut offsets: Vec = Vec::with_capacity(values.len() + 1); + offsets.push(0); + let mut flat: Vec = Vec::new(); + for inner in values { + flat.extend_from_slice(&inner); + offsets.push(flat.len() as i32); + } + let values_arr = Int64Array::from(flat); + let field = Arc::new(Field::new("item", DataType::Int64, false)); + ListArray::new( + field, + OffsetBuffer::new(offsets.into()), + Arc::new(values_arr), + None, + ) +} + +/// Print the summary in a human-readable form. Returned by the CLI. +impl std::fmt::Display for MigrationSummary { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "Records read: {}", self.records_read)?; + writeln!(f, "Commits upserted: {}", self.commits_inserted)?; + writeln!(f, "Commit warnings: {}", self.commit_warnings)?; + writeln!(f, "Inserted (query): {}", self.query_inserted)?; + writeln!( + f, + "Inserted (compress t): {}", + self.compression_time_inserted + )?; + writeln!( + f, + "Inserted (compress s): {}", + self.compression_size_inserted + )?; + writeln!(f, "Inserted (random acc): {}", self.random_access_inserted)?; + writeln!(f, "Inserted (file sizes): {}", self.file_size_inserted)?; + writeln!(f, "Missing commit: {}", self.missing_commit)?; + writeln!(f, "Skipped (no value): {}", self.skipped_no_value)?; + writeln!(f, "Skipped (intentional): {}", self.skipped_intentional)?; + writeln!(f, "Deduplicated: {}", self.deduped)?; + writeln!(f, "Dedup w/ value diff: {}", self.deduped_with_conflict)?; + writeln!( + f, + "Uncategorized: {} ({:.2}%)", + 
self.uncategorized, + 100.0 * self.uncategorized_fraction() + )?; + if !self.uncategorized_prefixes.is_empty() { + let mut top: Vec<_> = self.uncategorized_prefixes.iter().collect(); + top.sort_by(|a, b| b.1.cmp(a.1)); + writeln!(f, "Top uncategorized prefixes:")?; + for (prefix, n) in top.iter().take(20) { + writeln!(f, " {prefix:>32} : {n}")?; + } + } + Ok(()) + } +} diff --git a/benchmarks-website/migrate/src/source.rs b/benchmarks-website/migrate/src/source.rs new file mode 100644 index 00000000000..c18e86a63ca --- /dev/null +++ b/benchmarks-website/migrate/src/source.rs @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Streaming readers for v2's public S3 bucket. +//! +//! The bucket is `--no-sign-request`, so we fetch the underlying +//! HTTPS URL directly and stream-decompress with `flate2`. The +//! downloads are wrapped in [`reqwest::blocking`] to keep the read +//! path synchronous; the binary's hot path is single-threaded +//! per-source already (DuckDB is a single-writer). +//! +//! For tests and offline runs, [`Source::Local`] accepts a local +//! directory of dumps; the migrator's `--source` flag picks the +//! variant. + +use std::fs::File; +use std::io::BufRead; +use std::io::BufReader; +use std::io::Read; +use std::path::Path; +use std::path::PathBuf; + +use anyhow::Context as _; +use anyhow::Result; +use flate2::read::GzDecoder; +use tracing::info; + +/// Public S3 bucket the live v2 server reads from. +pub const PUBLIC_BUCKET_BASE: &str = "https://vortex-ci-benchmark-results.s3.amazonaws.com"; + +/// Where to read the v2 dataset from. Either the public S3 bucket +/// (the live deployment) or a local directory of dumps. +#[derive(Debug, Clone)] +pub enum Source { + /// HTTPS GETs against `s3.amazonaws.com`. + PublicS3, + /// A directory containing `data.json.gz`, `commits.json`, and + /// `file-sizes-*.json.gz` files. 
+ Local(PathBuf), +} + +impl Source { + /// Short human-readable description for log messages. + pub fn describe(&self) -> String { + match self { + Source::PublicS3 => "public S3 bucket".to_string(), + Source::Local(p) => format!("local dir {}", p.display()), + } + } + + /// Open `data.json.gz` for streaming, decompressing on the fly. + pub fn open_data_jsonl(&self) -> Result> { + let stream = self.open_raw("data.json.gz")?; + Ok(Box::new(BufReader::new(GzDecoder::new(stream)))) + } + + /// Open `commits.json` (uncompressed). + pub fn open_commits_jsonl(&self) -> Result> { + let stream = self.open_raw("commits.json")?; + Ok(Box::new(BufReader::new(stream))) + } + + /// Enumerate `file-sizes-*.json.gz` files. For local sources this + /// is a directory glob; for the public bucket we hit the documented + /// suite ids. + pub fn list_file_sizes(&self) -> Result> { + match self { + Source::Local(dir) => { + let mut out = Vec::new(); + for entry in std::fs::read_dir(dir)? { + let entry = entry?; + let name = entry.file_name(); + let s = name.to_string_lossy(); + if s.starts_with("file-sizes-") && s.ends_with(".json.gz") { + out.push(s.into_owned()); + } + } + out.sort(); + Ok(out) + } + Source::PublicS3 => { + // The S3 bucket's ListObjects is denied for unsigned + // requests, so we hit the documented per-suite keys + // emitted by `.github/workflows/sql-benchmarks.yml`. + Ok(KNOWN_FILE_SIZES_SUITES + .iter() + .map(|id| format!("file-sizes-{id}.json.gz")) + .collect()) + } + } + } + + /// Open one `file-sizes-*.json.gz` for streaming. 
+ pub fn open_file_sizes(&self, name: &str) -> Result> { + let stream = self.open_raw(name)?; + Ok(Box::new(BufReader::new(GzDecoder::new(stream)))) + } + + fn open_raw(&self, name: &str) -> Result> { + match self { + Source::Local(dir) => open_local(&dir.join(name)), + Source::PublicS3 => open_s3(name), + } + } +} + +fn open_local(path: &Path) -> Result> { + let f = File::open(path).with_context(|| format!("opening {}", path.display()))?; + Ok(Box::new(f)) +} + +fn open_s3(name: &str) -> Result> { + let url = format!("{PUBLIC_BUCKET_BASE}/{name}"); + info!(url = %url, "GET"); + let resp = reqwest::blocking::get(&url).with_context(|| format!("GET {url}"))?; + if !resp.status().is_success() { + anyhow::bail!("GET {url} returned {}", resp.status()); + } + Ok(Box::new(resp)) +} + +/// Suite IDs we know publish a `file-sizes-{id}.json.gz` to S3. +/// +/// Source of truth: the `matrix.id` values in +/// `.github/workflows/sql-benchmarks.yml`'s `benchmark_matrix` default. +/// The post-bench `file-sizes` step uploads `file-sizes-${{ matrix.id +/// }}.json.gz`, so this list must match those IDs verbatim. Adding a +/// new matrix entry to that workflow means adding the same ID here. +const KNOWN_FILE_SIZES_SUITES: &[&str] = &[ + "clickbench-nvme", + "tpch-nvme", + "tpch-s3", + "tpch-nvme-10", + "tpch-s3-10", + "tpcds-nvme", + "statpopgen", + "fineweb", + "fineweb-s3", + "polarsignals", +]; diff --git a/benchmarks-website/migrate/src/v2.rs b/benchmarks-website/migrate/src/v2.rs new file mode 100644 index 00000000000..2a9d3bdf5d0 --- /dev/null +++ b/benchmarks-website/migrate/src/v2.rs @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Wire shapes of the v2 benchmark dataset on S3. +//! +//! These types capture only the fields the migrator reads. v2 records +//! are serialized by `vortex-bench` (see `vortex-bench/src/measurements.rs`) +//! 
and by older non-Rust scripts; the union of fields is loose, so we +//! deserialize permissively (`serde(default)`, untyped `serde_json::Value` +//! for the polymorphic `dataset` field). + +use std::collections::BTreeMap; + +use serde::Deserialize; + +/// One JSONL line of `data.json.gz`. +/// +/// The shape is the union of every emitter's output. Most fields are +/// optional because different benches emit different subsets. +#[derive(Debug, Clone, Deserialize)] +pub struct V2Record { + pub name: String, + #[serde(default)] + pub commit_id: Option, + #[serde(default)] + pub unit: Option, + #[serde(default)] + pub value: Option, + #[serde(default)] + pub storage: Option, + #[serde(default)] + pub dataset: Option, + #[serde(default)] + pub all_runtimes: Option>, + #[serde(default)] + pub env_triple: Option, +} + +/// `dataset` in v2 records is sometimes a string, sometimes an object +/// keyed by suite name (`{ "tpch": { "scale_factor": "10" } }`). +/// This helper looks up the scale factor for a given suite without +/// assuming a particular shape. +pub fn dataset_scale_factor(dataset: &serde_json::Value, key: &str) -> Option { + let obj = dataset.as_object()?; + let entry = obj.get(key)?; + let sf = entry.get("scale_factor")?; + match sf { + serde_json::Value::String(s) => Some(s.clone()), + serde_json::Value::Number(n) => Some(n.to_string()), + _ => None, + } +} + +/// Best-effort numeric coercion for the polymorphic `value` field. +pub fn value_as_f64(value: &serde_json::Value) -> Option { + match value { + serde_json::Value::Number(n) => n.as_f64(), + serde_json::Value::String(s) => s.parse().ok(), + _ => None, + } +} + +/// Best-effort coercion of a runtime entry to nanoseconds. 
+pub fn runtime_as_i64(value: &serde_json::Value) -> Option { + match value { + serde_json::Value::Number(n) => { + if let Some(i) = n.as_i64() { + Some(i) + } else { + n.as_f64().map(|f| f as i64) + } + } + serde_json::Value::String(s) => s.parse().ok(), + _ => None, + } +} + +/// Triple block as emitted by `vortex-bench`'s `--gh-json` path. v2 +/// stored it as an object; we serialize it back out as `arch-os-env`. +#[derive(Debug, Clone, Deserialize)] +pub struct V2EnvTriple { + #[serde(default)] + pub architecture: Option, + #[serde(default)] + pub operating_system: Option, + #[serde(default)] + pub environment: Option, +} + +impl V2EnvTriple { + /// Format as the `arch-os-env` triple used by v3's `env_triple` column. + pub fn to_triple(&self) -> Option { + let arch = self.architecture.as_deref()?; + let os = self.operating_system.as_deref()?; + let env = self.environment.as_deref()?; + Some(format!("{arch}-{os}-{env}")) + } +} + +/// One JSONL line of `commits.json`. +#[derive(Debug, Clone, Deserialize)] +pub struct V2Commit { + pub id: String, + #[serde(default)] + pub timestamp: Option, + #[serde(default)] + pub message: Option, + #[serde(default)] + pub author: Option, + #[serde(default)] + pub committer: Option, + #[serde(default)] + pub tree_id: Option, + #[serde(default)] + pub url: Option, +} + +#[derive(Debug, Clone, Deserialize)] +pub struct V2Person { + #[serde(default)] + pub name: Option, + #[serde(default)] + pub email: Option, +} + +/// One JSONL line of `file-sizes-*.json.gz` produced by +/// `scripts/capture-file-sizes.py`. +#[derive(Debug, Clone, Deserialize)] +pub struct V2FileSize { + pub commit_id: String, + pub benchmark: String, + #[serde(default)] + pub scale_factor: Option, + pub format: String, + pub file: String, + pub size_bytes: i64, +} + +/// Build a sha-keyed map of commits. 
+pub fn index_commits(commits: Vec) -> BTreeMap { + commits.into_iter().map(|c| (c.id.clone(), c)).collect() +} diff --git a/benchmarks-website/migrate/src/verify.rs b/benchmarks-website/migrate/src/verify.rs new file mode 100644 index 00000000000..eb4caef6df7 --- /dev/null +++ b/benchmarks-website/migrate/src/verify.rs @@ -0,0 +1,350 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Structural diff between a migrated v3 DuckDB and the live v2 +//! `/api/metadata` endpoint. +//! +//! Compares group / chart structure only; values aren't compared +//! because v2 converts ns → ms and bytes → MiB on read while v3 +//! stores raw and the chart query divides. Group/chart structural +//! equivalence is enough to spot classifier regressions before +//! cutover. + +use std::collections::BTreeMap; +use std::collections::BTreeSet; +use std::path::Path; + +use anyhow::Context as _; +use anyhow::Result; +use duckdb::Connection; +use serde::Deserialize; + +use crate::classifier::QUERY_SUITES; + +/// Result of one `verify` run. +#[derive(Debug, Default)] +pub struct VerifyReport { + pub matched_groups: Vec, + pub only_in_v3: Vec, + pub only_in_v2: Vec, + pub chart_diffs: Vec, +} + +#[derive(Debug, Clone)] +pub struct ChartDiff { + pub group: String, + pub v2_count: usize, + pub v3_count: usize, +} + +impl VerifyReport { + /// True if every v2 group is represented in v3. The CLI's exit + /// code reflects this. 
+ pub fn v2_groups_covered(&self) -> bool { + self.only_in_v2.is_empty() + } +} + +impl std::fmt::Display for VerifyReport { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "Groups in both v2 and v3:")?; + for g in &self.matched_groups { + writeln!(f, " + {g}")?; + } + if !self.only_in_v2.is_empty() { + writeln!(f, "Groups only in v2 (regression candidates):")?; + for g in &self.only_in_v2 { + writeln!(f, " - {g}")?; + } + } + if !self.only_in_v3.is_empty() { + writeln!(f, "Groups only in v3:")?; + for g in &self.only_in_v3 { + writeln!(f, " + {g}")?; + } + } + if !self.chart_diffs.is_empty() { + writeln!(f, "Chart count diffs:")?; + for d in &self.chart_diffs { + writeln!( + f, + " {} : v2={} v3={} (delta={})", + d.group, + d.v2_count, + d.v3_count, + d.v3_count as i64 - d.v2_count as i64, + )?; + } + } + Ok(()) + } +} + +/// v2's `/api/metadata` reply — only the fields we need. +#[derive(Debug, Deserialize)] +struct V2Metadata { + groups: BTreeMap, +} + +#[derive(Debug, Deserialize)] +struct V2GroupMeta { + #[serde(default)] + charts: Vec, +} + +#[derive(Debug, Deserialize)] +struct V2ChartMeta { + #[serde(default)] + name: String, +} + +/// Open the migrated DuckDB at `duckdb_path`, fetch `/api/metadata`, +/// and produce a structural diff. +pub fn run(v2_server: &str, duckdb_path: &Path) -> Result { + let v3 = collect_v3_groups(duckdb_path)?; + let v2 = fetch_v2_metadata(v2_server)?; + Ok(diff(&v2, &v3)) +} + +fn collect_v3_groups(duckdb_path: &Path) -> Result>> { + let conn = Connection::open(duckdb_path) + .with_context(|| format!("opening DuckDB at {}", duckdb_path.display()))?; + let mut groups: BTreeMap> = BTreeMap::new(); + + // query_measurements: chart per (dataset, query_idx); group per + // (dataset, dataset_variant, scale_factor, storage). We want v2 + // group display names so the verifier can compare apples to + // apples, so we re-format them here using the same suite table. 
+ let mut stmt = conn.prepare( + r#" + SELECT dataset, dataset_variant, scale_factor, storage, query_idx + FROM query_measurements + GROUP BY dataset, dataset_variant, scale_factor, storage, query_idx + "#, + )?; + let rows = stmt.query_map([], |row| { + Ok(( + row.get::<_, String>(0)?, + row.get::<_, Option>(1)?, + row.get::<_, Option>(2)?, + row.get::<_, String>(3)?, + row.get::<_, i32>(4)?, + )) + })?; + for row in rows { + let (dataset, _variant, sf, storage, query_idx) = row?; + let group_name = display_query_group(&dataset, sf.as_deref(), &storage); + let chart_name = chart_name_query(&dataset, query_idx); + groups + .entry(group_name) + .or_default() + .insert(normalize_chart(&chart_name)); + } + + // compression_times: group "Compression", charts per dataset. + let mut stmt = conn.prepare( + r#" + SELECT dataset, format, op + FROM compression_times + GROUP BY dataset, format, op + "#, + )?; + let rows = stmt.query_map([], |row| { + Ok(( + row.get::<_, String>(0)?, + row.get::<_, String>(1)?, + row.get::<_, String>(2)?, + )) + })?; + for row in rows { + let (dataset, format, op) = row?; + let chart = chart_name_compression_time(&format, &op, &dataset); + groups + .entry("Compression".to_string()) + .or_default() + .insert(normalize_chart(&chart)); + } + + let mut stmt = conn.prepare( + r#" + SELECT dataset, format + FROM compression_sizes + GROUP BY dataset, format + "#, + )?; + let rows = stmt.query_map([], |row| { + Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)) + })?; + for row in rows { + let (_dataset, format) = row?; + let chart = chart_name_compression_size(&format); + groups + .entry("Compression Size".to_string()) + .or_default() + .insert(normalize_chart(&chart)); + } + + let mut stmt = conn.prepare( + r#" + SELECT DISTINCT dataset + FROM random_access_times + "#, + )?; + let rows = stmt.query_map([], |row| row.get::<_, String>(0))?; + for row in rows { + let dataset = row?; + groups + .entry("Random Access".to_string()) + .or_default() + 
.insert(normalize_chart(&dataset)); + } + + Ok(groups) +} + +fn fetch_v2_metadata(server: &str) -> Result>> { + let url = format!("{}/api/metadata", server.trim_end_matches('/')); + let body = reqwest::blocking::get(&url) + .with_context(|| format!("GET {url}"))? + .error_for_status() + .with_context(|| format!("non-2xx from {url}"))? + .json::() + .with_context(|| format!("parsing {url} as v2 /api/metadata"))?; + let mut out: BTreeMap> = BTreeMap::new(); + for (name, group) in body.groups { + let charts = group + .charts + .into_iter() + .map(|c| normalize_chart(&c.name)) + .collect(); + out.insert(name, charts); + } + Ok(out) +} + +fn diff( + v2: &BTreeMap>, + v3: &BTreeMap>, +) -> VerifyReport { + let mut report = VerifyReport::default(); + let v2_keys: BTreeSet<&String> = v2.keys().collect(); + let v3_keys: BTreeSet<&String> = v3.keys().collect(); + for g in v2_keys.intersection(&v3_keys) { + report.matched_groups.push((**g).clone()); + let v2_charts = &v2[*g]; + let v3_charts = &v3[*g]; + if v2_charts.len() != v3_charts.len() { + report.chart_diffs.push(ChartDiff { + group: (**g).clone(), + v2_count: v2_charts.len(), + v3_count: v3_charts.len(), + }); + } + } + for g in v3_keys.difference(&v2_keys) { + report.only_in_v3.push((**g).clone()); + } + for g in v2_keys.difference(&v3_keys) { + report.only_in_v2.push((**g).clone()); + } + report.matched_groups.sort(); + report.only_in_v3.sort(); + report.only_in_v2.sort(); + report +} + +fn display_query_group(dataset: &str, scale_factor: Option<&str>, storage: &str) -> String { + let suite = QUERY_SUITES + .iter() + .find(|s| s.prefix.eq_ignore_ascii_case(dataset)) + .copied(); + match suite { + Some(suite) if suite.fan_out => { + let storage_disp = match storage { + "s3" | "S3" => "S3", + _ => "NVMe", + }; + let sf = scale_factor.unwrap_or("1"); + format!("{} ({}) (SF={})", suite.display_name, storage_disp, sf) + } + Some(suite) => suite.display_name.to_string(), + None => format!("{dataset} ({storage})"), + } +} + 
+fn chart_name_query(dataset: &str, query_idx: i32) -> String { + let suite = QUERY_SUITES + .iter() + .find(|s| s.prefix.eq_ignore_ascii_case(dataset)) + .copied(); + match suite { + Some(suite) => format!("{} Q{}", suite.query_prefix, query_idx), + None => format!("{} Q{}", dataset.to_uppercase(), query_idx), + } +} + +fn chart_name_compression_time(format: &str, op: &str, _dataset: &str) -> String { + // Re-derive the v2 chart name (the metric, not the dataset) so we + // can compare. v2's chart axis is the metric; series is the + // dataset. v3 inverts that. For structural comparison, we project + // back to v2's per-chart key. + match (format, op) { + ("vortex-file-compressed", "encode") => "COMPRESS TIME".into(), + ("vortex-file-compressed", "decode") => "DECOMPRESS TIME".into(), + ("parquet", "encode") => "PARQUET RS ZSTD COMPRESS TIME".into(), + ("parquet", "decode") => "PARQUET RS ZSTD DECOMPRESS TIME".into(), + ("lance", "encode") => "LANCE COMPRESS TIME".into(), + ("lance", "decode") => "LANCE DECOMPRESS TIME".into(), + _ => format!("{} {} TIME", format.to_uppercase(), op.to_uppercase()), + } +} + +fn chart_name_compression_size(format: &str) -> String { + match format { + "vortex-file-compressed" => "VORTEX SIZE".into(), + "parquet" => "PARQUET SIZE".into(), + "lance" => "LANCE SIZE".into(), + _ => format!("{} SIZE", format.to_uppercase()), + } +} + +/// Strip casing and `_-` differences between v2 and v3 chart names. +/// v2 displays uppercase; v3 stores raw values. Comparing in this +/// canonical form is enough for structural verification. 
+fn normalize_chart(s: &str) -> String { + s.trim() + .to_uppercase() + .replace(['_', '-'], " ") + .split_whitespace() + .collect::>() + .join(" ") +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn normalize_chart_canonicalizes() { + assert_eq!(normalize_chart("taxi/take"), "TAXI/TAKE"); + assert_eq!(normalize_chart("TAXI/TAKE"), "TAXI/TAKE"); + assert_eq!(normalize_chart("tpc-h q1"), "TPC H Q1"); + assert_eq!(normalize_chart("tpc h q1"), "TPC H Q1"); + } + + #[test] + fn display_query_group_handles_fan_out() { + assert_eq!( + display_query_group("tpch", Some("10"), "s3"), + "TPC-H (S3) (SF=10)" + ); + assert_eq!( + display_query_group("tpch", Some("100"), "nvme"), + "TPC-H (NVMe) (SF=100)" + ); + assert_eq!( + display_query_group("clickbench", None, "nvme"), + "Clickbench" + ); + } +} diff --git a/benchmarks-website/migrate/tests/classifier.rs b/benchmarks-website/migrate/tests/classifier.rs new file mode 100644 index 00000000000..e4bb3991940 --- /dev/null +++ b/benchmarks-website/migrate/tests/classifier.rs @@ -0,0 +1,439 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Classifier behavior pinned by representative v2 names from each +//! group in v2's `getGroup` classifier. 
+ +use rstest::rstest; +use serde_json::json; +use vortex_bench_migrate::classifier::Outcome; +use vortex_bench_migrate::classifier::Skip; +use vortex_bench_migrate::classifier::V3Bin; +use vortex_bench_migrate::classifier::classify; +use vortex_bench_migrate::classifier::classify_outcome; +use vortex_bench_migrate::classifier::format_query; +use vortex_bench_migrate::classifier::rename_engine; +use vortex_bench_migrate::v2::V2Record; + +fn record(name: &str) -> V2Record { + V2Record { + name: name.to_string(), + commit_id: Some("deadbeef".into()), + unit: Some("ns".into()), + value: Some(json!(123)), + storage: None, + dataset: None, + all_runtimes: None, + env_triple: None, + } +} + +fn record_with_storage_and_sf(name: &str, storage: &str, suite: &str, sf: &str) -> V2Record { + let mut r = record(name); + r.storage = Some(storage.into()); + r.dataset = Some(json!({ suite: { "scale_factor": sf } })); + r +} + +#[rstest] +#[case::clickbench( + "clickbench_q07/datafusion:parquet", + V3Bin::Query { + dataset: "clickbench".into(), + dataset_variant: None, + scale_factor: None, + query_idx: 7, + storage: "nvme".into(), + engine: "datafusion".into(), + format: "parquet".into(), + }, +)] +#[case::clickbench_vortex_renamed( + "clickbench_q12/datafusion:vortex-file-compressed", + V3Bin::Query { + dataset: "clickbench".into(), + dataset_variant: None, + scale_factor: None, + query_idx: 12, + storage: "nvme".into(), + engine: "datafusion".into(), + format: "vortex-file-compressed".into(), + }, +)] +#[case::statpopgen( + "statpopgen_q3/datafusion:parquet", + V3Bin::Query { + dataset: "statpopgen".into(), + dataset_variant: None, + scale_factor: None, + query_idx: 3, + storage: "nvme".into(), + engine: "datafusion".into(), + format: "parquet".into(), + }, +)] +#[case::polarsignals( + "polarsignals_q1/duckdb:parquet", + V3Bin::Query { + dataset: "polarsignals".into(), + dataset_variant: None, + scale_factor: None, + query_idx: 1, + storage: "nvme".into(), + engine: 
"duckdb".into(), + format: "parquet".into(), + }, +)] +fn non_fan_out_query_records(#[case] name: &str, #[case] expected: V3Bin) { + let r = record(name); + assert_eq!(classify(&r), Some(expected)); +} + +#[rstest] +#[case::tpch_s3_sf100( + "tpch_q01/datafusion:parquet", + "S3", + "tpch", + "100", + V3Bin::Query { + dataset: "tpch".into(), + dataset_variant: None, + scale_factor: Some("100".into()), + query_idx: 1, + storage: "s3".into(), + engine: "datafusion".into(), + format: "parquet".into(), + }, +)] +#[case::tpch_nvme_sf1( + "tpch_q22/duckdb:vortex-file-compressed", + "NVMe", + "tpch", + "1", + V3Bin::Query { + dataset: "tpch".into(), + dataset_variant: None, + scale_factor: Some("1".into()), + query_idx: 22, + storage: "nvme".into(), + engine: "duckdb".into(), + format: "vortex-file-compressed".into(), + }, +)] +#[case::tpcds_nvme_sf10( + "tpcds_q05/datafusion:vortex-file-compressed", + "NVMe", + "tpcds", + "10", + V3Bin::Query { + dataset: "tpcds".into(), + dataset_variant: None, + scale_factor: Some("10".into()), + query_idx: 5, + storage: "nvme".into(), + engine: "datafusion".into(), + format: "vortex-file-compressed".into(), + }, +)] +fn fan_out_query_records( + #[case] name: &str, + #[case] storage: &str, + #[case] suite: &str, + #[case] sf: &str, + #[case] expected: V3Bin, +) { + let r = record_with_storage_and_sf(name, storage, suite, sf); + assert_eq!(classify(&r), Some(expected)); +} + +#[rstest] +#[case::random_access_4_part( + "random-access/taxi/take/parquet-tokio-local-disk", + V3Bin::RandomAccess { + dataset: "taxi/take".into(), + format: "parquet".into(), + }, +)] +#[case::random_access_4_part_vortex( + "random-access/chimp/take/vortex-tokio-local-disk", + V3Bin::RandomAccess { + dataset: "chimp/take".into(), + format: "vortex-file-compressed".into(), + }, +)] +#[case::random_access_2_part_legacy( + "random-access/parquet-tokio-local-disk", + V3Bin::RandomAccess { + dataset: "random access".into(), + format: "parquet".into(), + }, +)] 
+#[case::random_access_4_part_lance( + "random-access/taxi/take/lance-tokio-local-disk", + V3Bin::RandomAccess { + dataset: "taxi/take".into(), + format: "lance".into(), + }, +)] +fn random_access_records(#[case] name: &str, #[case] expected: V3Bin) { + let r = record(name); + assert_eq!(classify(&r), Some(expected)); +} + +#[rstest] +#[case::compress_time_vortex( + "compress time/clickbench", + V3Bin::CompressionTime { + dataset: "clickbench".into(), + dataset_variant: None, + format: "vortex-file-compressed".into(), + op: "encode".into(), + }, +)] +#[case::decompress_time_vortex( + "decompress time/tpch_lineitem", + V3Bin::CompressionTime { + dataset: "tpch_lineitem".into(), + dataset_variant: None, + format: "vortex-file-compressed".into(), + op: "decode".into(), + }, +)] +#[case::parquet_compress( + "parquet_rs-zstd compress time/clickbench", + V3Bin::CompressionTime { + dataset: "clickbench".into(), + dataset_variant: None, + format: "parquet".into(), + op: "encode".into(), + }, +)] +#[case::lance_decompress( + "lance decompress time/clickbench", + V3Bin::CompressionTime { + dataset: "clickbench".into(), + dataset_variant: None, + format: "lance".into(), + op: "decode".into(), + }, +)] +fn compression_time_records(#[case] name: &str, #[case] expected: V3Bin) { + let r = record(name); + assert_eq!(classify(&r), Some(expected)); +} + +#[rstest] +#[case::vortex_size( + "vortex size/clickbench", + V3Bin::CompressionSize { + dataset: "clickbench".into(), + dataset_variant: None, + format: "vortex-file-compressed".into(), + }, +)] +#[case::vortex_file_compressed_size_normalizes( + "vortex-file-compressed size/clickbench", + V3Bin::CompressionSize { + dataset: "clickbench".into(), + dataset_variant: None, + format: "vortex-file-compressed".into(), + }, +)] +#[case::parquet_size( + "parquet size/clickbench", + V3Bin::CompressionSize { + dataset: "clickbench".into(), + dataset_variant: None, + format: "parquet".into(), + }, +)] +#[case::lance_size( + "lance 
size/tpch_lineitem", + V3Bin::CompressionSize { + dataset: "tpch_lineitem".into(), + dataset_variant: None, + format: "lance".into(), + }, +)] +fn compression_size_records(#[case] name: &str, #[case] expected: V3Bin) { + let r = record(name); + assert_eq!(classify(&r), Some(expected)); +} + +#[rstest] +#[case::ratio_vortex_parquet("vortex:parquet-zstd ratio compress time/clickbench")] +#[case::ratio_vortex_lance("vortex:lance ratio decompress time/clickbench")] +#[case::ratio_size_vortex_parquet("vortex:parquet-zstd size/clickbench")] +#[case::ratio_size_vortex_raw("vortex:raw size/clickbench")] +#[case::throughput("compress throughput/clickbench")] +#[case::nonsense_prefix("not-a-known-bench/series")] +fn unmapped_records_yield_none(#[case] name: &str) { + let r = record(name); + assert_eq!( + classify(&r), + None, + "expected {name:?} to classify as None (drop)", + ); +} + +#[test] +fn parquet_zstd_size_is_deprecated() { + // `parquet-zstd` is not on the v3 emitter's format allowlist, so + // historical `parquet-zstd size/...` records bucket under + // Skip::Deprecated and don't render as orphan charts in v3. + let r = record("parquet-zstd size/clickbench"); + assert!(matches!( + classify_outcome(&r), + Outcome::Skip(Skip::Deprecated) + )); +} + +#[test] +fn vortex_parquet_zstd_ratio_is_intentional_skip() { + let r = record("vortex:parquet-zstd ratio compress time/clickbench"); + assert!(matches!( + classify_outcome(&r), + Outcome::Skip(Skip::DerivedRatio) + )); +} + +#[test] +fn vortex_parquet_zst_typo_ratio_is_intentional_skip() { + // `parquet-zst` (no trailing `d`) was emitted by some v2 runs. + // Both spellings should classify as derived ratios. 
+ for name in [ + "vortex:parquet-zst ratio compress time/clickbench", + "vortex:parquet-zst ratio decompress time/clickbench", + ] { + let r = record(name); + assert!( + matches!(classify_outcome(&r), Outcome::Skip(Skip::DerivedRatio)), + "{name:?} should be DerivedRatio", + ); + } +} + +#[test] +fn throughput_is_intentional_skip() { + let r = record("compress throughput/clickbench"); + assert!(matches!( + classify_outcome(&r), + Outcome::Skip(Skip::Throughput) + )); +} + +#[test] +fn unknown_prefix_is_unknown() { + let r = record("not-a-known-bench/series"); + assert!(matches!(classify_outcome(&r), Outcome::Unknown)); +} + +#[test] +fn gharchive_q00_is_deprecated() { + // gharchive isn't on the v3 query-suite allowlist, so historical + // gharchive query records bucket as Skip::Deprecated. + let r = record("gharchive_q00/datafusion:parquet"); + assert!(matches!( + classify_outcome(&r), + Outcome::Skip(Skip::Deprecated) + )); +} + +#[test] +fn fineweb_q00_classifies() { + // fineweb is on V3_QUERY_SUITES (still emitted by v3 CI per + // .github/workflows/sql-benchmarks.yml's `fineweb` matrix entry), + // so historical fineweb records ingest like any other suite. + let r = record("fineweb_q00/datafusion:parquet"); + assert!(matches!( + classify_outcome(&r), + Outcome::Bin(V3Bin::Query { .. }) + )); +} + +#[test] +fn memory_record_is_historical_memory_skip() { + // v2 emitted `_q_memory/:` records that + // carry top-level memory fields V2Record doesn't deserialize. + // Skip them with a known variant so they don't trip the 5% gate. + let r = record("clickbench_q07_memory/datafusion:parquet"); + assert!(matches!( + classify_outcome(&r), + Outcome::Skip(Skip::HistoricalMemory) + )); +} + +#[test] +fn tpch_compression_size_carries_scale_factor() { + // The data.json.gz "vortex size/tpch" path needs to derive + // dataset_variant from the v2 record's `dataset` object, the same + // way the file-sizes path does. 
Otherwise SF=10 rows from the two + // sources never collide on `mid` and produce duplicate rows. + let mut r = record("vortex size/tpch"); + r.dataset = Some(serde_json::json!({ "tpch": { "scale_factor": "10" } })); + let outcome = classify_outcome(&r); + let Outcome::Bin(V3Bin::CompressionSize { + dataset, + dataset_variant, + format, + }) = outcome + else { + panic!("expected Bin(CompressionSize), got {outcome:?}"); + }; + assert_eq!(dataset, "tpch"); + assert_eq!(dataset_variant, Some("10".into())); + assert_eq!(format, "vortex-file-compressed"); +} + +#[test] +fn tpch_compression_size_drops_default_scale_factor() { + // SF "1.0" matches the file-sizes path's filter and collapses to + // dataset_variant: None. + let mut r = record("vortex size/tpch"); + r.dataset = Some(serde_json::json!({ "tpch": { "scale_factor": "1.0" } })); + let outcome = classify_outcome(&r); + let Outcome::Bin(V3Bin::CompressionSize { + dataset_variant, .. + }) = outcome + else { + panic!("expected Bin(CompressionSize), got {outcome:?}"); + }; + assert_eq!(dataset_variant, None); +} + +#[test] +fn engine_casing_lowercased() { + // Older v2 records emitted display-case engines like `DataFusion` + // and `DuckDB`. The classifier lowercases at push time so dedup + // collapses display-case rows into the canonical lowercase ones. + let r = record("clickbench_q07/DataFusion:parquet"); + let outcome = classify_outcome(&r); + let Outcome::Bin(V3Bin::Query { engine, format, .. 
}) = outcome else { + panic!("expected Bin(Query), got {outcome:?}"); + }; + assert_eq!(engine, "datafusion"); + assert_eq!(format, "parquet"); +} + +#[test] +fn rename_engine_pins_canonical_outputs() { + assert_eq!(rename_engine("vortex-tokio-local-disk"), "vortex-nvme"); + assert_eq!( + rename_engine("datafusion:vortex-file-compressed"), + "datafusion:vortex" + ); + assert_eq!(rename_engine("LANCE"), "lance"); +} + +#[test] +fn format_query_pins_v2_display() { + assert_eq!(format_query("clickbench_q00"), "CLICKBENCH Q0"); + assert_eq!(format_query("tpch_q22"), "TPC-H Q22"); + assert_eq!(format_query("tpcds_q42"), "TPC-DS Q42"); + assert_eq!(format_query("polarsignals_q1"), "POLARSIGNALS Q1"); + // Names that don't match a suite fall back to upper + " " replace. + assert_eq!( + format_query("vortex-file-compressed size"), + "VORTEX FILE COMPRESSED SIZE" + ); +} diff --git a/benchmarks-website/migrate/tests/end_to_end.rs b/benchmarks-website/migrate/tests/end_to_end.rs new file mode 100644 index 00000000000..210092a4058 --- /dev/null +++ b/benchmarks-website/migrate/tests/end_to_end.rs @@ -0,0 +1,263 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Inline JSONL fixtures driven through the full migration into a +//! tempdir DuckDB. No live S3. 
+ +use std::fs::File; +use std::io::Write; +use std::path::Path; + +use duckdb::Connection; +use flate2::Compression; +use flate2::write::GzEncoder; +use tempfile::TempDir; +use vortex_bench_migrate::migrate; +use vortex_bench_migrate::source::Source; + +const COMMITS_JSONL: &str = r#"{"id":"deadbeef","timestamp":"2026-04-25T00:00:00Z","message":"fixture commit","author":{"name":"A","email":"a@example.com"},"committer":{"name":"C","email":"c@example.com"},"tree_id":"abcd0001","url":"https://example.com/commit/deadbeef"} +"#; + +const DATA_JSONL: &str = r#"{"name":"clickbench_q07/datafusion:parquet","commit_id":"deadbeef","unit":"ns","value":42000,"all_runtimes":[41000,42000,43000]} +{"name":"compress time/clickbench","commit_id":"deadbeef","unit":"ns","value":99} +{"name":"vortex size/clickbench","commit_id":"deadbeef","unit":"bytes","value":1024} +{"name":"random-access/taxi/take/parquet-tokio-local-disk","commit_id":"deadbeef","unit":"ns","value":777,"all_runtimes":[700,777,800]} +"#; + +/// Build a local-source fixture directory. Caller supplies the contents +/// of `commits.json`, `data.json.gz`, and any number of +/// `file-sizes-*.json.gz` files (name → contents). 
+fn build_fixture(commits: &str, data: &str, file_sizes: &[(&str, &str)]) -> TempDir { + let dir = TempDir::new().expect("tempdir"); + write_text(&dir.path().join("commits.json"), commits); + write_gz(&dir.path().join("data.json.gz"), data); + for (name, body) in file_sizes { + write_gz(&dir.path().join(name), body); + } + dir +} + +fn write_text(path: &Path, body: &str) { + let mut f = File::create(path).unwrap(); + f.write_all(body.as_bytes()).unwrap(); +} + +fn write_gz(path: &Path, body: &str) { + let f = File::create(path).unwrap(); + let mut gz = GzEncoder::new(f, Compression::default()); + gz.write_all(body.as_bytes()).unwrap(); + gz.finish().unwrap(); +} + +#[test] +fn migrate_inline_fixture_populates_each_table() { + let src_dir = build_fixture(COMMITS_JSONL, DATA_JSONL, &[]); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + let summary = migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + assert_eq!(summary.records_read, 4, "summary={summary}"); + assert_eq!(summary.uncategorized, 0, "summary={summary}"); + assert_eq!(summary.commits_inserted, 1); + assert_eq!(summary.query_inserted, 1); + assert_eq!(summary.compression_time_inserted, 1); + assert_eq!(summary.compression_size_inserted, 1); + assert_eq!(summary.random_access_inserted, 1); + + let conn = Connection::open(&target).unwrap(); + let count = |table: &str| -> i64 { + conn.query_row(&format!("SELECT COUNT(*) FROM {table}"), [], |r| r.get(0)) + .unwrap() + }; + assert_eq!(count("commits"), 1); + assert_eq!(count("query_measurements"), 1); + assert_eq!(count("compression_times"), 1); + assert_eq!(count("compression_sizes"), 1); + assert_eq!(count("random_access_times"), 1); + + // Spot-check the v3 column values for each kind. 
+ let (engine, format, query_idx, value_ns): (String, String, i32, i64) = conn + .query_row( + "SELECT engine, format, query_idx, value_ns FROM query_measurements", + [], + |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?, r.get(3)?)), + ) + .unwrap(); + assert_eq!(engine, "datafusion"); + assert_eq!(format, "parquet"); + assert_eq!(query_idx, 7); + assert_eq!(value_ns, 42000); + + let (dataset, format, op): (String, String, String) = conn + .query_row( + "SELECT dataset, format, op FROM compression_times", + [], + |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)), + ) + .unwrap(); + assert_eq!(dataset, "clickbench"); + assert_eq!(format, "vortex-file-compressed"); + assert_eq!(op, "encode"); + + let (dataset, format, value_bytes): (String, String, i64) = conn + .query_row( + "SELECT dataset, format, value_bytes FROM compression_sizes", + [], + |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)), + ) + .unwrap(); + assert_eq!(dataset, "clickbench"); + assert_eq!(format, "vortex-file-compressed"); + assert_eq!(value_bytes, 1024); + + let (dataset, format): (String, String) = conn + .query_row("SELECT dataset, format FROM random_access_times", [], |r| { + Ok((r.get(0)?, r.get(1)?)) + }) + .unwrap(); + assert_eq!(dataset, "taxi/take"); + assert_eq!(format, "parquet"); +} + +#[test] +fn dedup_collision_keeps_one_row() { + // Two data.json.gz lines whose query-measurement dim columns are + // identical (same commit / dataset / engine / format / query_idx, + // and `storage` collapses to "nvme" since `storage` is unset). + // Different `value`s. The accumulator's HashSet + // should drop the second one and bump `summary.deduped`. 
+ const DATA: &str = r#"{"name":"clickbench_q07/datafusion:parquet","commit_id":"deadbeef","unit":"ns","value":111} +{"name":"clickbench_q07/datafusion:parquet","commit_id":"deadbeef","unit":"ns","value":222} +"#; + + let src_dir = build_fixture(COMMITS_JSONL, DATA, &[]); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + let summary = migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + assert_eq!(summary.records_read, 2, "summary={summary}"); + assert_eq!(summary.query_inserted, 1, "summary={summary}"); + assert_eq!(summary.deduped, 1, "summary={summary}"); + + let conn = Connection::open(&target).unwrap(); + let n: i64 = conn + .query_row("SELECT COUNT(*) FROM query_measurements", [], |r| r.get(0)) + .unwrap(); + assert_eq!(n, 1); +} + +#[test] +fn dedup_with_conflicting_value_ns_is_counted() { + // Same dim columns, different `value`s. Dedup keeps the first + // and bumps `deduped_with_conflict` because the dropped row's + // value_ns differed from the kept row's. This is the signal we + // care about when watching for silent value-corruption across + // duplicated v2 emissions. + const DATA: &str = r#"{"name":"clickbench_q07/datafusion:parquet","commit_id":"deadbeef","unit":"ns","value":111} +{"name":"clickbench_q07/datafusion:parquet","commit_id":"deadbeef","unit":"ns","value":222} +"#; + + let src_dir = build_fixture(COMMITS_JSONL, DATA, &[]); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + let summary = migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + assert_eq!(summary.deduped, 1, "summary={summary}"); + assert_eq!(summary.deduped_with_conflict, 1, "summary={summary}"); +} + +#[test] +fn dedup_with_matching_value_ns_does_not_count_conflict() { + // Same dim columns AND identical `value`s. Dedup still drops the + // duplicate, but `deduped_with_conflict` stays 0. 
+ const DATA: &str = r#"{"name":"clickbench_q07/datafusion:parquet","commit_id":"deadbeef","unit":"ns","value":111} +{"name":"clickbench_q07/datafusion:parquet","commit_id":"deadbeef","unit":"ns","value":111} +"#; + + let src_dir = build_fixture(COMMITS_JSONL, DATA, &[]); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + let summary = migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + assert_eq!(summary.deduped, 1, "summary={summary}"); + assert_eq!(summary.deduped_with_conflict, 0, "summary={summary}"); +} + +#[test] +fn compression_size_data_and_file_sizes_merge() { + // A `vortex size/tpch` record from data.json.gz and a + // file-sizes-tpch-nvme.json.gz row covering the same (commit, + // dataset, format, SF) tuple should produce the *same* + // measurement_id so the in-memory accumulator merges them into + // one row instead of two. + // + // Both sources use scale_factor "1.0", which both code paths + // filter out → dataset_variant: None on both sides → matching mid. 
+ const DATA: &str = r#"{"name":"vortex size/tpch","commit_id":"deadbeef","unit":"bytes","value":200,"dataset":{"tpch":{"scale_factor":"1.0"}}} +"#; + const FILE_SIZES: &str = r#"{"commit_id":"deadbeef","benchmark":"tpch","scale_factor":"1.0","format":"vortex-file-compressed","file":"part-0.vortex","size_bytes":100} +"#; + + let src_dir = build_fixture( + COMMITS_JSONL, + DATA, + &[("file-sizes-tpch-nvme.json.gz", FILE_SIZES)], + ); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + let summary = migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + assert_eq!(summary.compression_size_inserted, 1, "summary={summary}"); + + let conn = Connection::open(&target).unwrap(); + let (n, value_bytes): (i64, i64) = conn + .query_row( + "SELECT COUNT(*), SUM(value_bytes) FROM compression_sizes", + [], + |r| Ok((r.get(0)?, r.get(1)?)), + ) + .unwrap(); + assert_eq!(n, 1); + // data.json.gz seeds value_bytes=200, file-sizes adds 100. + assert_eq!(value_bytes, 300); +} + +#[test] +fn file_sizes_sum_into_one_row() { + // Two file-sizes rows sharing (commit, benchmark, format, + // scale_factor) and value_bytes 100 + 200 must collapse to a + // single compression_sizes row with 300. 
+ const FILE_SIZES: &str = r#"{"commit_id":"deadbeef","benchmark":"clickbench","scale_factor":"1.0","format":"vortex-file-compressed","file":"part-0.vortex","size_bytes":100} +{"commit_id":"deadbeef","benchmark":"clickbench","scale_factor":"1.0","format":"vortex-file-compressed","file":"part-1.vortex","size_bytes":200} +"#; + + let src_dir = build_fixture( + COMMITS_JSONL, + "", + &[("file-sizes-clickbench.json.gz", FILE_SIZES)], + ); + let target_dir = TempDir::new().unwrap(); + let target = target_dir.path().join("v3.duckdb"); + + let summary = migrate::run(&Source::Local(src_dir.path().into()), &target).unwrap(); + + assert_eq!(summary.file_size_inserted, 2, "summary={summary}"); + assert_eq!(summary.compression_size_inserted, 1, "summary={summary}"); + + let conn = Connection::open(&target).unwrap(); + let n: i64 = conn + .query_row("SELECT COUNT(*) FROM compression_sizes", [], |r| r.get(0)) + .unwrap(); + assert_eq!(n, 1); + let value_bytes: i64 = conn + .query_row("SELECT value_bytes FROM compression_sizes", [], |r| { + r.get(0) + }) + .unwrap(); + assert_eq!(value_bytes, 300); +} diff --git a/benchmarks-website/package-lock.json b/benchmarks-website/package-lock.json deleted file mode 100644 index d140b73d225..00000000000 --- a/benchmarks-website/package-lock.json +++ /dev/null @@ -1,2298 +0,0 @@ -{ - "name": "vortex-benchmarks-website", - "version": "2.0.0", - "lockfileVersion": 3, - "requires": true, - "packages": { - "": { - "name": "vortex-benchmarks-website", - "version": "2.0.0", - "dependencies": { - "chart.js": "^4.4.4", - "chartjs-plugin-zoom": "^2.0.1", - "downsample": "^1.4.0", - "hammerjs": "^2.0.8", - "lucide-react": "^0.577.0", - "react": "^18.3.1", - "react-chartjs-2": "^5.2.0", - "react-dom": "^18.3.1" - }, - "devDependencies": { - "@types/react": "^18.3.3", - "@types/react-dom": "^18.3.0", - "@vitejs/plugin-react": "^4.3.1", - "concurrently": "^8.2.2", - "vite": "^6.0.0" - }, - "engines": { - "node": ">=18.0.0" - } - }, - 
"node_modules/@babel/code-frame": { - "version": "7.29.0", - "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.29.0.tgz", - "integrity": "sha512-9NhCeYjq9+3uxgdtp20LSiJXJvN0FeCtNGpJxuMFZ1Kv3cWUNb6DOhJwUvcVCzKGR66cw4njwM6hrJLqgOwbcw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/helper-validator-identifier": "^7.28.5", - "js-tokens": "^4.0.0", - "picocolors": "^1.1.1" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/compat-data": { - "version": "7.29.0", - "resolved": "https://registry.npmjs.org/@babel/compat-data/-/compat-data-7.29.0.tgz", - "integrity": "sha512-T1NCJqT/j9+cn8fvkt7jtwbLBfLC/1y1c7NtCeXFRgzGTsafi68MRv8yzkYSapBnFA6L3U2VSc02ciDzoAJhJg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/core": { - "version": "7.29.0", - "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.29.0.tgz", - "integrity": "sha512-CGOfOJqWjg2qW/Mb6zNsDm+u5vFQ8DxXfbM09z69p5Z6+mE1ikP2jUXw+j42Pf1XTYED2Rni5f95npYeuwMDQA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/code-frame": "^7.29.0", - "@babel/generator": "^7.29.0", - "@babel/helper-compilation-targets": "^7.28.6", - "@babel/helper-module-transforms": "^7.28.6", - "@babel/helpers": "^7.28.6", - "@babel/parser": "^7.29.0", - "@babel/template": "^7.28.6", - "@babel/traverse": "^7.29.0", - "@babel/types": "^7.29.0", - "@jridgewell/remapping": "^2.3.5", - "convert-source-map": "^2.0.0", - "debug": "^4.1.0", - "gensync": "^1.0.0-beta.2", - "json5": "^2.2.3", - "semver": "^6.3.1" - }, - "engines": { - "node": ">=6.9.0" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/babel" - } - }, - "node_modules/@babel/generator": { - "version": "7.29.1", - "resolved": "https://registry.npmjs.org/@babel/generator/-/generator-7.29.1.tgz", - "integrity": "sha512-qsaF+9Qcm2Qv8SRIMMscAvG4O3lJ0F1GuMo5HR/Bp02LopNgnZBC/EkbevHFeGs4ls/oPz9v+Bsmzbkbe+0dUw==", - "dev": 
true, - "license": "MIT", - "dependencies": { - "@babel/parser": "^7.29.0", - "@babel/types": "^7.29.0", - "@jridgewell/gen-mapping": "^0.3.12", - "@jridgewell/trace-mapping": "^0.3.28", - "jsesc": "^3.0.2" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-compilation-targets": { - "version": "7.28.6", - "resolved": "https://registry.npmjs.org/@babel/helper-compilation-targets/-/helper-compilation-targets-7.28.6.tgz", - "integrity": "sha512-JYtls3hqi15fcx5GaSNL7SCTJ2MNmjrkHXg4FSpOA/grxK8KwyZ5bubHsCq8FXCkua6xhuaaBit+3b7+VZRfcA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/compat-data": "^7.28.6", - "@babel/helper-validator-option": "^7.27.1", - "browserslist": "^4.24.0", - "lru-cache": "^5.1.1", - "semver": "^6.3.1" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-globals": { - "version": "7.28.0", - "resolved": "https://registry.npmjs.org/@babel/helper-globals/-/helper-globals-7.28.0.tgz", - "integrity": "sha512-+W6cISkXFa1jXsDEdYA8HeevQT/FULhxzR99pxphltZcVaugps53THCeiWA8SguxxpSp3gKPiuYfSWopkLQ4hw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-module-imports": { - "version": "7.28.6", - "resolved": "https://registry.npmjs.org/@babel/helper-module-imports/-/helper-module-imports-7.28.6.tgz", - "integrity": "sha512-l5XkZK7r7wa9LucGw9LwZyyCUscb4x37JWTPz7swwFE/0FMQAGpiWUZn8u9DzkSBWEcK25jmvubfpw2dnAMdbw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/traverse": "^7.28.6", - "@babel/types": "^7.28.6" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-module-transforms": { - "version": "7.28.6", - "resolved": "https://registry.npmjs.org/@babel/helper-module-transforms/-/helper-module-transforms-7.28.6.tgz", - "integrity": "sha512-67oXFAYr2cDLDVGLXTEABjdBJZ6drElUSI7WKp70NrpyISso3plG9SAGEF6y7zbha/wOzUByWWTJvEDVNIUGcA==", - "dev": true, - "license": "MIT", - "dependencies": { - 
"@babel/helper-module-imports": "^7.28.6", - "@babel/helper-validator-identifier": "^7.28.5", - "@babel/traverse": "^7.28.6" - }, - "engines": { - "node": ">=6.9.0" - }, - "peerDependencies": { - "@babel/core": "^7.0.0" - } - }, - "node_modules/@babel/helper-plugin-utils": { - "version": "7.28.6", - "resolved": "https://registry.npmjs.org/@babel/helper-plugin-utils/-/helper-plugin-utils-7.28.6.tgz", - "integrity": "sha512-S9gzZ/bz83GRysI7gAD4wPT/AI3uCnY+9xn+Mx/KPs2JwHJIz1W8PZkg2cqyt3RNOBM8ejcXhV6y8Og7ly/Dug==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-string-parser": { - "version": "7.27.1", - "resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.27.1.tgz", - "integrity": "sha512-qMlSxKbpRlAridDExk92nSobyDdpPijUq2DW6oDnUqd0iOGxmQjyqhMIihI9+zv4LPyZdRje2cavWPbCbWm3eA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-validator-identifier": { - "version": "7.28.5", - "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.28.5.tgz", - "integrity": "sha512-qSs4ifwzKJSV39ucNjsvc6WVHs6b7S03sOh2OcHF9UHfVPqWWALUsNUVzhSBiItjRZoLHx7nIarVjqKVusUZ1Q==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helper-validator-option": { - "version": "7.27.1", - "resolved": "https://registry.npmjs.org/@babel/helper-validator-option/-/helper-validator-option-7.27.1.tgz", - "integrity": "sha512-YvjJow9FxbhFFKDSuFnVCe2WxXk1zWc22fFePVNEaWJEu8IrZVlda6N0uHwzZrUM1il7NC9Mlp4MaJYbYd9JSg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/helpers": { - "version": "7.29.2", - "resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.29.2.tgz", - "integrity": "sha512-HoGuUs4sCZNezVEKdVcwqmZN8GoHirLUcLaYVNBK2J0DadGtdcqgr3BCbvH8+XUo4NGjNl3VOtSjEKNzqfFgKw==", - "dev": 
true, - "license": "MIT", - "dependencies": { - "@babel/template": "^7.28.6", - "@babel/types": "^7.29.0" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/parser": { - "version": "7.29.2", - "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.29.2.tgz", - "integrity": "sha512-4GgRzy/+fsBa72/RZVJmGKPmZu9Byn8o4MoLpmNe1m8ZfYnz5emHLQz3U4gLud6Zwl0RZIcgiLD7Uq7ySFuDLA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/types": "^7.29.0" - }, - "bin": { - "parser": "bin/babel-parser.js" - }, - "engines": { - "node": ">=6.0.0" - } - }, - "node_modules/@babel/plugin-transform-react-jsx-self": { - "version": "7.27.1", - "resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-jsx-self/-/plugin-transform-react-jsx-self-7.27.1.tgz", - "integrity": "sha512-6UzkCs+ejGdZ5mFFC/OCUrv028ab2fp1znZmCZjAOBKiBK2jXD1O+BPSfX8X2qjJ75fZBMSnQn3Rq2mrBJK2mw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/helper-plugin-utils": "^7.27.1" - }, - "engines": { - "node": ">=6.9.0" - }, - "peerDependencies": { - "@babel/core": "^7.0.0-0" - } - }, - "node_modules/@babel/plugin-transform-react-jsx-source": { - "version": "7.27.1", - "resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-jsx-source/-/plugin-transform-react-jsx-source-7.27.1.tgz", - "integrity": "sha512-zbwoTsBruTeKB9hSq73ha66iFeJHuaFkUbwvqElnygoNbj/jHRsSeokowZFN3CZ64IvEqcmmkVe89OPXc7ldAw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/helper-plugin-utils": "^7.27.1" - }, - "engines": { - "node": ">=6.9.0" - }, - "peerDependencies": { - "@babel/core": "^7.0.0-0" - } - }, - "node_modules/@babel/runtime": { - "version": "7.29.2", - "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.29.2.tgz", - "integrity": "sha512-JiDShH45zKHWyGe4ZNVRrCjBz8Nh9TMmZG1kh4QTK8hCBTWBi8Da+i7s1fJw7/lYpM4ccepSNfqzZ/QvABBi5g==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - 
"node_modules/@babel/template": { - "version": "7.28.6", - "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.28.6.tgz", - "integrity": "sha512-YA6Ma2KsCdGb+WC6UpBVFJGXL58MDA6oyONbjyF/+5sBgxY/dwkhLogbMT2GXXyU84/IhRw/2D1Os1B/giz+BQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/code-frame": "^7.28.6", - "@babel/parser": "^7.28.6", - "@babel/types": "^7.28.6" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/traverse": { - "version": "7.29.0", - "resolved": "https://registry.npmjs.org/@babel/traverse/-/traverse-7.29.0.tgz", - "integrity": "sha512-4HPiQr0X7+waHfyXPZpWPfWL/J7dcN1mx9gL6WdQVMbPnF3+ZhSMs8tCxN7oHddJE9fhNE7+lxdnlyemKfJRuA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/code-frame": "^7.29.0", - "@babel/generator": "^7.29.0", - "@babel/helper-globals": "^7.28.0", - "@babel/parser": "^7.29.0", - "@babel/template": "^7.28.6", - "@babel/types": "^7.29.0", - "debug": "^4.3.1" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@babel/types": { - "version": "7.29.0", - "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.29.0.tgz", - "integrity": "sha512-LwdZHpScM4Qz8Xw2iKSzS+cfglZzJGvofQICy7W7v4caru4EaAmyUuO6BGrbyQ2mYV11W0U8j5mBhd14dd3B0A==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/helper-string-parser": "^7.27.1", - "@babel/helper-validator-identifier": "^7.28.5" - }, - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/@esbuild/aix-ppc64": { - "version": "0.25.12", - "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.25.12.tgz", - "integrity": "sha512-Hhmwd6CInZ3dwpuGTF8fJG6yoWmsToE+vYgD4nytZVxcu1ulHpUQRAB1UJ8+N1Am3Mz4+xOByoQoSZf4D+CpkA==", - "cpu": [ - "ppc64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "aix" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/android-arm": { - "version": "0.25.12", - "resolved": 
"https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.25.12.tgz", - "integrity": "sha512-VJ+sKvNA/GE7Ccacc9Cha7bpS8nyzVv0jdVgwNDaR4gDMC/2TTRc33Ip8qrNYUcpkOHUT5OZ0bUcNNVZQ9RLlg==", - "cpu": [ - "arm" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "android" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/android-arm64": { - "version": "0.25.12", - "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.25.12.tgz", - "integrity": "sha512-6AAmLG7zwD1Z159jCKPvAxZd4y/VTO0VkprYy+3N2FtJ8+BQWFXU+OxARIwA46c5tdD9SsKGZ/1ocqBS/gAKHg==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "android" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/android-x64": { - "version": "0.25.12", - "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.25.12.tgz", - "integrity": "sha512-5jbb+2hhDHx5phYR2By8GTWEzn6I9UqR11Kwf22iKbNpYrsmRB18aX/9ivc5cabcUiAT/wM+YIZ6SG9QO6a8kg==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "android" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/darwin-arm64": { - "version": "0.25.12", - "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.25.12.tgz", - "integrity": "sha512-N3zl+lxHCifgIlcMUP5016ESkeQjLj/959RxxNYIthIg+CQHInujFuXeWbWMgnTo4cp5XVHqFPmpyu9J65C1Yg==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/darwin-x64": { - "version": "0.25.12", - "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.25.12.tgz", - "integrity": "sha512-HQ9ka4Kx21qHXwtlTUVbKJOAnmG1ipXhdWTmNXiPzPfWKpXqASVcWdnf2bnL73wgjNrFXAa3yYvBSd9pzfEIpA==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": 
">=18" - } - }, - "node_modules/@esbuild/freebsd-arm64": { - "version": "0.25.12", - "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.25.12.tgz", - "integrity": "sha512-gA0Bx759+7Jve03K1S0vkOu5Lg/85dou3EseOGUes8flVOGxbhDDh/iZaoek11Y8mtyKPGF3vP8XhnkDEAmzeg==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "freebsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/freebsd-x64": { - "version": "0.25.12", - "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.25.12.tgz", - "integrity": "sha512-TGbO26Yw2xsHzxtbVFGEXBFH0FRAP7gtcPE7P5yP7wGy7cXK2oO7RyOhL5NLiqTlBh47XhmIUXuGciXEqYFfBQ==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "freebsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-arm": { - "version": "0.25.12", - "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.25.12.tgz", - "integrity": "sha512-lPDGyC1JPDou8kGcywY0YILzWlhhnRjdof3UlcoqYmS9El818LLfJJc3PXXgZHrHCAKs/Z2SeZtDJr5MrkxtOw==", - "cpu": [ - "arm" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-arm64": { - "version": "0.25.12", - "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.25.12.tgz", - "integrity": "sha512-8bwX7a8FghIgrupcxb4aUmYDLp8pX06rGh5HqDT7bB+8Rdells6mHvrFHHW2JAOPZUbnjUpKTLg6ECyzvas2AQ==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-ia32": { - "version": "0.25.12", - "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.25.12.tgz", - "integrity": "sha512-0y9KrdVnbMM2/vG8KfU0byhUN+EFCny9+8g202gYqSSVMonbsCfLjUO+rCci7pM0WBEtz+oK/PIwHkzxkyharA==", - "cpu": [ - "ia32" - ], - "dev": true, - 
"license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-loong64": { - "version": "0.25.12", - "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.25.12.tgz", - "integrity": "sha512-h///Lr5a9rib/v1GGqXVGzjL4TMvVTv+s1DPoxQdz7l/AYv6LDSxdIwzxkrPW438oUXiDtwM10o9PmwS/6Z0Ng==", - "cpu": [ - "loong64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-mips64el": { - "version": "0.25.12", - "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.25.12.tgz", - "integrity": "sha512-iyRrM1Pzy9GFMDLsXn1iHUm18nhKnNMWscjmp4+hpafcZjrr2WbT//d20xaGljXDBYHqRcl8HnxbX6uaA/eGVw==", - "cpu": [ - "mips64el" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-ppc64": { - "version": "0.25.12", - "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.25.12.tgz", - "integrity": "sha512-9meM/lRXxMi5PSUqEXRCtVjEZBGwB7P/D4yT8UG/mwIdze2aV4Vo6U5gD3+RsoHXKkHCfSxZKzmDssVlRj1QQA==", - "cpu": [ - "ppc64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-riscv64": { - "version": "0.25.12", - "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.25.12.tgz", - "integrity": "sha512-Zr7KR4hgKUpWAwb1f3o5ygT04MzqVrGEGXGLnj15YQDJErYu/BGg+wmFlIDOdJp0PmB0lLvxFIOXZgFRrdjR0w==", - "cpu": [ - "riscv64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-s390x": { - "version": "0.25.12", - "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.25.12.tgz", - "integrity": 
"sha512-MsKncOcgTNvdtiISc/jZs/Zf8d0cl/t3gYWX8J9ubBnVOwlk65UIEEvgBORTiljloIWnBzLs4qhzPkJcitIzIg==", - "cpu": [ - "s390x" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-x64": { - "version": "0.25.12", - "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.25.12.tgz", - "integrity": "sha512-uqZMTLr/zR/ed4jIGnwSLkaHmPjOjJvnm6TVVitAa08SLS9Z0VM8wIRx7gWbJB5/J54YuIMInDquWyYvQLZkgw==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/netbsd-arm64": { - "version": "0.25.12", - "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.25.12.tgz", - "integrity": "sha512-xXwcTq4GhRM7J9A8Gv5boanHhRa/Q9KLVmcyXHCTaM4wKfIpWkdXiMog/KsnxzJ0A1+nD+zoecuzqPmCRyBGjg==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "netbsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/netbsd-x64": { - "version": "0.25.12", - "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.25.12.tgz", - "integrity": "sha512-Ld5pTlzPy3YwGec4OuHh1aCVCRvOXdH8DgRjfDy/oumVovmuSzWfnSJg+VtakB9Cm0gxNO9BzWkj6mtO1FMXkQ==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "netbsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/openbsd-arm64": { - "version": "0.25.12", - "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.25.12.tgz", - "integrity": "sha512-fF96T6KsBo/pkQI950FARU9apGNTSlZGsv1jZBAlcLL1MLjLNIWPBkj5NlSz8aAzYKg+eNqknrUJ24QBybeR5A==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "openbsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/openbsd-x64": { - "version": "0.25.12", - "resolved": 
"https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.25.12.tgz", - "integrity": "sha512-MZyXUkZHjQxUvzK7rN8DJ3SRmrVrke8ZyRusHlP+kuwqTcfWLyqMOE3sScPPyeIXN/mDJIfGXvcMqCgYKekoQw==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "openbsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/openharmony-arm64": { - "version": "0.25.12", - "resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.25.12.tgz", - "integrity": "sha512-rm0YWsqUSRrjncSXGA7Zv78Nbnw4XL6/dzr20cyrQf7ZmRcsovpcRBdhD43Nuk3y7XIoW2OxMVvwuRvk9XdASg==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "openharmony" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/sunos-x64": { - "version": "0.25.12", - "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.25.12.tgz", - "integrity": "sha512-3wGSCDyuTHQUzt0nV7bocDy72r2lI33QL3gkDNGkod22EsYl04sMf0qLb8luNKTOmgF/eDEDP5BFNwoBKH441w==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "sunos" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/win32-arm64": { - "version": "0.25.12", - "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.25.12.tgz", - "integrity": "sha512-rMmLrur64A7+DKlnSuwqUdRKyd3UE7oPJZmnljqEptesKM8wx9J8gx5u0+9Pq0fQQW8vqeKebwNXdfOyP+8Bsg==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/win32-ia32": { - "version": "0.25.12", - "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.25.12.tgz", - "integrity": "sha512-HkqnmmBoCbCwxUKKNPBixiWDGCpQGVsrQfJoVGYLPT41XWF8lHuE5N6WhVia2n4o5QK5M4tYr21827fNhi4byQ==", - "cpu": [ - "ia32" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "win32" - ], - "engines": { - 
"node": ">=18" - } - }, - "node_modules/@esbuild/win32-x64": { - "version": "0.25.12", - "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.25.12.tgz", - "integrity": "sha512-alJC0uCZpTFrSL0CCDjcgleBXPnCrEAhTBILpeAp7M/OFgoqtAetfBzX0xM00MUsVVPpVjlPuMbREqnZCXaTnA==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@jridgewell/gen-mapping": { - "version": "0.3.13", - "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.13.tgz", - "integrity": "sha512-2kkt/7niJ6MgEPxF0bYdQ6etZaA+fQvDcLKckhy1yIQOzaoKjBBjSj63/aLVjYE3qhRt5dvM+uUyfCg6UKCBbA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jridgewell/sourcemap-codec": "^1.5.0", - "@jridgewell/trace-mapping": "^0.3.24" - } - }, - "node_modules/@jridgewell/remapping": { - "version": "2.3.5", - "resolved": "https://registry.npmjs.org/@jridgewell/remapping/-/remapping-2.3.5.tgz", - "integrity": "sha512-LI9u/+laYG4Ds1TDKSJW2YPrIlcVYOwi2fUC6xB43lueCjgxV4lffOCZCtYFiH6TNOX+tQKXx97T4IKHbhyHEQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jridgewell/gen-mapping": "^0.3.5", - "@jridgewell/trace-mapping": "^0.3.24" - } - }, - "node_modules/@jridgewell/resolve-uri": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", - "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.0.0" - } - }, - "node_modules/@jridgewell/sourcemap-codec": { - "version": "1.5.5", - "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.5.tgz", - "integrity": "sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==", - "dev": true, - "license": "MIT" - }, - "node_modules/@jridgewell/trace-mapping": { - "version": 
"0.3.31", - "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.31.tgz", - "integrity": "sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@jridgewell/resolve-uri": "^3.1.0", - "@jridgewell/sourcemap-codec": "^1.4.14" - } - }, - "node_modules/@kurkle/color": { - "version": "0.3.4", - "resolved": "https://registry.npmjs.org/@kurkle/color/-/color-0.3.4.tgz", - "integrity": "sha512-M5UknZPHRu3DEDWoipU6sE8PdkZ6Z/S+v4dD+Ke8IaNlpdSQah50lz1KtcFBa2vsdOnwbbnxJwVM4wty6udA5w==", - "license": "MIT" - }, - "node_modules/@rolldown/pluginutils": { - "version": "1.0.0-beta.27", - "resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-beta.27.tgz", - "integrity": "sha512-+d0F4MKMCbeVUJwG96uQ4SgAznZNSq93I3V+9NHA4OpvqG8mRCpGdKmK8l/dl02h2CCDHwW2FqilnTyDcAnqjA==", - "dev": true, - "license": "MIT" - }, - "node_modules/@rollup/rollup-android-arm-eabi": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.60.2.tgz", - "integrity": "sha512-dnlp69efPPg6Uaw2dVqzWRfAWRnYVb1XJ8CyyhIbZeaq4CA5/mLeZ1IEt9QqQxmbdvagjLIm2ZL8BxXv5lH4Yw==", - "cpu": [ - "arm" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "android" - ] - }, - "node_modules/@rollup/rollup-android-arm64": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.60.2.tgz", - "integrity": "sha512-OqZTwDRDchGRHHm/hwLOL7uVPB9aUvI0am/eQuWMNyFHf5PSEQmyEeYYheA0EPPKUO/l0uigCp+iaTjoLjVoHg==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "android" - ] - }, - "node_modules/@rollup/rollup-darwin-arm64": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.60.2.tgz", - "integrity": 
"sha512-UwRE7CGpvSVEQS8gUMBe1uADWjNnVgP3Iusyda1nSRwNDCsRjnGc7w6El6WLQsXmZTbLZx9cecegumcitNfpmA==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ] - }, - "node_modules/@rollup/rollup-darwin-x64": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.60.2.tgz", - "integrity": "sha512-gjEtURKLCC5VXm1I+2i1u9OhxFsKAQJKTVB8WvDAHF+oZlq0GTVFOlTlO1q3AlCTE/DF32c16ESvfgqR7343/g==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ] - }, - "node_modules/@rollup/rollup-freebsd-arm64": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-arm64/-/rollup-freebsd-arm64-4.60.2.tgz", - "integrity": "sha512-Bcl6CYDeAgE70cqZaMojOi/eK63h5Me97ZqAQoh77VPjMysA/4ORQBRGo3rRy45x4MzVlU9uZxs8Uwy7ZaKnBw==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "freebsd" - ] - }, - "node_modules/@rollup/rollup-freebsd-x64": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-freebsd-x64/-/rollup-freebsd-x64-4.60.2.tgz", - "integrity": "sha512-LU+TPda3mAE2QB0/Hp5VyeKJivpC6+tlOXd1VMoXV/YFMvk/MNk5iXeBfB4MQGRWyOYVJ01625vjkr0Az98OJQ==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "freebsd" - ] - }, - "node_modules/@rollup/rollup-linux-arm-gnueabihf": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.60.2.tgz", - "integrity": "sha512-2QxQrM+KQ7DAW4o22j+XZ6RKdxjLD7BOWTP0Bv0tmjdyhXSsr2Ul1oJDQqh9Zf5qOwTuTc7Ek83mOFaKnodPjg==", - "cpu": [ - "arm" - ], - "dev": true, - "libc": [ - "glibc" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@rollup/rollup-linux-arm-musleabihf": { - "version": "4.60.2", - "resolved": 
"https://registry.npmjs.org/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.60.2.tgz", - "integrity": "sha512-TbziEu2DVsTEOPif2mKWkMeDMLoYjx95oESa9fkQQK7r/Orta0gnkcDpzwufEcAO2BLBsD7mZkXGFqEdMRRwfw==", - "cpu": [ - "arm" - ], - "dev": true, - "libc": [ - "musl" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@rollup/rollup-linux-arm64-gnu": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.60.2.tgz", - "integrity": "sha512-bO/rVDiDUuM2YfuCUwZ1t1cP+/yqjqz+Xf2VtkdppefuOFS2OSeAfgafaHNkFn0t02hEyXngZkxtGqXcXwO8Rg==", - "cpu": [ - "arm64" - ], - "dev": true, - "libc": [ - "glibc" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@rollup/rollup-linux-arm64-musl": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.60.2.tgz", - "integrity": "sha512-hr26p7e93Rl0Za+JwW7EAnwAvKkehh12BU1Llm9Ykiibg4uIr2rbpxG9WCf56GuvidlTG9KiiQT/TXT1yAWxTA==", - "cpu": [ - "arm64" - ], - "dev": true, - "libc": [ - "musl" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@rollup/rollup-linux-loong64-gnu": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-gnu/-/rollup-linux-loong64-gnu-4.60.2.tgz", - "integrity": "sha512-pOjB/uSIyDt+ow3k/RcLvUAOGpysT2phDn7TTUB3n75SlIgZzM6NKAqlErPhoFU+npgY3/n+2HYIQVbF70P9/A==", - "cpu": [ - "loong64" - ], - "dev": true, - "libc": [ - "glibc" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@rollup/rollup-linux-loong64-musl": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-loong64-musl/-/rollup-linux-loong64-musl-4.60.2.tgz", - "integrity": "sha512-2/w+q8jszv9Ww1c+6uJT3OwqhdmGP2/4T17cu8WuwyUuuaCDDJ2ojdyYwZzCxx0GcsZBhzi3HmH+J5pZNXnd+Q==", - "cpu": [ - 
"loong64" - ], - "dev": true, - "libc": [ - "musl" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@rollup/rollup-linux-ppc64-gnu": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-gnu/-/rollup-linux-ppc64-gnu-4.60.2.tgz", - "integrity": "sha512-11+aL5vKheYgczxtPVVRhdptAM2H7fcDR5Gw4/bTcteuZBlH4oP9f5s9zYO9aGZvoGeBpqXI/9TZZihZ609wKw==", - "cpu": [ - "ppc64" - ], - "dev": true, - "libc": [ - "glibc" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@rollup/rollup-linux-ppc64-musl": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-ppc64-musl/-/rollup-linux-ppc64-musl-4.60.2.tgz", - "integrity": "sha512-i16fokAGK46IVZuV8LIIwMdtqhin9hfYkCh8pf8iC3QU3LpwL+1FSFGej+O7l3E/AoknL6Dclh2oTdnRMpTzFQ==", - "cpu": [ - "ppc64" - ], - "dev": true, - "libc": [ - "musl" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@rollup/rollup-linux-riscv64-gnu": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.60.2.tgz", - "integrity": "sha512-49FkKS6RGQoriDSK/6E2GkAsAuU5kETFCh7pG4yD/ylj9rKhTmO3elsnmBvRD4PgJPds5W2PkhC82aVwmUcJ7A==", - "cpu": [ - "riscv64" - ], - "dev": true, - "libc": [ - "glibc" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@rollup/rollup-linux-riscv64-musl": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-riscv64-musl/-/rollup-linux-riscv64-musl-4.60.2.tgz", - "integrity": "sha512-mjYNkHPfGpUR00DuM1ZZIgs64Hpf4bWcz9Z41+4Q+pgDx73UwWdAYyf6EG/lRFldmdHHzgrYyge5akFUW0D3mQ==", - "cpu": [ - "riscv64" - ], - "dev": true, - "libc": [ - "musl" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@rollup/rollup-linux-s390x-gnu": { - "version": "4.60.2", - "resolved": 
"https://registry.npmjs.org/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.60.2.tgz", - "integrity": "sha512-ALyvJz965BQk8E9Al/JDKKDLH2kfKFLTGMlgkAbbYtZuJt9LU8DW3ZoDMCtQpXAltZxwBHevXz5u+gf0yA0YoA==", - "cpu": [ - "s390x" - ], - "dev": true, - "libc": [ - "glibc" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@rollup/rollup-linux-x64-gnu": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.60.2.tgz", - "integrity": "sha512-UQjrkIdWrKI626Du8lCQ6MJp/6V1LAo2bOK9OTu4mSn8GGXIkPXk/Vsp4bLHCd9Z9Iz2OTEaokUE90VweJgIYQ==", - "cpu": [ - "x64" - ], - "dev": true, - "libc": [ - "glibc" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@rollup/rollup-linux-x64-musl": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.60.2.tgz", - "integrity": "sha512-bTsRGj6VlSdn/XD4CGyzMnzaBs9bsRxy79eTqTCBsA8TMIEky7qg48aPkvJvFe1HyzQ5oMZdg7AnVlWQSKLTnw==", - "cpu": [ - "x64" - ], - "dev": true, - "libc": [ - "musl" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ] - }, - "node_modules/@rollup/rollup-openbsd-x64": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-openbsd-x64/-/rollup-openbsd-x64-4.60.2.tgz", - "integrity": "sha512-6d4Z3534xitaA1FcMWP7mQPq5zGwBmGbhphh2DwaA1aNIXUu3KTOfwrWpbwI4/Gr0uANo7NTtaykFyO2hPuFLg==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "openbsd" - ] - }, - "node_modules/@rollup/rollup-openharmony-arm64": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-openharmony-arm64/-/rollup-openharmony-arm64-4.60.2.tgz", - "integrity": "sha512-NetAg5iO2uN7eB8zE5qrZ3CSil+7IJt4WDFLcC75Ymywq1VZVD6qJ6EvNLjZ3rEm6gB7XW5JdT60c6MN35Z85Q==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ 
- "openharmony" - ] - }, - "node_modules/@rollup/rollup-win32-arm64-msvc": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.60.2.tgz", - "integrity": "sha512-NCYhOotpgWZ5kdxCZsv6Iudx0wX8980Q/oW4pNFNihpBKsDbEA1zpkfxJGC0yugsUuyDZ7gL37dbzwhR0VI7pQ==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "win32" - ] - }, - "node_modules/@rollup/rollup-win32-ia32-msvc": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.60.2.tgz", - "integrity": "sha512-RXsaOqXxfoUBQoOgvmmijVxJnW2IGB0eoMO7F8FAjaj0UTywUO/luSqimWBJn04WNgUkeNhh7fs7pESXajWmkg==", - "cpu": [ - "ia32" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "win32" - ] - }, - "node_modules/@rollup/rollup-win32-x64-gnu": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-gnu/-/rollup-win32-x64-gnu-4.60.2.tgz", - "integrity": "sha512-qdAzEULD+/hzObedtmV6iBpdL5TIbKVztGiK7O3/KYSf+HIzU257+MX1EXJcyIiDbMAqmbwaufcYPvyRryeZtA==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "win32" - ] - }, - "node_modules/@rollup/rollup-win32-x64-msvc": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.60.2.tgz", - "integrity": "sha512-Nd/SgG27WoA9e+/TdK74KnHz852TLa94ovOYySo/yMPuTmpckK/jIF2jSwS3g7ELSKXK13/cVdmg1Z/DaCWKxA==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "win32" - ] - }, - "node_modules/@types/babel__core": { - "version": "7.20.5", - "resolved": "https://registry.npmjs.org/@types/babel__core/-/babel__core-7.20.5.tgz", - "integrity": "sha512-qoQprZvz5wQFJwMDqeseRXWv3rqMvhgpbXFfVyWhbx9X47POIA6i/+dXefEmZKoAgOaTdaIgNSMqMIU61yRyzA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/parser": "^7.20.7", 
- "@babel/types": "^7.20.7", - "@types/babel__generator": "*", - "@types/babel__template": "*", - "@types/babel__traverse": "*" - } - }, - "node_modules/@types/babel__generator": { - "version": "7.27.0", - "resolved": "https://registry.npmjs.org/@types/babel__generator/-/babel__generator-7.27.0.tgz", - "integrity": "sha512-ufFd2Xi92OAVPYsy+P4n7/U7e68fex0+Ee8gSG9KX7eo084CWiQ4sdxktvdl0bOPupXtVJPY19zk6EwWqUQ8lg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/types": "^7.0.0" - } - }, - "node_modules/@types/babel__template": { - "version": "7.4.4", - "resolved": "https://registry.npmjs.org/@types/babel__template/-/babel__template-7.4.4.tgz", - "integrity": "sha512-h/NUaSyG5EyxBIp8YRxo4RMe2/qQgvyowRwVMzhYhBCONbW8PUsg4lkFMrhgZhUe5z3L3MiLDuvyJ/CaPa2A8A==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/parser": "^7.1.0", - "@babel/types": "^7.0.0" - } - }, - "node_modules/@types/babel__traverse": { - "version": "7.28.0", - "resolved": "https://registry.npmjs.org/@types/babel__traverse/-/babel__traverse-7.28.0.tgz", - "integrity": "sha512-8PvcXf70gTDZBgt9ptxJ8elBeBjcLOAcOtoO/mPJjtji1+CdGbHgm77om1GrsPxsiE+uXIpNSK64UYaIwQXd4Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/types": "^7.28.2" - } - }, - "node_modules/@types/estree": { - "version": "1.0.8", - "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.8.tgz", - "integrity": "sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/hammerjs": { - "version": "2.0.46", - "resolved": "https://registry.npmjs.org/@types/hammerjs/-/hammerjs-2.0.46.tgz", - "integrity": "sha512-ynRvcq6wvqexJ9brDMS4BnBLzmr0e14d6ZJTEShTBWKymQiHwlAyGu0ZPEFI2Fh1U53F7tN9ufClWM5KvqkKOw==", - "license": "MIT" - }, - "node_modules/@types/prop-types": { - "version": "15.7.15", - "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.15.tgz", - "integrity": 
"sha512-F6bEyamV9jKGAFBEmlQnesRPGOQqS2+Uwi0Em15xenOxHaf2hv6L8YCVn3rPdPJOiJfPiCnLIRyvwVaqMY3MIw==", - "dev": true, - "license": "MIT" - }, - "node_modules/@types/react": { - "version": "18.3.28", - "resolved": "https://registry.npmjs.org/@types/react/-/react-18.3.28.tgz", - "integrity": "sha512-z9VXpC7MWrhfWipitjNdgCauoMLRdIILQsAEV+ZesIzBq/oUlxk0m3ApZuMFCXdnS4U7KrI+l3WRUEGQ8K1QKw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/prop-types": "*", - "csstype": "^3.2.2" - } - }, - "node_modules/@types/react-dom": { - "version": "18.3.7", - "resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-18.3.7.tgz", - "integrity": "sha512-MEe3UeoENYVFXzoXEWsvcpg6ZvlrFNlOQ7EOsvhI3CfAXwzPfO8Qwuxd40nepsYKqyyVQnTdEfv68q91yLcKrQ==", - "dev": true, - "license": "MIT", - "peerDependencies": { - "@types/react": "^18.0.0" - } - }, - "node_modules/@vitejs/plugin-react": { - "version": "4.7.0", - "resolved": "https://registry.npmjs.org/@vitejs/plugin-react/-/plugin-react-4.7.0.tgz", - "integrity": "sha512-gUu9hwfWvvEDBBmgtAowQCojwZmJ5mcLn3aufeCsitijs3+f2NsrPtlAWIR6OPiqljl96GVCUbLe0HyqIpVaoA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/core": "^7.28.0", - "@babel/plugin-transform-react-jsx-self": "^7.27.1", - "@babel/plugin-transform-react-jsx-source": "^7.27.1", - "@rolldown/pluginutils": "1.0.0-beta.27", - "@types/babel__core": "^7.20.5", - "react-refresh": "^0.17.0" - }, - "engines": { - "node": "^14.18.0 || >=16.0.0" - }, - "peerDependencies": { - "vite": "^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0" - } - }, - "node_modules/ansi-regex": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", - "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/ansi-styles": { - "version": "4.3.0", - "resolved": 
"https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", - "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", - "dev": true, - "license": "MIT", - "dependencies": { - "color-convert": "^2.0.1" - }, - "engines": { - "node": ">=8" - }, - "funding": { - "url": "https://github.com/chalk/ansi-styles?sponsor=1" - } - }, - "node_modules/baseline-browser-mapping": { - "version": "2.10.21", - "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.10.21.tgz", - "integrity": "sha512-Q+rUQ7Uz8AHM7DEaNdwvfFCTq7a43lNTzuS94eiWqwyxfV/wJv+oUivef51T91mmRY4d4A1u9rcSvkeufCVXlA==", - "dev": true, - "license": "Apache-2.0", - "bin": { - "baseline-browser-mapping": "dist/cli.cjs" - }, - "engines": { - "node": ">=6.0.0" - } - }, - "node_modules/browserslist": { - "version": "4.28.2", - "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.28.2.tgz", - "integrity": "sha512-48xSriZYYg+8qXna9kwqjIVzuQxi+KYWp2+5nCYnYKPTr0LvD89Jqk2Or5ogxz0NUMfIjhh2lIUX/LyX9B4oIg==", - "dev": true, - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/browserslist" - }, - { - "type": "tidelift", - "url": "https://tidelift.com/funding/github/npm/browserslist" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "baseline-browser-mapping": "^2.10.12", - "caniuse-lite": "^1.0.30001782", - "electron-to-chromium": "^1.5.328", - "node-releases": "^2.0.36", - "update-browserslist-db": "^1.2.3" - }, - "bin": { - "browserslist": "cli.js" - }, - "engines": { - "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7" - } - }, - "node_modules/caniuse-lite": { - "version": "1.0.30001790", - "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001790.tgz", - "integrity": "sha512-bOoxfJPyYo+ds6W0YfptaCWbFnJYjh2Y1Eow5lRv+vI2u8ganPZqNm1JwNh0t2ELQCqIWg4B3dWEusgAmsoyOw==", - "dev": 
true, - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/browserslist" - }, - { - "type": "tidelift", - "url": "https://tidelift.com/funding/github/npm/caniuse-lite" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "CC-BY-4.0" - }, - "node_modules/chalk": { - "version": "4.1.2", - "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", - "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-styles": "^4.1.0", - "supports-color": "^7.1.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/chalk?sponsor=1" - } - }, - "node_modules/chalk/node_modules/supports-color": { - "version": "7.2.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", - "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", - "dev": true, - "license": "MIT", - "dependencies": { - "has-flag": "^4.0.0" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/chart.js": { - "version": "4.5.1", - "resolved": "https://registry.npmjs.org/chart.js/-/chart.js-4.5.1.tgz", - "integrity": "sha512-GIjfiT9dbmHRiYi6Nl2yFCq7kkwdkp1W/lp2J99rX0yo9tgJGn3lKQATztIjb5tVtevcBtIdICNWqlq5+E8/Pw==", - "license": "MIT", - "dependencies": { - "@kurkle/color": "^0.3.0" - }, - "engines": { - "pnpm": ">=8" - } - }, - "node_modules/chartjs-plugin-zoom": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/chartjs-plugin-zoom/-/chartjs-plugin-zoom-2.2.0.tgz", - "integrity": "sha512-in6kcdiTlP6npIVLMd4zXZ08PDUXC52gZ4FAy5oyjk1zX3gKarXMAof7B9eFiisf9WOC3bh2saHg+J5WtLXZeA==", - "license": "MIT", - "dependencies": { - "@types/hammerjs": "^2.0.45", - "hammerjs": "^2.0.8" - }, - "peerDependencies": { - "chart.js": ">=3.2.0" - } - }, - "node_modules/cliui": { - "version": "8.0.1", - 
"resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz", - "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==", - "dev": true, - "license": "ISC", - "dependencies": { - "string-width": "^4.2.0", - "strip-ansi": "^6.0.1", - "wrap-ansi": "^7.0.0" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/color-convert": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", - "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "color-name": "~1.1.4" - }, - "engines": { - "node": ">=7.0.0" - } - }, - "node_modules/color-name": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", - "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", - "dev": true, - "license": "MIT" - }, - "node_modules/concurrently": { - "version": "8.2.2", - "resolved": "https://registry.npmjs.org/concurrently/-/concurrently-8.2.2.tgz", - "integrity": "sha512-1dP4gpXFhei8IOtlXRE/T/4H88ElHgTiUzh71YUmtjTEHMSRS2Z/fgOxHSxxusGHogsRfxNq1vyAwxSC+EVyDg==", - "dev": true, - "license": "MIT", - "dependencies": { - "chalk": "^4.1.2", - "date-fns": "^2.30.0", - "lodash": "^4.17.21", - "rxjs": "^7.8.1", - "shell-quote": "^1.8.1", - "spawn-command": "0.0.2", - "supports-color": "^8.1.1", - "tree-kill": "^1.2.2", - "yargs": "^17.7.2" - }, - "bin": { - "conc": "dist/bin/concurrently.js", - "concurrently": "dist/bin/concurrently.js" - }, - "engines": { - "node": "^14.13.0 || >=16.0.0" - }, - "funding": { - "url": "https://github.com/open-cli-tools/concurrently?sponsor=1" - } - }, - "node_modules/convert-source-map": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz", - "integrity": 
"sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==", - "dev": true, - "license": "MIT" - }, - "node_modules/csstype": { - "version": "3.2.3", - "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.2.3.tgz", - "integrity": "sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ==", - "dev": true, - "license": "MIT" - }, - "node_modules/date-fns": { - "version": "2.30.0", - "resolved": "https://registry.npmjs.org/date-fns/-/date-fns-2.30.0.tgz", - "integrity": "sha512-fnULvOpxnC5/Vg3NCiWelDsLiUc9bRwAPs/+LfTLNvetFCtCTN+yQz15C/fs4AwX1R9K5GLtLfn8QW+dWisaAw==", - "dev": true, - "license": "MIT", - "dependencies": { - "@babel/runtime": "^7.21.0" - }, - "engines": { - "node": ">=0.11" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/date-fns" - } - }, - "node_modules/debug": { - "version": "4.4.3", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", - "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", - "dev": true, - "license": "MIT", - "dependencies": { - "ms": "^2.1.3" - }, - "engines": { - "node": ">=6.0" - }, - "peerDependenciesMeta": { - "supports-color": { - "optional": true - } - } - }, - "node_modules/downsample": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/downsample/-/downsample-1.4.0.tgz", - "integrity": "sha512-teYPhUPxqwtyICt47t1mP/LjhbRV/ghuKb/LmFDbcZ0CjqFD31tn6rVLZoeCEa1xr8+f2skW8UjRiLiGIKQE4w==", - "license": "MIT" - }, - "node_modules/electron-to-chromium": { - "version": "1.5.343", - "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.343.tgz", - "integrity": "sha512-YHnQ3MXI08icvL9ZKnEBy05F2EQ8ob01UaMOuMbM8l+4UcAq6MPPbBTJBbsBUg3H8JeZNt+O4fjsoWth3p6IFg==", - "dev": true, - "license": "ISC" - }, - "node_modules/emoji-regex": { - "version": "8.0.0", - "resolved": 
"https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "dev": true, - "license": "MIT" - }, - "node_modules/esbuild": { - "version": "0.25.12", - "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.25.12.tgz", - "integrity": "sha512-bbPBYYrtZbkt6Os6FiTLCTFxvq4tt3JKall1vRwshA3fdVztsLAatFaZobhkBC8/BrPetoa0oksYoKXoG4ryJg==", - "dev": true, - "hasInstallScript": true, - "license": "MIT", - "bin": { - "esbuild": "bin/esbuild" - }, - "engines": { - "node": ">=18" - }, - "optionalDependencies": { - "@esbuild/aix-ppc64": "0.25.12", - "@esbuild/android-arm": "0.25.12", - "@esbuild/android-arm64": "0.25.12", - "@esbuild/android-x64": "0.25.12", - "@esbuild/darwin-arm64": "0.25.12", - "@esbuild/darwin-x64": "0.25.12", - "@esbuild/freebsd-arm64": "0.25.12", - "@esbuild/freebsd-x64": "0.25.12", - "@esbuild/linux-arm": "0.25.12", - "@esbuild/linux-arm64": "0.25.12", - "@esbuild/linux-ia32": "0.25.12", - "@esbuild/linux-loong64": "0.25.12", - "@esbuild/linux-mips64el": "0.25.12", - "@esbuild/linux-ppc64": "0.25.12", - "@esbuild/linux-riscv64": "0.25.12", - "@esbuild/linux-s390x": "0.25.12", - "@esbuild/linux-x64": "0.25.12", - "@esbuild/netbsd-arm64": "0.25.12", - "@esbuild/netbsd-x64": "0.25.12", - "@esbuild/openbsd-arm64": "0.25.12", - "@esbuild/openbsd-x64": "0.25.12", - "@esbuild/openharmony-arm64": "0.25.12", - "@esbuild/sunos-x64": "0.25.12", - "@esbuild/win32-arm64": "0.25.12", - "@esbuild/win32-ia32": "0.25.12", - "@esbuild/win32-x64": "0.25.12" - } - }, - "node_modules/escalade": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", - "integrity": "sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/fdir": { - "version": "6.5.0", - "resolved": 
"https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz", - "integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12.0.0" - }, - "peerDependencies": { - "picomatch": "^3 || ^4" - }, - "peerDependenciesMeta": { - "picomatch": { - "optional": true - } - } - }, - "node_modules/fsevents": { - "version": "2.3.3", - "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", - "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", - "dev": true, - "hasInstallScript": true, - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": "^8.16.0 || ^10.6.0 || >=11.0.0" - } - }, - "node_modules/gensync": { - "version": "1.0.0-beta.2", - "resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz", - "integrity": "sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=6.9.0" - } - }, - "node_modules/get-caller-file": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", - "integrity": "sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==", - "dev": true, - "license": "ISC", - "engines": { - "node": "6.* || 8.* || >= 10.*" - } - }, - "node_modules/hammerjs": { - "version": "2.0.8", - "resolved": "https://registry.npmjs.org/hammerjs/-/hammerjs-2.0.8.tgz", - "integrity": "sha512-tSQXBXS/MWQOn/RKckawJ61vvsDpCom87JgxiYdGwHdOa0ht0vzUWDlfioofFCRU0L+6NGDt6XzbgoJvZkMeRQ==", - "license": "MIT", - "engines": { - "node": ">=0.8.0" - } - }, - "node_modules/has-flag": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", - "integrity": 
"sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/is-fullwidth-code-point": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", - "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=8" - } - }, - "node_modules/js-tokens": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", - "integrity": "sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==", - "license": "MIT" - }, - "node_modules/jsesc": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/jsesc/-/jsesc-3.1.0.tgz", - "integrity": "sha512-/sM3dO2FOzXjKQhJuo0Q173wf2KOo8t4I8vHy6lF9poUp7bKT0/NHE8fPX23PwfhnykfqnC2xRxOnVw5XuGIaA==", - "dev": true, - "license": "MIT", - "bin": { - "jsesc": "bin/jsesc" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/json5": { - "version": "2.2.3", - "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", - "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", - "dev": true, - "license": "MIT", - "bin": { - "json5": "lib/cli.js" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/lodash": { - "version": "4.18.1", - "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.18.1.tgz", - "integrity": "sha512-dMInicTPVE8d1e5otfwmmjlxkZoUpiVLwyeTdUsi/Caj/gfzzblBcCE5sRHV/AsjuCmxWrte2TNGSYuCeCq+0Q==", - "dev": true, - "license": "MIT" - }, - "node_modules/loose-envify": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", - "integrity": 
"sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==", - "license": "MIT", - "dependencies": { - "js-tokens": "^3.0.0 || ^4.0.0" - }, - "bin": { - "loose-envify": "cli.js" - } - }, - "node_modules/lru-cache": { - "version": "5.1.1", - "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz", - "integrity": "sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==", - "dev": true, - "license": "ISC", - "dependencies": { - "yallist": "^3.0.2" - } - }, - "node_modules/lucide-react": { - "version": "0.577.0", - "resolved": "https://registry.npmjs.org/lucide-react/-/lucide-react-0.577.0.tgz", - "integrity": "sha512-4LjoFv2eEPwYDPg/CUdBJQSDfPyzXCRrVW1X7jrx/trgxnxkHFjnVZINbzvzxjN70dxychOfg+FTYwBiS3pQ5A==", - "license": "ISC", - "peerDependencies": { - "react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0" - } - }, - "node_modules/ms": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", - "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", - "dev": true, - "license": "MIT" - }, - "node_modules/nanoid": { - "version": "3.3.11", - "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz", - "integrity": "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==", - "dev": true, - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "bin": { - "nanoid": "bin/nanoid.cjs" - }, - "engines": { - "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" - } - }, - "node_modules/node-releases": { - "version": "2.0.38", - "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.38.tgz", - "integrity": "sha512-3qT/88Y3FbH/Kx4szpQQ4HzUbVrHPKTLVpVocKiLfoYvw9XSGOX2FmD2d6DrXbVYyAQTF2HeF6My8jmzx7/CRw==", - "dev": true, - "license": "MIT" - }, - "node_modules/picocolors": { - "version": 
"1.1.1", - "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.1.1.tgz", - "integrity": "sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==", - "dev": true, - "license": "ISC" - }, - "node_modules/picomatch": { - "version": "4.0.4", - "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.4.tgz", - "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/sponsors/jonschlinkert" - } - }, - "node_modules/postcss": { - "version": "8.5.10", - "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.10.tgz", - "integrity": "sha512-pMMHxBOZKFU6HgAZ4eyGnwXF/EvPGGqUr0MnZ5+99485wwW41kW91A4LOGxSHhgugZmSChL5AlElNdwlNgcnLQ==", - "dev": true, - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/postcss/" - }, - { - "type": "tidelift", - "url": "https://tidelift.com/funding/github/npm/postcss" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "nanoid": "^3.3.11", - "picocolors": "^1.1.1", - "source-map-js": "^1.2.1" - }, - "engines": { - "node": "^10 || ^12 || >=14" - } - }, - "node_modules/react": { - "version": "18.3.1", - "resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz", - "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==", - "license": "MIT", - "dependencies": { - "loose-envify": "^1.1.0" - }, - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/react-chartjs-2": { - "version": "5.3.1", - "resolved": "https://registry.npmjs.org/react-chartjs-2/-/react-chartjs-2-5.3.1.tgz", - "integrity": "sha512-h5IPXKg9EXpjoBzUfyWJvllMjG2mQ4EiuHQFhms/AjUm0XSZHhyRy2xVmLXHKrtcdrPO4mnGqRtYoD0vp95A0A==", - "license": "MIT", - "peerDependencies": { - "chart.js": 
"^4.1.1", - "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" - } - }, - "node_modules/react-dom": { - "version": "18.3.1", - "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz", - "integrity": "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==", - "license": "MIT", - "dependencies": { - "loose-envify": "^1.1.0", - "scheduler": "^0.23.2" - }, - "peerDependencies": { - "react": "^18.3.1" - } - }, - "node_modules/react-refresh": { - "version": "0.17.0", - "resolved": "https://registry.npmjs.org/react-refresh/-/react-refresh-0.17.0.tgz", - "integrity": "sha512-z6F7K9bV85EfseRCp2bzrpyQ0Gkw1uLoCel9XBVWPg/TjRj94SkJzUTGfOa4bs7iJvBWtQG0Wq7wnI0syw3EBQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/require-directory": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", - "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/rollup": { - "version": "4.60.2", - "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.60.2.tgz", - "integrity": "sha512-J9qZyW++QK/09NyN/zeO0dG/1GdGfyp9lV8ajHnRVLfo/uFsbji5mHnDgn/qYdUHyCkM2N+8VyspgZclfAh0eQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/estree": "1.0.8" - }, - "bin": { - "rollup": "dist/bin/rollup" - }, - "engines": { - "node": ">=18.0.0", - "npm": ">=8.0.0" - }, - "optionalDependencies": { - "@rollup/rollup-android-arm-eabi": "4.60.2", - "@rollup/rollup-android-arm64": "4.60.2", - "@rollup/rollup-darwin-arm64": "4.60.2", - "@rollup/rollup-darwin-x64": "4.60.2", - "@rollup/rollup-freebsd-arm64": "4.60.2", - "@rollup/rollup-freebsd-x64": "4.60.2", - "@rollup/rollup-linux-arm-gnueabihf": "4.60.2", - "@rollup/rollup-linux-arm-musleabihf": "4.60.2", - 
"@rollup/rollup-linux-arm64-gnu": "4.60.2", - "@rollup/rollup-linux-arm64-musl": "4.60.2", - "@rollup/rollup-linux-loong64-gnu": "4.60.2", - "@rollup/rollup-linux-loong64-musl": "4.60.2", - "@rollup/rollup-linux-ppc64-gnu": "4.60.2", - "@rollup/rollup-linux-ppc64-musl": "4.60.2", - "@rollup/rollup-linux-riscv64-gnu": "4.60.2", - "@rollup/rollup-linux-riscv64-musl": "4.60.2", - "@rollup/rollup-linux-s390x-gnu": "4.60.2", - "@rollup/rollup-linux-x64-gnu": "4.60.2", - "@rollup/rollup-linux-x64-musl": "4.60.2", - "@rollup/rollup-openbsd-x64": "4.60.2", - "@rollup/rollup-openharmony-arm64": "4.60.2", - "@rollup/rollup-win32-arm64-msvc": "4.60.2", - "@rollup/rollup-win32-ia32-msvc": "4.60.2", - "@rollup/rollup-win32-x64-gnu": "4.60.2", - "@rollup/rollup-win32-x64-msvc": "4.60.2", - "fsevents": "~2.3.2" - } - }, - "node_modules/rxjs": { - "version": "7.8.2", - "resolved": "https://registry.npmjs.org/rxjs/-/rxjs-7.8.2.tgz", - "integrity": "sha512-dhKf903U/PQZY6boNNtAGdWbG85WAbjT/1xYoZIC7FAY0yWapOBQVsVrDl58W86//e1VpMNBtRV4MaXfdMySFA==", - "dev": true, - "license": "Apache-2.0", - "dependencies": { - "tslib": "^2.1.0" - } - }, - "node_modules/scheduler": { - "version": "0.23.2", - "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.23.2.tgz", - "integrity": "sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ==", - "license": "MIT", - "dependencies": { - "loose-envify": "^1.1.0" - } - }, - "node_modules/semver": { - "version": "6.3.1", - "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", - "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", - "dev": true, - "license": "ISC", - "bin": { - "semver": "bin/semver.js" - } - }, - "node_modules/shell-quote": { - "version": "1.8.3", - "resolved": "https://registry.npmjs.org/shell-quote/-/shell-quote-1.8.3.tgz", - "integrity": 
"sha512-ObmnIF4hXNg1BqhnHmgbDETF8dLPCggZWBjkQfhZpbszZnYur5DUljTcCHii5LC3J5E0yeO/1LIMyH+UvHQgyw==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/source-map-js": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", - "integrity": "sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==", - "dev": true, - "license": "BSD-3-Clause", - "engines": { - "node": ">=0.10.0" - } - }, - "node_modules/spawn-command": { - "version": "0.0.2", - "resolved": "https://registry.npmjs.org/spawn-command/-/spawn-command-0.0.2.tgz", - "integrity": "sha512-zC8zGoGkmc8J9ndvml8Xksr1Amk9qBujgbF0JAIWO7kXr43w0h/0GJNM/Vustixu+YE8N/MTrQ7N31FvHUACxQ==", - "dev": true - }, - "node_modules/string-width": { - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", - "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "dev": true, - "license": "MIT", - "dependencies": { - "emoji-regex": "^8.0.0", - "is-fullwidth-code-point": "^3.0.0", - "strip-ansi": "^6.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/strip-ansi": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", - "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-regex": "^5.0.1" - }, - "engines": { - "node": ">=8" - } - }, - "node_modules/supports-color": { - "version": "8.1.1", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-8.1.1.tgz", - "integrity": "sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "has-flag": 
"^4.0.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/supports-color?sponsor=1" - } - }, - "node_modules/tinyglobby": { - "version": "0.2.16", - "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.16.tgz", - "integrity": "sha512-pn99VhoACYR8nFHhxqix+uvsbXineAasWm5ojXoN8xEwK5Kd3/TrhNn1wByuD52UxWRLy8pu+kRMniEi6Eq9Zg==", - "dev": true, - "license": "MIT", - "dependencies": { - "fdir": "^6.5.0", - "picomatch": "^4.0.4" - }, - "engines": { - "node": ">=12.0.0" - }, - "funding": { - "url": "https://github.com/sponsors/SuperchupuDev" - } - }, - "node_modules/tree-kill": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/tree-kill/-/tree-kill-1.2.2.tgz", - "integrity": "sha512-L0Orpi8qGpRG//Nd+H90vFB+3iHnue1zSSGmNOOCh1GLJ7rUKVwV2HvijphGQS2UmhUZewS9VgvxYIdgr+fG1A==", - "dev": true, - "license": "MIT", - "bin": { - "tree-kill": "cli.js" - } - }, - "node_modules/tslib": { - "version": "2.8.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", - "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "dev": true, - "license": "0BSD" - }, - "node_modules/update-browserslist-db": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.2.3.tgz", - "integrity": "sha512-Js0m9cx+qOgDxo0eMiFGEueWztz+d4+M3rGlmKPT+T4IS/jP4ylw3Nwpu6cpTTP8R1MAC1kF4VbdLt3ARf209w==", - "dev": true, - "funding": [ - { - "type": "opencollective", - "url": "https://opencollective.com/browserslist" - }, - { - "type": "tidelift", - "url": "https://tidelift.com/funding/github/npm/browserslist" - }, - { - "type": "github", - "url": "https://github.com/sponsors/ai" - } - ], - "license": "MIT", - "dependencies": { - "escalade": "^3.2.0", - "picocolors": "^1.1.1" - }, - "bin": { - "update-browserslist-db": "cli.js" - }, - "peerDependencies": { - "browserslist": ">= 4.21.0" - } - }, - "node_modules/vite": { 
- "version": "6.4.2", - "resolved": "https://registry.npmjs.org/vite/-/vite-6.4.2.tgz", - "integrity": "sha512-2N/55r4JDJ4gdrCvGgINMy+HH3iRpNIz8K6SFwVsA+JbQScLiC+clmAxBgwiSPgcG9U15QmvqCGWzMbqda5zGQ==", - "dev": true, - "license": "MIT", - "dependencies": { - "esbuild": "^0.25.0", - "fdir": "^6.4.4", - "picomatch": "^4.0.2", - "postcss": "^8.5.3", - "rollup": "^4.34.9", - "tinyglobby": "^0.2.13" - }, - "bin": { - "vite": "bin/vite.js" - }, - "engines": { - "node": "^18.0.0 || ^20.0.0 || >=22.0.0" - }, - "funding": { - "url": "https://github.com/vitejs/vite?sponsor=1" - }, - "optionalDependencies": { - "fsevents": "~2.3.3" - }, - "peerDependencies": { - "@types/node": "^18.0.0 || ^20.0.0 || >=22.0.0", - "jiti": ">=1.21.0", - "less": "*", - "lightningcss": "^1.21.0", - "sass": "*", - "sass-embedded": "*", - "stylus": "*", - "sugarss": "*", - "terser": "^5.16.0", - "tsx": "^4.8.1", - "yaml": "^2.4.2" - }, - "peerDependenciesMeta": { - "@types/node": { - "optional": true - }, - "jiti": { - "optional": true - }, - "less": { - "optional": true - }, - "lightningcss": { - "optional": true - }, - "sass": { - "optional": true - }, - "sass-embedded": { - "optional": true - }, - "stylus": { - "optional": true - }, - "sugarss": { - "optional": true - }, - "terser": { - "optional": true - }, - "tsx": { - "optional": true - }, - "yaml": { - "optional": true - } - } - }, - "node_modules/wrap-ansi": { - "version": "7.0.0", - "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", - "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "ansi-styles": "^4.0.0", - "string-width": "^4.1.0", - "strip-ansi": "^6.0.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/chalk/wrap-ansi?sponsor=1" - } - }, - "node_modules/y18n": { - "version": "5.0.8", - "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", - 
"integrity": "sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==", - "dev": true, - "license": "ISC", - "engines": { - "node": ">=10" - } - }, - "node_modules/yallist": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.1.1.tgz", - "integrity": "sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==", - "dev": true, - "license": "ISC" - }, - "node_modules/yargs": { - "version": "17.7.2", - "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz", - "integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==", - "dev": true, - "license": "MIT", - "dependencies": { - "cliui": "^8.0.1", - "escalade": "^3.1.1", - "get-caller-file": "^2.0.5", - "require-directory": "^2.1.1", - "string-width": "^4.2.3", - "y18n": "^5.0.5", - "yargs-parser": "^21.1.1" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/yargs-parser": { - "version": "21.1.1", - "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", - "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", - "dev": true, - "license": "ISC", - "engines": { - "node": ">=12" - } - } - } -} diff --git a/benchmarks-website/package.json b/benchmarks-website/package.json deleted file mode 100644 index 6cda687b838..00000000000 --- a/benchmarks-website/package.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "name": "vortex-benchmarks-website", - "version": "2.0.0", - "type": "module", - "scripts": { - "dev": "concurrently \"npm run server\" \"npm run vite\"", - "vite": "vite", - "server": "node server.js", - "build": "vite build", - "preview": "vite preview" - }, - "engines": { - "node": ">=18.0.0" - }, - "dependencies": { - "chart.js": "^4.4.4", - "chartjs-plugin-zoom": "^2.0.1", - "downsample": "^1.4.0", - "hammerjs": "^2.0.8", - "lucide-react": "^0.577.0", - "react": 
"^18.3.1", - "react-chartjs-2": "^5.2.0", - "react-dom": "^18.3.1" - }, - "devDependencies": { - "@types/react": "^18.3.3", - "@types/react-dom": "^18.3.0", - "@vitejs/plugin-react": "^4.3.1", - "concurrently": "^8.2.2", - "vite": "^6.0.0" - } -} diff --git a/benchmarks-website/planning/00-overview.md b/benchmarks-website/planning/00-overview.md new file mode 100644 index 00000000000..c6c3e05c57f --- /dev/null +++ b/benchmarks-website/planning/00-overview.md @@ -0,0 +1,104 @@ + + +# 00 - Overview + +## What we're building + +A replacement for the current `bench.vortex.dev` site. The new +stack is a **single Rust binary** (axum + maud + duckdb-rs) that +owns a **DuckDB database** on local disk and serves the website +plus an `/api/ingest` route. CI eventually POSTs new benchmark +results there. There is no separate ingester service, no S3 +coordination layer for writes, no client-side WASM. + +The server crate is `vortex-bench-server` at +`benchmarks-website/server/`. + +## Phasing + +We build this in two phases. **Plan only the first.** + +### Alpha (this plan) + +The smallest end-to-end loop that proves the design: + +1. **Schema** locked enough to ingest one benchmark result. +2. **Server**: open DuckDB, accept a bearer-token-authenticated POST, + serve a couple of read routes. +3. **Emitter**: `vortex-bench --gh-json-v3` + a tiny POST script. +4. **Web UI**: one landing page + one chart page rendered against a + fixture DB. + +That's it. No production deploy, no historical data import, no CI +workflow integration, no admin tooling, no schema migration +framework, no auth beyond the shared bearer token. All of those +live in [`deferred.md`](./deferred.md). + +The alpha runs on a developer machine. v2 keeps running in +production unchanged. There is no cutover in alpha. + +### Phase 2 and beyond + +Once the alpha loop is green, we layer in production deploy, +historical migration, CI dual-write, and the rest of the v2-parity +work. 
Stubs are in [`deferred.md`](./deferred.md). + +## Architecture (alpha) + +One process, one DB file. The server is the API and the website. +The emitter writes JSONL of bare records; a small POST script +wraps and uploads them. CI isn't wired up yet; ingest happens +manually during alpha. + +## Components + +Three components for alpha. Each is one workstream, one branch, one +PR. + +| Component | Plan | Owns | +|---|---|---| +| Server | [components/server.md](./components/server.md) | DuckDB open + schema, bearer-auth ingest, read routes, HTML routes mounted from web-ui | +| Emitter | [components/emitter.md](./components/emitter.md) | `vortex-bench --gh-json-v3` + the post-ingest script | +| Web UI | [components/web-ui.md](./components/web-ui.md) | Landing page + chart page, against a fixture DuckDB | + +### Dependencies + +The schema feeds all three components. The contracts feed the +server and the emitter. With both stable, **all three components +can be worked on in parallel**. + +## Goals + +In priority order: + +1. **End-to-end alpha loop works.** Emit → POST → store → render. +2. **Schema is the right shape.** Five fact tables (one per + measurement family) plus a `commits` dim. See + [`01-schema.md`](./01-schema.md). +3. **Each component is small enough that one agent can finish it + in one PR.** No mega-PRs. + +Cutover, parity, and "faster than v2" are explicit non-goals at +alpha; they come back in phase 2. + +## Shared docs + +- [`00-overview.md`](./00-overview.md) (this file) +- [`01-schema.md`](./01-schema.md) - the five fact tables + `commits` +- [`02-contracts.md`](./02-contracts.md) - wire shapes + HTTP error + matrix + auth header +- [`benchmark-mapping.md`](./benchmark-mapping.md) - existing + benchmarks → fact tables +- [`decisions.md`](./decisions.md) - resolved decisions +- [`deferred.md`](./deferred.md) - phase-2 stubs + +## Status of v2 during alpha + +v2 stays in production untouched. 
Do not edit +`benchmarks-website/server.js`, `benchmarks-website/src/`, or any +other v2 files at `benchmarks-website/` top level. v3 lives in the +sibling subdirectory at `benchmarks-website/server/` +(`vortex-bench-server` crate). diff --git a/benchmarks-website/planning/01-schema.md b/benchmarks-website/planning/01-schema.md new file mode 100644 index 00000000000..b74d0bb9a92 --- /dev/null +++ b/benchmarks-website/planning/01-schema.md @@ -0,0 +1,228 @@ + + +# 01 - DuckDB schema (alpha) + +The persistent data model. **One `commits` dim table plus five fact +tables, one per measurement family.** No lookup tables, no views, no +migration framework; those are deferred (see +[`deferred.md`](./deferred.md)). + +## Design principles + +1. **One fact table per (dim shape, value shape).** A row in any + fact table has every value column populated; NULLs only appear + in genuinely optional dimensions. +2. **No discriminator columns spanning families.** No `metric_kind` + enum forcing five shapes into one row. +3. **No JSON escape hatch.** New benchmark parameters become real + columns. Adding a nullable column is cheap; the readability win + is worth it. +4. **Hashed primary key per table.** Each fact table has a + `measurement_id` that is a deterministic 64-bit hash of + `commit_sha` plus that table's dimensional tuple. Including + `commit_sha` makes every (commit, dim) pair a distinct row - + that's what the chart pages render as a time series. + Server-internal; not on the wire. +5. **`commits` is the only dim table.** Engine, format, dataset, + etc. stay as inline strings; DuckDB's dictionary encoding makes + a lookup table pointless. +6. **Ratios are not stored.** Computed at query time from + `compression_sizes`. 
+ +## Why five fact tables, not one + +The five families have genuinely different shapes: + +| Table | Shape sketch | +|---|---| +| `query_measurements` | dataset + query_idx + engine + format + storage → timing **and** memory | +| `compression_times` | dataset + format + op∈{encode,decode} → timing | +| `compression_sizes` | dataset + format → bytes | +| `random_access_times` | dataset + format → timing (different dataset namespace) | +| `vector_search_runs` | dataset + layout + flavor + threshold → timing + counters | + +Forcing them into one table either bloats every row with columns +that are NULL for ~99% of rows (`layout`, `flavor`, `threshold`, +`matches`, `rows_scanned`, `bytes_scanned`) or splits scan results +across multiple rows that have to be re-joined to render one chart. + +## Group / chart / series fit + +The render-time view used by `/api/groups` and `/api/chart/:slug` +is mechanically derivable per table: + +| Table | Group key | Chart key | Series key | +|---|---|---|---| +| `query_measurements` | `(dataset, dataset_variant, scale_factor, storage)` | `(dataset, query_idx)` | `(engine, format)` | +| `compression_times` | constant `"Compression"` | `(dataset, dataset_variant)` | `(format, op)` | +| `compression_sizes` | constant `"Compression Size"` | `(dataset, dataset_variant)` | `format` | +| `random_access_times` | constant `"Random Access"` | `dataset` | `format` | +| `vector_search_runs` | `(dataset, layout)` | `(dataset, layout, threshold)` | `flavor` | + +The classifier logic in v2's `v2-classifier.js` mostly disappears - +each table already knows what suite it represents. + +## Tables + +DDL is the server's call. Below is the column contract: name, type +family, and whether it's NOT NULL. The server agent picks exact +DuckDB types, indexes, and constraint syntax. + +### `commits` (dim) + +| Column | Type | Required? 
| Notes | +|---|---|---|---| +| `commit_sha` | string | yes (PK) | 40-hex lowercase | +| `timestamp` | timestamptz | yes | | +| `message` | string | yes | first line only | +| `author_name` | string | yes | | +| `author_email` | string | yes | | +| `committer_name` | string | yes | | +| `committer_email` | string | yes | | +| `tree_sha` | string | yes | | +| `url` | string | yes | | + +Populated from the envelope on every `/api/ingest` call. + +### `query_measurements` + +SQL query suites: TPC-H, TPC-DS, ClickBench, StatPopGen, +PolarSignals, Fineweb, GhArchive, Public-BI. Memory columns are +populated when the run was instrumented for memory; NULL otherwise. +Timing and memory share the row because they're produced together +for the same query execution. + +| Column | Type | Required? | Notes | +|---|---|---|---| +| `measurement_id` | int64 | yes (PK) | hash of dim tuple | +| `commit_sha` | string | yes | FK to `commits` | +| `dataset` | string | yes | `tpch`, `tpcds`, `clickbench`, ... | +| `dataset_variant` | string | optional | ClickBench flavor, Public-BI name | +| `scale_factor` | string | optional | TPC SF; n_rows for StatPopGen / PolarSignals | +| `query_idx` | int32 | yes | 1-based | +| `storage` | string | yes | `nvme` or `s3` | +| `engine` | string | yes | `datafusion`, `duckdb`, `vortex`, `arrow` | +| `format` | string | yes | `vortex-file-compressed`, `parquet`, `lance`, ... | +| `value_ns` | int64 | yes | median timing, ns | +| `all_runtimes_ns` | list<int64> | yes | per-iteration timings | +| `peak_physical` | int64 | optional | bytes | +| `peak_virtual` | int64 | optional | bytes | +| `physical_delta` | int64 | optional | bytes | +| `virtual_delta` | int64 | optional | bytes | +| `env_triple` | string | optional | e.g. `x86_64-linux-gnu` | + +### `compression_times` + +Encode/decode timings from `compress-bench`. + +| Column | Type | Required? 
| Notes | +|---|---|---|---| +| `measurement_id` | int64 | yes (PK) | | +| `commit_sha` | string | yes | FK | +| `dataset` | string | yes | | +| `dataset_variant` | string | optional | | +| `format` | string | yes | | +| `op` | string | yes | `encode` or `decode` | +| `value_ns` | int64 | yes | | +| `all_runtimes_ns` | list<int64> | yes | | +| `env_triple` | string | optional | | + +### `compression_sizes` + +On-disk sizes from `compress-bench`. One-shot, no per-iteration data. +Compression ratios in v2 (`vortex:parquet-zstd ratio/...`) are a +SELECT over this table joined to itself; they're not stored. + +| Column | Type | Required? | Notes | +|---|---|---|---| +| `measurement_id` | int64 | yes (PK) | | +| `commit_sha` | string | yes | FK | +| `dataset` | string | yes | | +| `dataset_variant` | string | optional | | +| `format` | string | yes | | +| `value_bytes` | int64 | yes | | + +### `random_access_times` + +Take-time timings from `random-access-bench`. Different dataset +namespace from `compression_times` - kept in its own table so +dataset filters never have to disambiguate which suite a row +belongs to. + +| Column | Type | Required? | Notes | +|---|---|---|---| +| `measurement_id` | int64 | yes (PK) | | +| `commit_sha` | string | yes | FK | +| `dataset` | string | yes | | +| `format` | string | yes | | +| `value_ns` | int64 | yes | | +| `all_runtimes_ns` | list<int64> | yes | | +| `env_triple` | string | optional | | + +### `vector_search_runs` + +Cosine-similarity scans from `vector-search-bench`. The only family +that emits a timing **plus side counters** for the same scan; +keeping them in one row avoids a 1:N split that has to be re-joined +on read. + +| Column | Type | Required? | Notes | +|---|---|---|---| +| `measurement_id` | int64 | yes (PK) | | +| `commit_sha` | string | yes | FK | +| `dataset` | string | yes | e.g. `cohere-large-10m` | +| `layout` | string | yes | `TrainLayout`, e.g. 
`partitioned` | +| `flavor` | string | yes | `VectorFlavor`, e.g. `vortex-turboquant` | +| `threshold` | double | yes | cosine threshold | +| `value_ns` | int64 | yes | per-scan wall time | +| `all_runtimes_ns` | list<int64> | yes | | +| `matches` | int64 | yes | | +| `rows_scanned` | int64 | yes | | +| `bytes_scanned` | int64 | yes | | +| `iterations` | int32 | yes | not part of the dim hash | +| `env_triple` | string | optional | | + +## `measurement_id` hash + +Per-table xxhash64 over `commit_sha` plus that table's dimensional +tuple. Including `commit_sha` makes every (commit, dim) pair a +distinct row, which is what the chart pages render as a time +series. The hash is **server-internal** - the wire never carries +it. The server's INSERT path computes it before each +`INSERT ... ON CONFLICT DO UPDATE`, which gives idempotent upsert +on re-emission of the same (commit, dim) pair. Encoding details +(input order, NULL handling, byte layout) are the server's call, +since the value never crosses a process boundary. + +When the historical migrator lands (deferred), it reuses the +server's hash function via a shared crate. + +## Storage values + +`storage` is `'nvme'` or `'s3'`. Legacy `gcs` is dropped. Only +`query_measurements` carries `storage` - the other families don't +fan out by storage backend. + +## Schema changes during alpha + +There is no migration framework. If you change the schema: + +1. Update this doc. +2. Update the server's DDL. +3. Delete any local `bench.duckdb` and re-run. + +A real forward-only migration framework lands post-alpha. See +[`deferred.md`](./deferred.md). + +## What's intentionally NOT here (deferred) + +- `schema_meta` and migration framework. +- `known_engines` / `known_formats` / `known_datasets` lookup + tables and seed SQL. +- Views (`v_compression_ratios`, `v_latest_per_group`, etc.). +- Pre-downsampled aliases. +- A `microbench_runs` table - reserved as the next family to add + when microbench results start landing. 
diff --git a/benchmarks-website/planning/02-contracts.md b/benchmarks-website/planning/02-contracts.md new file mode 100644 index 00000000000..9aba31fb73b --- /dev/null +++ b/benchmarks-website/planning/02-contracts.md @@ -0,0 +1,195 @@ + + +# 02 - Wire contracts (alpha) + +The cross-component glue between the emitter, the POST script, and +the server. Wire-format only - implementations are local to each +component. + +If two components disagree about a shape, **this file is right** +and both update. + +## Records are discriminated by `kind` + +Each record on the wire carries a `kind` field that picks one of +the [five fact tables](./01-schema.md#tables). The emitter never +decides "what column" - it decides "what kind", and the rest of the +row is that kind's flat field set. + +| `kind` | Destination table | +|---|---| +| `query_measurement` | `query_measurements` | +| `compression_time` | `compression_times` | +| `compression_size` | `compression_sizes` | +| `random_access_time` | `random_access_times` | +| `vector_search_run` | `vector_search_runs` | + +**Unknown `kind` values cause a 400.** Unknown fields within a known +`kind` also cause a 400. Version skew should fail loudly. + +## Per-kind record shapes + +All shared metadata first; per-kind fields after. + +### `query_measurement` + +| Field | Type | Required? | Notes | +|---|---|---|---| +| `kind` | `"query_measurement"` | yes | discriminator | +| `commit_sha` | string | yes | 40-hex lowercase | +| `dataset` | string | yes | `tpch`, `tpcds`, `clickbench`, ... | +| `dataset_variant` | string | optional | ClickBench flavor, Public-BI name | +| `scale_factor` | string | optional | TPC SF; n_rows for StatPopGen / PolarSignals | +| `query_idx` | integer | yes | 1-based | +| `storage` | enum string | yes | `nvme` or `s3` | +| `engine` | string | yes | `datafusion`, `duckdb`, `vortex`, `arrow` | +| `format` | string | yes | `vortex-file-compressed`, `parquet`, `lance`, ... 
| +| `value_ns` | integer | yes | median timing, ns | +| `all_runtimes_ns` | array<integer> | yes | per-iteration timings (may be empty) | +| `peak_physical` | integer | optional | bytes | +| `peak_virtual` | integer | optional | bytes | +| `physical_delta` | integer | optional | bytes | +| `virtual_delta` | integer | optional | bytes | +| `env_triple` | string | optional | e.g. `x86_64-linux-gnu` | + +The four memory fields are populated together (all four or none). + +### `compression_time` + +| Field | Type | Required? | Notes | +|---|---|---|---| +| `kind` | `"compression_time"` | yes | | +| `commit_sha` | string | yes | | +| `dataset` | string | yes | | +| `dataset_variant` | string | optional | | +| `format` | string | yes | | +| `op` | enum string | yes | `encode` or `decode` | +| `value_ns` | integer | yes | | +| `all_runtimes_ns` | array<integer> | yes | | +| `env_triple` | string | optional | | + +### `compression_size` + +| Field | Type | Required? | Notes | +|---|---|---|---| +| `kind` | `"compression_size"` | yes | | +| `commit_sha` | string | yes | | +| `dataset` | string | yes | | +| `dataset_variant` | string | optional | | +| `format` | string | yes | | +| `value_bytes` | integer | yes | | + +### `random_access_time` + +| Field | Type | Required? | Notes | +|---|---|---|---| +| `kind` | `"random_access_time"` | yes | | +| `commit_sha` | string | yes | | +| `dataset` | string | yes | random-access dataset name (e.g. `chimp`, `taxi`) | +| `format` | string | yes | | +| `value_ns` | integer | yes | | +| `all_runtimes_ns` | array<integer> | yes | | +| `env_triple` | string | optional | | + +### `vector_search_run` + +| Field | Type | Required? | Notes | +|---|---|---|---| +| `kind` | `"vector_search_run"` | yes | | +| `commit_sha` | string | yes | | +| `dataset` | string | yes | e.g. `cohere-large-10m` | +| `layout` | string | yes | `TrainLayout`, e.g. `partitioned` | +| `flavor` | string | yes | `VectorFlavor`, e.g. 
`vortex-turboquant` | + | `threshold` | number | yes | cosine threshold | + | `value_ns` | integer | yes | per-scan wall time (median of iterations) | + | `all_runtimes_ns` | array<integer> | yes | | + | `matches` | integer | yes | | + | `rows_scanned` | integer | yes | | + | `bytes_scanned` | integer | yes | | + | `iterations` | integer | yes | | + | `env_triple` | string | optional | | + +## Ingest envelope + +`/api/ingest` accepts one envelope per POST. The envelope wraps a +heterogeneous batch of records (any mix of `kind`s). Required +top-level fields: + +- `run_meta`: object with `benchmark_id` (string), `schema_version` + (integer; `1` at alpha), `started_at` (RFC 3339 timestamp). +- `commit`: object with the columns of the [`commits` + table](./01-schema.md#commits-dim), keyed by their column names + with `commit_sha` renamed to `sha`. The server upserts this row + before applying records. +- `records`: array of per-`kind` records as defined above. + +`vortex-bench --gh-json-v3 <path>` writes JSONL of bare records +only. The envelope (`run_meta` + `commit`) is added by the +post-ingest script before POSTing - this keeps the Rust emitter +dependency-light. + +The post-ingest script is responsible for filling the `commit` +fields. CI has the SHA from `${{ github.sha }}`; the rest comes +from `git show` or equivalent. See +[`components/emitter.md`](./components/emitter.md). + +## HTTP matrix for `POST /api/ingest` + +| Condition | Status | +|---|---| +| Happy path | 200 with `{ "inserted": N, "updated": M }` | +| Malformed JSON | 400 | +| Unknown `kind`, unknown field, or per-record validation failure | 400 with the offending record index | +| Missing/invalid bearer token | 401 | +| Schema version newer than server expects | 409 | +| Other server error | 500 | + +All-or-nothing per POST: a single failed record fails the whole +batch. The reported `inserted` and `updated` counts are aggregated +across all five tables. 
+ +## Authentication header + +```text +Authorization: Bearer +``` + +Compared with constant-time equality on the server. Token comes from +the `INGEST_BEARER_TOKEN` env var. + +## Slug grammar (server ↔ web-ui) + +The web-ui receives slugs from `/api/groups` and feeds them back +into `/api/chart/:slug`. Slugs are **opaque strings** as far as the +web-ui is concerned: it never parses or constructs them itself, +only echoes what the API returned. The server is free to choose any +slug format, change it without breaking the web-ui, or make it +debuggable (e.g. `qm-tpch-q01-nvme-sf1`) - the only contract is +"`/api/chart/:slug` accepts any slug `/api/groups` returned." + +## Read API (alpha) + +Two routes - just enough to render one chart page. Field shapes are +not binding; refine during implementation. + +### `GET /api/groups` + +A flat list of distinct group keys derivable from the data, with +just enough metadata to link to a chart. The server walks each fact +table to produce the group keys defined in +[`01-schema.md`](./01-schema.md#group--chart--series-fit). Every +chart entry includes a `slug` that round-trips through +`/api/chart/:slug`. + +### `GET /api/chart/:slug` + +Returns the data for one chart: a `display_name`, a `unit`, an +ordered `commits` list (sha + timestamp + first-line message + url), +and a `series` map keyed by series name where each value is an +array aligned to `commits` (with `null` for missing data points). + +Per-commit page, zoom/pan, range queries, and the rest of the read +API are deferred. See [`deferred.md`](./deferred.md). diff --git a/benchmarks-website/planning/AGENTS.md b/benchmarks-website/planning/AGENTS.md new file mode 100644 index 00000000000..28d6641cf7b --- /dev/null +++ b/benchmarks-website/planning/AGENTS.md @@ -0,0 +1,85 @@ + + +# AGENTS.md - benchmarks-website v3 (alpha) + +Brief for coding agents working on this rewrite. Keep it short; +detail belongs in component plans. 
+ +## What you're working on + +The **alpha** of v3 of `bench.vortex.dev`. Target: a single Rust +binary (axum + maud + duckdb-rs) with **DuckDB on local disk**. +The smallest end-to-end loop that proves the design. + +The v2 site (top-level files in `benchmarks-website/`: +`server.js`, `src/`, `package.json`, etc.) is in production and +stays running unchanged. v3 lives alongside in +`vortex-bench-server` at `benchmarks-website/server/`. + +Anything not listed in [`README.md`](./README.md) under +"Components" is **deferred**. See [`deferred.md`](./deferred.md). +Don't expand scope past your component plan. + +## Where to start + +1. [`README.md`](./README.md) - reading order. +2. [`00-overview.md`](./00-overview.md) - phases, components, + dependency map. +3. [`01-schema.md`](./01-schema.md) - the DuckDB schema (column + contracts; SQL is the server agent's call). +4. [`02-contracts.md`](./02-contracts.md) - wire shapes + HTTP + matrix + auth header. +5. [`benchmark-mapping.md`](./benchmark-mapping.md) - existing + benchmarks → fact tables (read this if you're working on the + emitter or eventual migration). +6. Your component plan in [`components/`](./components/). + +You **don't** need to read other components' plans. + +## Repository conventions + +See the root [`CLAUDE.md`](/CLAUDE.md) for Rust style, test layout, +and CI norms. Project-specific: + +- The v3 server crate lives at `benchmarks-website/server/` and is + registered in the root `Cargo.toml` `members` list. +- All commits need a `Signed-off-by:` trailer. +- Run `cargo +nightly fmt --all` and narrow clippy on what you + changed. +- Public-API changes need `./scripts/public-api.sh`. +- Every new public item needs a doc comment. +- Tests return `VortexResult<()>` and use `?`. No `unwrap`. + +## Things to avoid + +- **Don't widen scope past your component plan.** If a feature + feels missing, check [`deferred.md`](./deferred.md) first - it + is almost certainly already deferred there. 
+- **Don't write a server-side classifier.** The emitter is + responsible for v3-shape records. +- **Don't drift from contracts.** Wire-shape changes are a + coordinated PR across the affected components. +- **Don't touch the v2 React/Node app.** It stays in production + unchanged through alpha and through phase 2 until cutover. +- **Don't reach for WASM.** + +## Working branches + +| Branch | Purpose | +|---|---| +| `develop` | Live v2 site. Don't break. | +| `ct/benchmarks-v3` | Integration branch carrying the planning commit + landed component PRs. All component branches start here. | +| `claude/benchmarks-v3-` | Per-workstream feature branches, each branched from `ct/benchmarks-v3` and PR'd back to it. | + +## How to update this file + +Keep it short. If you've learned something a future agent will need: + +- Cross-component contract → [`02-contracts.md`](./02-contracts.md) +- Local detail → your component plan +- Decided → [`decisions.md`](./decisions.md) +- Not designing yet → [`deferred.md`](./deferred.md) +- Cross-cutting agent norm → here diff --git a/benchmarks-website/planning/README.md b/benchmarks-website/planning/README.md new file mode 100644 index 00000000000..6eabdaf0815 --- /dev/null +++ b/benchmarks-website/planning/README.md @@ -0,0 +1,71 @@ + + +# Benchmarks website v3 - Planning + +Planning docs for rebuilding `bench.vortex.dev` as a single Rust +binary with DuckDB on local disk. + +This plan is **alpha-only**. Everything beyond the smallest +end-to-end loop is deliberately punted to +[`deferred.md`](./deferred.md). + +## Reading order + +| File | Read when | +|---|---| +| [`00-overview.md`](./00-overview.md) | Always. The pitch, phases, and dependency map. | +| [`01-schema.md`](./01-schema.md) | Always. The five DuckDB fact tables + `commits` dim. | +| [`02-contracts.md`](./02-contracts.md) | Always. Wire shapes (one `kind` per fact table), HTTP error matrix, auth header. 
| +| [`benchmark-mapping.md`](./benchmark-mapping.md) | Always when working on the emitter or the historical migrator. Maps every existing benchmark to its target table. | +| [`decisions.md`](./decisions.md) | Skim once. What's pinned for alpha. | +| [`deferred.md`](./deferred.md) | Skim once. What we're not designing yet. | +| `components/.md` | The plan for your specific workstream. | +| `components/.md` | Avoid. If you're tempted, `02-contracts.md` probably needs an update. | + +## Components + +Three components for alpha. Each is one workstream, one branch, one +PR. After the schema and contracts are stable, **all three can be +worked on in parallel**. + +| Component | Plan | Branch | +|---|---|---| +| Server | [components/server.md](./components/server.md) | `claude/benchmarks-v3-server` | +| Emitter | [components/emitter.md](./components/emitter.md) | `claude/benchmarks-v3-emitter` | +| Web UI | [components/web-ui.md](./components/web-ui.md) | `claude/benchmarks-v3-web-ui` | + +## Working branches + +- `develop` - the v2 site, in production. **Do not touch.** +- `ct/benchmarks-v3` - the integration branch carrying the + planning commit and any landed component PRs. All component + branches start here. +- Component branches (one per workstream, see "Components" above), + each branched from `ct/benchmarks-v3` and PR'd back to it. + +## What this plan is not + +- Not implementation instructions. Component plans are deliberately + high-level. +- Not a phase-2 plan. Phase-2 work is one paragraph each in + [`deferred.md`](./deferred.md). The path will be clearer once the + alpha loop is running. +- Not a parity-with-v2 plan. v2 keeps running unchanged through + alpha. + +## Updating these docs + +If you find a gap, prefer to: + +1. Update [`02-contracts.md`](./02-contracts.md) when the gap is at + a component boundary. +2. Update the relevant component plan when the gap is local. +3. 
Update [`decisions.md`](./decisions.md) when the gap is "we just + haven't decided yet, but we need to." +4. Update [`deferred.md`](./deferred.md) when the gap is "this is + real work but not for alpha." + +Don't add a new top-level numbered doc. diff --git a/benchmarks-website/planning/benchmark-mapping.md b/benchmarks-website/planning/benchmark-mapping.md new file mode 100644 index 00000000000..9216a45ebc4 --- /dev/null +++ b/benchmarks-website/planning/benchmark-mapping.md @@ -0,0 +1,147 @@ + + +# Existing benchmarks → fact-table mapping + +A cross-reference from today's benchmark code to the v3 fact tables +in [`01-schema.md`](./01-schema.md). Use this when implementing +emitter `to_v3_json` (component plan in +[`components/emitter.md`](./components/emitter.md)) or when sanity- +checking that the schema is expressive enough. + +If a benchmark in this repo is not listed here, it is either +deferred to phase 2 or out of scope for the bench website. + +## Source measurement type → target table + +The canonical mapping. The Rust types live in +`vortex-bench/src/measurements.rs` (and per-benchmark crates). + +| Source type | Wire `kind` | Target table | Notes | +|---|---|---|---| +| `QueryMeasurement` (paired with `MemoryMeasurement`) | `query_measurement` | `query_measurements` | The two structs collapse into **one** v3 record. Memory fields are omitted if `--track-memory` was off. | +| `TimingMeasurement` (only the random-access variant uses this today) | `random_access_time` | `random_access_times` | | +| `CompressionTimingMeasurement` | `compression_time` (with `op ∈ {encode, decode}`) | `compression_times` | The `op` is decided by which side of `compress-bench`'s timing loop produced it. | +| `CustomUnitMeasurement` with byte unit (sizes) | `compression_size` | `compression_sizes` | A new `CompressionSizeMeasurement` extraction lives in `vortex-bench/src/compress/mod.rs`; the emitter no longer rides on `CustomUnitMeasurement`. 
| +| `CustomUnitMeasurement` with `ratio` unit | **dropped** | none | Computed at read time from `compression_sizes`. | +| `ScanTiming` (vector-search) | `vector_search_run` | `vector_search_runs` | Carries timing **plus** the three counters in the same row. | + +## Per-binary inventory + +Every benchmark binary in this repo, the measurement structs it +produces today, and the v3 tables those measurements land in. + +### `benchmarks/datafusion-bench` + +Runs the SQL query suites with `engine = datafusion`, parameterized +over a `Format` (parquet, vortex-file-compressed, vortex-compact, +arrow, lance via the lance-bench wrapper). + +- Produces `QueryMeasurement` (+ `MemoryMeasurement` when + `--track-memory`) → **`query_measurements`**. +- One row per `(commit, dataset, dataset_variant, scale_factor, + query_idx, storage, engine = "datafusion", format)`. + +### `benchmarks/duckdb-bench` + +Same as `datafusion-bench` but with `engine = duckdb`. + +- Produces `QueryMeasurement` (+ `MemoryMeasurement` when tracking) + → **`query_measurements`**, with `engine = "duckdb"`. + +### `benchmarks/lance-bench` + +Three things in one crate: + +1. **Query runner** (`src/main.rs`): `engine = datafusion`, + `format = lance` only. Produces `QueryMeasurement` (+ + `MemoryMeasurement`) → **`query_measurements`**. +2. **Compression runner** (`src/compress.rs`): produces + `CompressionTimingMeasurement` + size `CustomUnitMeasurement` → + **`compression_times`** (with `op ∈ {encode, decode}`, + `format = lance`) and **`compression_sizes`** + (`format = lance`). +3. **Random-access runner** (`src/random_access.rs`): produces + `TimingMeasurement` → **`random_access_times`** with + `format = lance`. + +### `benchmarks/compress-bench` + +The compression suite. Per dataset, runs encode + decode against +each enabled `Format` and records the resulting on-disk size. + +- `CompressionTimingMeasurement` for encode → **`compression_times`** + with `op = "encode"`. 
+- `CompressionTimingMeasurement` for decode → **`compression_times`** + with `op = "decode"`. +- Byte-unit `CustomUnitMeasurement` (the size entries) → + **`compression_sizes`**. +- Ratio-unit `CustomUnitMeasurement` (the `vortex:parquet-zstd + ratio/...` entries) → **dropped**. The reader recomputes ratios + from `compression_sizes`. + +### `benchmarks/random-access-bench` + +The random-access "take" timing suite. Datasets here (chimp, taxi, +etc.) are a different namespace from the SQL query suites. + +- `TimingMeasurement` → **`random_access_times`**. +- `format` is one of `vortex-file-compressed`, `vortex-compact`, + `parquet`, `lance`. + +### `benchmarks/vector-search-bench` + +Cosine-similarity scan over a vector dataset. Each dataset/layout/ +flavor combination produces a single `ScanTiming` per scan +configuration. + +- `ScanTiming` → **`vector_search_runs`**. +- `dataset` from `VectorDataset` (e.g. `cohere-large-10m`). +- `layout` from `TrainLayout`. +- `flavor` from `VectorFlavor` (compression flavor; the vector- + search analogue of `format`). +- `threshold`, `iterations` are real columns. +- `query_seed` is **not** stored - it's a deterministic seed for + the query sampler and not a measurement dimension. + +## Per-suite dim values + +For SQL query suites (everything that flows through +`query_measurements`), the dim columns are populated as follows: + +| `BenchmarkArg` | `dataset` | `dataset_variant` | `scale_factor` | Notes | +|---|---|---|---|---| +| `TpcH` | `tpch` | NULL | TPC SF as string (`"1"`, `"10"`, `"100"`, `"1000"`) | | +| `TpcDS` | `tpcds` | NULL | TPC SF as string | | +| `ClickBench` | `clickbench` | flavor as string (`partitioned` / `single`) | NULL | The flavor lives in `dataset_variant`, not `dataset`. | +| `StatPopGen` | `statpopgen` | NULL | n_rows as string | `scale_factor` here is the row count; the per-dataset interpretation of SF is documented in [`01-schema.md`](./01-schema.md). 
| +| `PolarSignals` | `polarsignals` | NULL | n_rows as string | Same SF interpretation as StatPopGen. | +| `Fineweb` | `fineweb` | NULL | NULL | | +| `GhArchive` | `gharchive` | NULL | NULL | | +| `PublicBi` | `public-bi` | dataset name (e.g. `cms-provider`) | NULL | The Public-BI sub-dataset name lives in `dataset_variant`. | + +For non-query suites: + +- `compress-bench`: `dataset` is the compression dataset name; if + the suite later grows variants, `dataset_variant` is available. +- `random-access-bench`: `dataset` is the random-access dataset + name. No variant column on this table. +- `vector-search-bench`: see the [vector_search_runs + table](./01-schema.md#vector_search_runs). + +## What this implies for the emitter + +The mapping above is the contract `vortex-bench --gh-json-v3` +implements. Any v3 record an emitter writes today must land in +exactly one of the five tables; if a future measurement type +doesn't fit, that's the signal to add a sixth table (and a sixth +`kind`) rather than overload one of these. + +The **historical migrator** will use the same mapping when it lands +(it's deferred - see [`deferred.md`](./deferred.md#historical-data-migration)). +The v2 classifier on `develop` at `benchmarks-website/server.js` +becomes useful then, because the v2 S3 dump pre-dates the +discriminator and we'll have to recover `kind` from name strings. +For new ingest at alpha, no classifier is needed. diff --git a/benchmarks-website/planning/components/emitter.md b/benchmarks-website/planning/components/emitter.md new file mode 100644 index 00000000000..e462a9804c8 --- /dev/null +++ b/benchmarks-website/planning/components/emitter.md @@ -0,0 +1,86 @@ + + +# Component: Emitter (alpha) + +## Required reading + +- [`../00-overview.md`](../00-overview.md) +- [`../02-contracts.md`](../02-contracts.md) +- [`../benchmark-mapping.md`](../benchmark-mapping.md) - the + source-type → target-table mapping. + +## Goal + +Extend `vortex-bench` so it emits v3-shape JSON. 
Plus a small POST +script that wraps the JSONL in an envelope and sends it to a +running alpha server. + +This is **purely additive** to v2's emission path. Nothing in v2 is +touched. CI workflow integration, dual-write, the orchestrator +update, and the outbox safety net all wait until after the alpha +loop works end-to-end (see [`../deferred.md`](../deferred.md)). + +## In scope + +### Rust emitter + +- Add a `--gh-json-v3 ` CLI flag that writes JSONL of bare + v3 records (no envelope). The legacy `-d gh-json -o ...` form is + untouched - both work at alpha. +- Emit a record with the appropriate `kind` for every measurement + type produced today. The mapping from existing measurement + structs to wire `kind`s is the table in + [`../benchmark-mapping.md`](../benchmark-mapping.md). +- Two non-obvious points (everything else is mechanical): + - `QueryMeasurement` and the paired `MemoryMeasurement` collapse + into **one** `query_measurement` record with both `value_ns` + and the four memory fields. If memory wasn't tracked, omit the + memory fields. + - Vector-search's `ScanTiming` doesn't carry its own dataset / + layout / threshold (those live in the binary's `Args`). The + emitter has to plumb them through to the record. +- `CustomUnitMeasurement` cross-format ratios are **not emitted** - + ratios are computed in the read path. +- Snapshot tests per `kind` (any framework), scrubbing `commit_sha` + and `env_triple`. + +### Post-ingest script + +A small Python script (path of the agent's choosing, e.g. under +`scripts/`) that: + +- Reads JSONL of records. +- Fills the `commit` envelope fields by shelling out to `git show` + (or equivalent) for the SHA passed as an argument. +- Wraps the records in the envelope from + [`../02-contracts.md`](../02-contracts.md). +- POSTs to `/api/ingest` with the bearer token. +- Exits non-zero on 4xx / 5xx. **No retries, no spool, no S3 + outbox at alpha** - those land when CI starts using this. 
+ +## Out of scope (deferred) + +- Replacing the v2 `-d`/`-o` CLI form. Both forms coexist at alpha. +- Removing the v2 `gh-json` emission path. +- Updating `bench-orchestrator` or any GitHub Actions workflows. + Alpha runs are manual. +- Retry / spool / outbox-drain on POST failures. + +See [`../deferred.md`](../deferred.md) for the post-alpha plan. + +## Acceptance criteria + +- `cargo test -p vortex-bench` passes; one snapshot per `kind`. +- Running a benchmark with `--gh-json-v3 ` writes valid JSONL + matching the wire shape from + [`../02-contracts.md`](../02-contracts.md). +- The post-ingest script round-trips a fixture file through a + running alpha server (200 with non-zero `inserted` on first run, + 200 with non-zero `updated` on second run). + +## Branch + +`claude/benchmarks-v3-emitter` diff --git a/benchmarks-website/planning/components/server.md b/benchmarks-website/planning/components/server.md new file mode 100644 index 00000000000..8bed8485e70 --- /dev/null +++ b/benchmarks-website/planning/components/server.md @@ -0,0 +1,70 @@ + + +# Component: Server (alpha) + +## Required reading + +- [`../00-overview.md`](../00-overview.md) +- [`../01-schema.md`](../01-schema.md) +- [`../02-contracts.md`](../02-contracts.md) + +## Goal + +A single Rust binary: an HTTP server that owns a DuckDB file on +local disk, accepts authenticated `/api/ingest` POSTs, and serves +enough of a read API to render one chart page. + +This is the **alpha** version. It runs locally or on a dev box; no +production deploy. Production deploy, backups, admin tooling, and +historical data import are deferred (see +[`../deferred.md`](../deferred.md)). + +The server crate is `vortex-bench-server`, living at +`benchmarks-website/server/`, registered as a workspace member. + +## In scope + +- Open the DuckDB file and apply the schema DDL on boot. No + migration framework yet - if the schema changes during alpha, + delete the file and re-run. +- Bearer-token middleware on `/api/ingest`. 
Token from + `INGEST_BEARER_TOKEN` env var, constant-time compared. +- `POST /api/ingest`: parse the envelope from + [`../02-contracts.md`](../02-contracts.md), upsert the commit, + dispatch each record to its destination fact table by `kind`, + enforce all-or-nothing per POST. Compute each row's + `measurement_id` server-side as part of the INSERT. Return + `{ inserted, updated }` aggregated across tables. +- `GET /api/groups` and `GET /api/chart/:slug`: enough to render + one chart page. Slugs round-trip; the agent picks the format. +- `GET /health`: enough to confirm the DB is open and ingest is + working (path, latest commit timestamp, per-table row counts - + exact shape is the agent's call). +- Mount whatever HTML routes the web-ui component contributes. + +Framework, templating engine (`maud` or `askama`), DuckDB driver +version, module layout, and DB-access concurrency model are the +agent's call. Pin the DuckDB crate version in `Cargo.toml`. + +## Out of scope (deferred) + +Schema migrations, lookup tables, pre-built views, multi-page read +API, admin endpoints, containerization, EBS mount, backups. See +[`../deferred.md`](../deferred.md). + +## Acceptance criteria + +- `cargo build` succeeds for the server crate. +- Integration test: POST a fixture envelope with a valid bearer → + 200; POST again → 200 with `updated > 0, inserted = 0`; POST + with no/wrong bearer → 401; POST with an unknown `kind` → 400. +- `GET /health` returns a coherent shape after an ingest. +- `cargo run` for the server, pointed at a fresh DuckDB file, + serves both read routes locally. 
+ +## Branch + +`claude/benchmarks-v3-server` diff --git a/benchmarks-website/planning/components/web-ui.md b/benchmarks-website/planning/components/web-ui.md new file mode 100644 index 00000000000..abf9e9de4a0 --- /dev/null +++ b/benchmarks-website/planning/components/web-ui.md @@ -0,0 +1,62 @@ + + +# Component: Web UI (alpha) + +## Required reading + +- [`../00-overview.md`](../00-overview.md) +- [`../01-schema.md`](../01-schema.md) +- [`../02-contracts.md`](../02-contracts.md) - the JSON shapes you + render against. + +## Goal + +Get something on screen. **One landing page** that lists groups and +**one chart page** that renders a single chart. SSR HTML + a thin +Chart.js hydration. That's it for alpha. + +This component develops in parallel against a fixture-populated +DuckDB - no dependency on the live ingest path. + +## In scope + +- A fixture: a small DuckDB file (or a builder that produces one + from a JSONL fixture) covering all five fact tables with a + handful of records each. Used for dev and tests. +- Landing page (`GET /`): list of groups with links into chart + pages, derived from `/api/groups`. 
+- Chart page (`GET /chart/:slug`): one Chart.js line chart, data + embedded inline as a JSON `"}"#; + let out = escape_json_for_script(input); + assert!(!out.contains(", + body: Json, +) -> Result { + let Json(value) = body; + let envelope: Envelope = + serde_json::from_value(value).map_err(|e| IngestError::Malformed(e.to_string()))?; + validate_envelope(&envelope)?; + + let response = db::run_blocking(&state.db, move |conn| apply_envelope(conn, envelope)) + .await + .map_err(|err| match err.downcast::() { + Ok(ingest) => ingest, + Err(other) => IngestError::Internal(other), + })?; + Ok(Json(response)) +} + +fn validate_envelope(env: &Envelope) -> Result<(), IngestError> { + if env.run_meta.schema_version > SCHEMA_VERSION { + return Err(IngestError::SchemaVersionTooNew { + expected: SCHEMA_VERSION, + got: env.run_meta.schema_version, + }); + } + if env.run_meta.schema_version < SCHEMA_VERSION { + return Err(IngestError::Malformed(format!( + "schema_version {} is older than server's {}", + env.run_meta.schema_version, SCHEMA_VERSION + ))); + } + Ok(()) +} + +fn apply_envelope(conn: &mut Connection, env: Envelope) -> Result { + let tx = conn.transaction().context("begin transaction")?; + + upsert_commit(&tx, &env.commit).context("upsert commit")?; + + let mut inserted = 0u64; + let mut updated = 0u64; + for (idx, record) in env.records.iter().enumerate() { + if record.commit_sha() != env.commit.sha { + return Err(IngestError::Record { + index: idx, + message: format!( + "record commit_sha {:?} does not match envelope commit.sha {:?}", + record.commit_sha(), + env.commit.sha, + ), + } + .into()); + } + match apply_record(&tx, record) { + Ok(was_update) => { + if was_update { + updated += 1; + } else { + inserted += 1; + } + } + Err(RecordError::Validation(msg)) => { + return Err(IngestError::Record { + index: idx, + message: msg, + } + .into()); + } + Err(RecordError::Internal(err)) => { + return Err(err.context(format!("applying record at index {idx}"))); + } + } + 
} + + tx.commit().context("commit transaction")?; + + Ok(IngestResponse { inserted, updated }) +} + +fn upsert_commit(tx: &duckdb::Transaction<'_>, c: &CommitInfo) -> Result<()> { + tx.execute( + r#" + INSERT INTO commits ( + commit_sha, timestamp, message, author_name, author_email, + committer_name, committer_email, tree_sha, url + ) VALUES (?, CAST(? AS TIMESTAMPTZ), ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT (commit_sha) DO UPDATE SET + timestamp = excluded.timestamp, + message = excluded.message, + author_name = excluded.author_name, + author_email = excluded.author_email, + committer_name = excluded.committer_name, + committer_email = excluded.committer_email, + tree_sha = excluded.tree_sha, + url = excluded.url + "#, + params![ + c.sha, + c.timestamp, + c.message, + c.author_name, + c.author_email, + c.committer_name, + c.committer_email, + c.tree_sha, + c.url, + ], + )?; + Ok(()) +} + +/// Per-record error split: validation failures carry a message that the +/// caller turns into an [`IngestError::Record`] with the right index; +/// anything else bubbles up as a 500. +enum RecordError { + Validation(String), + Internal(anyhow::Error), +} + +impl From for RecordError { + fn from(err: anyhow::Error) -> Self { + Self::Internal(err) + } +} + +impl From for RecordError { + fn from(err: duckdb::Error) -> Self { + Self::Internal(err.into()) + } +} + +fn apply_record(tx: &duckdb::Transaction<'_>, record: &Record) -> Result { + match record { + Record::QueryMeasurement(r) => insert_query_measurement(tx, r), + Record::CompressionTime(r) => { + let mid = measurement_id_compression_time(r); + let was_update = exists(tx, "compression_times", mid)?; + tx.execute( + r#" + INSERT INTO compression_times ( + measurement_id, commit_sha, dataset, dataset_variant, + format, op, value_ns, all_runtimes_ns, env_triple + ) VALUES (?, ?, ?, ?, ?, ?, ?, CAST(? AS BIGINT[]), ?) 
+ ON CONFLICT (measurement_id) DO UPDATE SET + commit_sha = excluded.commit_sha, + value_ns = excluded.value_ns, + all_runtimes_ns = excluded.all_runtimes_ns, + env_triple = excluded.env_triple + "#, + params![ + mid, + r.commit_sha, + r.dataset, + r.dataset_variant, + r.format, + r.op, + r.value_ns, + runtimes_literal(&r.all_runtimes_ns), + r.env_triple, + ], + )?; + Ok(was_update) + } + Record::CompressionSize(r) => { + let mid = measurement_id_compression_size(r); + let was_update = exists(tx, "compression_sizes", mid)?; + tx.execute( + r#" + INSERT INTO compression_sizes ( + measurement_id, commit_sha, dataset, dataset_variant, + format, value_bytes + ) VALUES (?, ?, ?, ?, ?, ?) + ON CONFLICT (measurement_id) DO UPDATE SET + commit_sha = excluded.commit_sha, + value_bytes = excluded.value_bytes + "#, + params![ + mid, + r.commit_sha, + r.dataset, + r.dataset_variant, + r.format, + r.value_bytes, + ], + )?; + Ok(was_update) + } + Record::RandomAccessTime(r) => { + let mid = measurement_id_random_access(r); + let was_update = exists(tx, "random_access_times", mid)?; + tx.execute( + r#" + INSERT INTO random_access_times ( + measurement_id, commit_sha, dataset, format, + value_ns, all_runtimes_ns, env_triple + ) VALUES (?, ?, ?, ?, ?, CAST(? AS BIGINT[]), ?) 
+ ON CONFLICT (measurement_id) DO UPDATE SET + commit_sha = excluded.commit_sha, + value_ns = excluded.value_ns, + all_runtimes_ns = excluded.all_runtimes_ns, + env_triple = excluded.env_triple + "#, + params![ + mid, + r.commit_sha, + r.dataset, + r.format, + r.value_ns, + runtimes_literal(&r.all_runtimes_ns), + r.env_triple, + ], + )?; + Ok(was_update) + } + Record::VectorSearchRun(r) => insert_vector_search(tx, r), + } +} + +fn insert_query_measurement( + tx: &duckdb::Transaction<'_>, + r: &QueryMeasurement, +) -> Result { + if !matches!(r.storage.as_str(), "nvme" | "s3") { + return Err(RecordError::Validation(format!( + "storage must be 'nvme' or 's3', got {:?}", + r.storage + ))); + } + if !memory_quartet_consistent(r) { + return Err(RecordError::Validation( + "memory fields must be populated together (all four or none)".into(), + )); + } + let mid = measurement_id_query(r); + let was_update = exists(tx, "query_measurements", mid)?; + tx.execute( + r#" + INSERT INTO query_measurements ( + measurement_id, commit_sha, dataset, dataset_variant, scale_factor, + query_idx, storage, engine, format, + value_ns, all_runtimes_ns, + peak_physical, peak_virtual, physical_delta, virtual_delta, + env_triple + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, CAST(? AS BIGINT[]), ?, ?, ?, ?, ?) 
+ ON CONFLICT (measurement_id) DO UPDATE SET + commit_sha = excluded.commit_sha, + value_ns = excluded.value_ns, + all_runtimes_ns = excluded.all_runtimes_ns, + peak_physical = excluded.peak_physical, + peak_virtual = excluded.peak_virtual, + physical_delta = excluded.physical_delta, + virtual_delta = excluded.virtual_delta, + env_triple = excluded.env_triple + "#, + params![ + mid, + r.commit_sha, + r.dataset, + r.dataset_variant, + r.scale_factor, + r.query_idx, + r.storage, + r.engine, + r.format, + r.value_ns, + runtimes_literal(&r.all_runtimes_ns), + r.peak_physical, + r.peak_virtual, + r.physical_delta, + r.virtual_delta, + r.env_triple, + ], + )?; + Ok(was_update) +} + +fn insert_vector_search( + tx: &duckdb::Transaction<'_>, + r: &VectorSearchRun, +) -> Result { + let mid = measurement_id_vector_search(r); + let was_update = exists(tx, "vector_search_runs", mid)?; + tx.execute( + r#" + INSERT INTO vector_search_runs ( + measurement_id, commit_sha, dataset, layout, flavor, threshold, + value_ns, all_runtimes_ns, matches, rows_scanned, bytes_scanned, + iterations, env_triple + ) VALUES (?, ?, ?, ?, ?, ?, ?, CAST(? AS BIGINT[]), ?, ?, ?, ?, ?) + ON CONFLICT (measurement_id) DO UPDATE SET + commit_sha = excluded.commit_sha, + value_ns = excluded.value_ns, + all_runtimes_ns = excluded.all_runtimes_ns, + matches = excluded.matches, + rows_scanned = excluded.rows_scanned, + bytes_scanned = excluded.bytes_scanned, + iterations = excluded.iterations, + env_triple = excluded.env_triple + "#, + params![ + mid, + r.commit_sha, + r.dataset, + r.layout, + r.flavor, + r.threshold, + r.value_ns, + runtimes_literal(&r.all_runtimes_ns), + r.matches, + r.rows_scanned, + r.bytes_scanned, + r.iterations, + r.env_triple, + ], + )?; + Ok(was_update) +} + +fn exists(tx: &duckdb::Transaction<'_>, table: &str, mid: i64) -> Result { + // Table name is from a closed enum of literals above, never user input. + let sql = format!("SELECT 1 FROM {table} WHERE measurement_id = ? 
LIMIT 1"); + let mut stmt = tx.prepare(&sql)?; + let exists = stmt.exists(params![mid])?; + Ok(exists) +} + +fn runtimes_literal(values: &[i64]) -> String { + let mut s = String::with_capacity(values.len() * 8 + 2); + s.push('['); + for (i, v) in values.iter().enumerate() { + if i > 0 { + s.push(','); + } + s.push_str(&v.to_string()); + } + s.push(']'); + s +} + +fn memory_quartet_consistent(r: &QueryMeasurement) -> bool { + let any = r.peak_physical.is_some() + || r.peak_virtual.is_some() + || r.physical_delta.is_some() + || r.virtual_delta.is_some(); + let all = r.peak_physical.is_some() + && r.peak_virtual.is_some() + && r.physical_delta.is_some() + && r.virtual_delta.is_some(); + !any || all +} diff --git a/benchmarks-website/server/src/lib.rs b/benchmarks-website/server/src/lib.rs new file mode 100644 index 00000000000..ae0a19b6cb9 --- /dev/null +++ b/benchmarks-website/server/src/lib.rs @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Vortex benchmarks website v3 (alpha) server. +//! +//! This crate is a leaf binary that owns a DuckDB file on local disk, +//! accepts authenticated `/api/ingest` POSTs, and serves a small read API +//! plus the HTML pages contributed by the web-ui component. + +pub mod api; +pub mod app; +pub mod auth; +pub mod db; +pub mod error; +pub mod html; +pub mod ingest; +pub mod records; +pub mod schema; +pub mod slug; diff --git a/benchmarks-website/server/src/main.rs b/benchmarks-website/server/src/main.rs new file mode 100644 index 00000000000..93768fdbfca --- /dev/null +++ b/benchmarks-website/server/src/main.rs @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Binary entrypoint for the bench.vortex.dev v3 alpha server. 
+ +use std::env; +use std::path::PathBuf; + +use anyhow::Context as _; +use anyhow::Result; +use tracing_subscriber::EnvFilter; + +#[tokio::main] +async fn main() -> Result<()> { + tracing_subscriber::fmt() + .with_env_filter( + EnvFilter::try_from_env("VORTEX_BENCH_LOG").unwrap_or_else(|_| EnvFilter::new("info")), + ) + .init(); + + let db_path: PathBuf = env::var("VORTEX_BENCH_DB") + .unwrap_or_else(|_| "bench.duckdb".to_string()) + .into(); + let bearer_token = + env::var("INGEST_BEARER_TOKEN").context("INGEST_BEARER_TOKEN env var must be set")?; + let bind_addr = env::var("VORTEX_BENCH_BIND").unwrap_or_else(|_| "127.0.0.1:3000".to_string()); + + let state = vortex_bench_server::app::AppState::open(&db_path, bearer_token) + .with_context(|| format!("opening DuckDB at {}", db_path.display()))?; + let app = vortex_bench_server::app::router(state); + + let listener = tokio::net::TcpListener::bind(&bind_addr) + .await + .with_context(|| format!("binding to {bind_addr}"))?; + tracing::info!( + addr = %listener.local_addr()?, + db = %db_path.display(), + "bench server listening" + ); + axum::serve(listener, app).await?; + Ok(()) +} diff --git a/benchmarks-website/server/src/records.rs b/benchmarks-website/server/src/records.rs new file mode 100644 index 00000000000..0217675cdea --- /dev/null +++ b/benchmarks-website/server/src/records.rs @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Wire shapes for `POST /api/ingest`. +//! +//! These types deserialize the ingest envelope defined in +//! `benchmarks-website/planning/02-contracts.md`. Each variant of [`Record`] +//! is gated by `#[serde(deny_unknown_fields)]`, so unknown fields produce +//! a 400 with the offending record's index. + +use serde::Deserialize; + +/// One ingest payload. +/// +/// `run_meta` and `commit` are added by the post-ingest script around the +/// JSONL of bare records the Rust emitter writes. 
+#[derive(Debug, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct Envelope {
+    pub run_meta: RunMeta,
+    pub commit: CommitInfo,
+    pub records: Vec<Record>,
+}
+
+/// Run-level metadata. `schema_version` is checked against
+/// [`crate::schema::SCHEMA_VERSION`] before any record is processed.
+#[derive(Debug, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct RunMeta {
+    pub benchmark_id: String,
+    pub schema_version: i32,
+    pub started_at: String,
+}
+
+/// Columns for the `commits` dim table. The wire field for `commit_sha` is
+/// renamed to `sha` per the contract.
+#[derive(Debug, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct CommitInfo {
+    pub sha: String,
+    pub timestamp: String,
+    pub message: String,
+    pub author_name: String,
+    pub author_email: String,
+    pub committer_name: String,
+    pub committer_email: String,
+    pub tree_sha: String,
+    pub url: String,
+}
+
+/// A single ingest record, discriminated by `kind`.
+#[derive(Debug, Deserialize)]
+#[serde(tag = "kind", rename_all = "snake_case")]
+pub enum Record {
+    QueryMeasurement(QueryMeasurement),
+    CompressionTime(CompressionTime),
+    CompressionSize(CompressionSize),
+    RandomAccessTime(RandomAccessTime),
+    VectorSearchRun(VectorSearchRun),
+}
+
+/// SQL query suite measurement (TPC-H, ClickBench, ...).
+#[derive(Debug, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct QueryMeasurement {
+    pub commit_sha: String,
+    pub dataset: String,
+    #[serde(default)]
+    pub dataset_variant: Option<String>,
+    #[serde(default)]
+    pub scale_factor: Option<String>,
+    pub query_idx: i32,
+    pub storage: String,
+    pub engine: String,
+    pub format: String,
+    pub value_ns: i64,
+    pub all_runtimes_ns: Vec<i64>,
+    #[serde(default)]
+    pub peak_physical: Option<i64>,
+    #[serde(default)]
+    pub peak_virtual: Option<i64>,
+    #[serde(default)]
+    pub physical_delta: Option<i64>,
+    #[serde(default)]
+    pub virtual_delta: Option<i64>,
+    #[serde(default)]
+    pub env_triple: Option<String>,
+}
+
+/// Encode/decode timing from `compress-bench`.
+#[derive(Debug, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct CompressionTime {
+    pub commit_sha: String,
+    pub dataset: String,
+    #[serde(default)]
+    pub dataset_variant: Option<String>,
+    pub format: String,
+    pub op: String,
+    pub value_ns: i64,
+    pub all_runtimes_ns: Vec<i64>,
+    #[serde(default)]
+    pub env_triple: Option<String>,
+}
+
+/// On-disk size from `compress-bench`. One-shot, no per-iteration data.
+#[derive(Debug, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct CompressionSize {
+    pub commit_sha: String,
+    pub dataset: String,
+    #[serde(default)]
+    pub dataset_variant: Option<String>,
+    pub format: String,
+    pub value_bytes: i64,
+}
+
+/// Take-time timing from `random-access-bench`.
+#[derive(Debug, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct RandomAccessTime {
+    pub commit_sha: String,
+    pub dataset: String,
+    pub format: String,
+    pub value_ns: i64,
+    pub all_runtimes_ns: Vec<i64>,
+    #[serde(default)]
+    pub env_triple: Option<String>,
+}
+
+/// Cosine-similarity scan from `vector-search-bench`.
+#[derive(Debug, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct VectorSearchRun {
+    pub commit_sha: String,
+    pub dataset: String,
+    pub layout: String,
+    pub flavor: String,
+    pub threshold: f64,
+    pub value_ns: i64,
+    pub all_runtimes_ns: Vec<i64>,
+    pub matches: i64,
+    pub rows_scanned: i64,
+    pub bytes_scanned: i64,
+    pub iterations: i32,
+    #[serde(default)]
+    pub env_triple: Option<String>,
+}
+
+impl Record {
+    /// The `commit_sha` referenced by this record. Every record carries one;
+    /// the server checks the envelope's `commit.sha` matches.
+    pub fn commit_sha(&self) -> &str {
+        match self {
+            Self::QueryMeasurement(r) => &r.commit_sha,
+            Self::CompressionTime(r) => &r.commit_sha,
+            Self::CompressionSize(r) => &r.commit_sha,
+            Self::RandomAccessTime(r) => &r.commit_sha,
+            Self::VectorSearchRun(r) => &r.commit_sha,
+        }
+    }
+
+    /// The wire `kind` string. Useful for logging and error messages.
+ pub fn kind(&self) -> &'static str { + match self { + Self::QueryMeasurement(_) => "query_measurement", + Self::CompressionTime(_) => "compression_time", + Self::CompressionSize(_) => "compression_size", + Self::RandomAccessTime(_) => "random_access_time", + Self::VectorSearchRun(_) => "vector_search_run", + } + } +} diff --git a/benchmarks-website/server/src/schema.rs b/benchmarks-website/server/src/schema.rs new file mode 100644 index 00000000000..72d456c3043 --- /dev/null +++ b/benchmarks-website/server/src/schema.rs @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! DuckDB schema DDL applied on server boot. +//! +//! See `benchmarks-website/planning/01-schema.md` for the column contracts. +//! There is no migration framework at alpha: if the schema changes, delete +//! the DuckDB file and restart. + +/// DDL for the `commits` dim plus the five fact tables. +pub const SCHEMA_DDL: &str = r#" +CREATE TABLE IF NOT EXISTS commits ( + commit_sha TEXT PRIMARY KEY NOT NULL, + timestamp TIMESTAMPTZ NOT NULL, + message TEXT NOT NULL, + author_name TEXT NOT NULL, + author_email TEXT NOT NULL, + committer_name TEXT NOT NULL, + committer_email TEXT NOT NULL, + tree_sha TEXT NOT NULL, + url TEXT NOT NULL +); + +CREATE TABLE IF NOT EXISTS query_measurements ( + measurement_id BIGINT PRIMARY KEY NOT NULL, + commit_sha TEXT NOT NULL, + dataset TEXT NOT NULL, + dataset_variant TEXT, + scale_factor TEXT, + query_idx INTEGER NOT NULL, + storage TEXT NOT NULL, + engine TEXT NOT NULL, + format TEXT NOT NULL, + value_ns BIGINT NOT NULL, + all_runtimes_ns BIGINT[] NOT NULL, + peak_physical BIGINT, + peak_virtual BIGINT, + physical_delta BIGINT, + virtual_delta BIGINT, + env_triple TEXT +); + +CREATE TABLE IF NOT EXISTS compression_times ( + measurement_id BIGINT PRIMARY KEY NOT NULL, + commit_sha TEXT NOT NULL, + dataset TEXT NOT NULL, + dataset_variant TEXT, + format TEXT NOT NULL, + op TEXT NOT NULL, + 
value_ns BIGINT NOT NULL, + all_runtimes_ns BIGINT[] NOT NULL, + env_triple TEXT +); + +CREATE TABLE IF NOT EXISTS compression_sizes ( + measurement_id BIGINT PRIMARY KEY NOT NULL, + commit_sha TEXT NOT NULL, + dataset TEXT NOT NULL, + dataset_variant TEXT, + format TEXT NOT NULL, + value_bytes BIGINT NOT NULL +); + +CREATE TABLE IF NOT EXISTS random_access_times ( + measurement_id BIGINT PRIMARY KEY NOT NULL, + commit_sha TEXT NOT NULL, + dataset TEXT NOT NULL, + format TEXT NOT NULL, + value_ns BIGINT NOT NULL, + all_runtimes_ns BIGINT[] NOT NULL, + env_triple TEXT +); + +CREATE TABLE IF NOT EXISTS vector_search_runs ( + measurement_id BIGINT PRIMARY KEY NOT NULL, + commit_sha TEXT NOT NULL, + dataset TEXT NOT NULL, + layout TEXT NOT NULL, + flavor TEXT NOT NULL, + threshold DOUBLE NOT NULL, + value_ns BIGINT NOT NULL, + all_runtimes_ns BIGINT[] NOT NULL, + matches BIGINT NOT NULL, + rows_scanned BIGINT NOT NULL, + bytes_scanned BIGINT NOT NULL, + iterations INTEGER NOT NULL, + env_triple TEXT +); +"#; + +/// Schema version expected by the server. The ingest envelope's +/// `run_meta.schema_version` must match this exactly at alpha. +pub const SCHEMA_VERSION: i32 = 1; diff --git a/benchmarks-website/server/src/slug.rs b/benchmarks-website/server/src/slug.rs new file mode 100644 index 00000000000..1fc8c3a43c1 --- /dev/null +++ b/benchmarks-website/server/src/slug.rs @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Opaque slugs for `/api/chart/:slug`. +//! +//! Per `02-contracts.md`, the web-ui treats slugs as opaque strings: it +//! receives them from `/api/groups` and feeds them back unchanged to +//! `/api/chart/:slug`. The server is free to choose any format. +//! +//! Slugs here are `.` where `` names the +//! source fact table and the JSON encodes the chart key. Round-tripping the +//! slug back gives a strongly-typed [`ChartKey`]. 
+
+use anyhow::Context as _;
+use anyhow::Result;
+use anyhow::anyhow;
+use base64::Engine as _;
+use base64::engine::general_purpose::URL_SAFE_NO_PAD;
+use serde::Deserialize;
+use serde::Serialize;
+
+const PREFIX_QUERY: &str = "qm";
+const PREFIX_COMPRESSION_TIME: &str = "ct";
+const PREFIX_COMPRESSION_SIZE: &str = "cs";
+const PREFIX_RANDOM_ACCESS: &str = "rat";
+const PREFIX_VECTOR_SEARCH: &str = "vsr";
+
+/// The strongly-typed chart key parsed from a slug.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+#[serde(tag = "k")]
+pub enum ChartKey {
+    /// `query_measurements` chart: `(dataset, query_idx)` per `01-schema.md`.
+    /// Group context (`dataset_variant`, `scale_factor`, `storage`) is carried
+    /// alongside so the slug fully specifies the chart.
+    QueryMeasurement {
+        dataset: String,
+        dataset_variant: Option<String>,
+        scale_factor: Option<String>,
+        storage: String,
+        query_idx: i32,
+    },
+    /// `compression_times` chart: `(dataset, dataset_variant)`.
+    CompressionTime {
+        dataset: String,
+        dataset_variant: Option<String>,
+    },
+    /// `compression_sizes` chart: `(dataset, dataset_variant)`.
+    CompressionSize {
+        dataset: String,
+        dataset_variant: Option<String>,
+    },
+    /// `random_access_times` chart: `dataset`.
+    RandomAccess { dataset: String },
+    /// `vector_search_runs` chart: `(dataset, layout, threshold)`.
+    VectorSearch {
+        dataset: String,
+        layout: String,
+        threshold: f64,
+    },
+}
+
+impl ChartKey {
+    fn prefix(&self) -> &'static str {
+        match self {
+            Self::QueryMeasurement { .. } => PREFIX_QUERY,
+            Self::CompressionTime { .. } => PREFIX_COMPRESSION_TIME,
+            Self::CompressionSize { .. } => PREFIX_COMPRESSION_SIZE,
+            Self::RandomAccess { .. } => PREFIX_RANDOM_ACCESS,
+            Self::VectorSearch { .. } => PREFIX_VECTOR_SEARCH,
+        }
+    }
+
+    /// Render the slug for this chart key.
+    pub fn to_slug(&self) -> String {
+        let json = serde_json::to_vec(self).expect("ChartKey is always JSON-serializable");
+        format!("{}.{}", self.prefix(), URL_SAFE_NO_PAD.encode(json))
+    }
+
+    /// Parse a slug previously produced by [`Self::to_slug`].
+    pub fn from_slug(slug: &str) -> Result<Self> {
+        let (_, encoded) = slug
+            .split_once('.')
+            .ok_or_else(|| anyhow!("slug missing '.' separator"))?;
+        let json = URL_SAFE_NO_PAD
+            .decode(encoded.as_bytes())
+            .context("slug payload was not valid base64url")?;
+        let key: Self = serde_json::from_slice(&json).context("slug payload was not valid JSON")?;
+        Ok(key)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn roundtrip(key: ChartKey) {
+        let slug = key.to_slug();
+        let parsed = ChartKey::from_slug(&slug).expect("parses back");
+        assert_eq!(parsed, key);
+    }
+
+    #[test]
+    fn query_measurement_roundtrips() {
+        roundtrip(ChartKey::QueryMeasurement {
+            dataset: "tpch".into(),
+            dataset_variant: None,
+            scale_factor: Some("1".into()),
+            storage: "nvme".into(),
+            query_idx: 7,
+        });
+    }
+
+    #[test]
+    fn vector_search_roundtrips() {
+        roundtrip(ChartKey::VectorSearch {
+            dataset: "cohere-large-10m".into(),
+            layout: "partitioned".into(),
+            threshold: 0.75,
+        });
+    }
+
+    #[test]
+    fn random_access_roundtrips() {
+        roundtrip(ChartKey::RandomAccess {
+            dataset: "taxi".into(),
+        });
+    }
+
+    #[test]
+    fn malformed_slug_rejected() {
+        assert!(ChartKey::from_slug("not-a-slug").is_err());
+        assert!(ChartKey::from_slug("qm.****").is_err());
+    }
+}
diff --git a/benchmarks-website/server/static/CHART_JS_LICENSE.md b/benchmarks-website/server/static/CHART_JS_LICENSE.md
new file mode 100644
index 00000000000..f216610fd7e
--- /dev/null
+++ b/benchmarks-website/server/static/CHART_JS_LICENSE.md
@@ -0,0 +1,9 @@
+The MIT License (MIT)
+
+Copyright (c) 2014-2024 Chart.js Contributors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/benchmarks-website/server/static/chart-init.js b/benchmarks-website/server/static/chart-init.js new file mode 100644 index 00000000000..5447b54aee2 --- /dev/null +++ b/benchmarks-website/server/static/chart-init.js @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +// Hydrate the Chart.js line chart on /chart/:slug. +// +// The server embeds the chart payload as a JSON diff --git a/benchmarks-website/server/tests/snapshots/landing_page.snap b/benchmarks-website/server/tests/snapshots/landing_page.snap new file mode 100644 index 00000000000..34873e5eb89 --- /dev/null +++ b/benchmarks-website/server/tests/snapshots/landing_page.snap @@ -0,0 +1,5 @@ +--- +source: benchmarks-website/server/tests/web_ui.rs +expression: body +--- +bench.vortex.dev

tpch sf=1 [nvme]

Compression

Compression Size

Random Access

cohere-large-10m / partitioned

diff --git a/benchmarks-website/server/tests/web_ui.rs b/benchmarks-website/server/tests/web_ui.rs
new file mode 100644
index 00000000000..4bed7a35271
--- /dev/null
+++ b/benchmarks-website/server/tests/web_ui.rs
@@ -0,0 +1,362 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! Integration tests for the web-ui HTML routes.
+//!
+//! Builds a temp DuckDB via the same `/api/ingest` path real callers use,
+//! seeds it with a multi-commit fixture so chart series have more than one
+//! point, then snapshots the rendered HTML for both routes plus a chart slug
+//! round-trip.
+
+use std::net::SocketAddr;
+
+use anyhow::Context as _;
+use anyhow::Result;
+use serde_json::Value;
+use serde_json::json;
+use tempfile::TempDir;
+use tokio::net::TcpListener;
+use tokio::task::JoinHandle;
+use vortex_bench_server::app::AppState;
+use vortex_bench_server::app::router;
+
+const TOKEN: &str = "test-bearer-token";
+
+struct Server {
+    addr: SocketAddr,
+    _tmp: TempDir,
+    handle: JoinHandle<()>,
+}
+
+impl Server {
+    async fn start() -> Result<Self> {
+        let tmp = TempDir::new()?;
+        let db_path = tmp.path().join("bench.duckdb");
+        let state = AppState::open(&db_path, TOKEN.to_string())?;
+        let app = router(state);
+
+        let listener = TcpListener::bind("127.0.0.1:0").await?;
+        let addr = listener.local_addr()?;
+        let handle = tokio::spawn(async move {
+            axum::serve(listener, app).await.unwrap();
+        });
+        Ok(Self {
+            addr,
+            _tmp: tmp,
+            handle,
+        })
+    }
+
+    fn url(&self, path: &str) -> String {
+        format!("http://{}{}", self.addr, path)
+    }
+}
+
+impl Drop for Server {
+    fn drop(&mut self) {
+        self.handle.abort();
+    }
+}
+
+/// Three synthetic commits, oldest first. Picked so the rendered output has
+/// short SHAs that are visually distinct in snapshots.
+fn commits() -> &'static [(&'static str, &'static str, &'static str)] { + &[ + ( + "1111111111111111111111111111111111111111", + "2026-04-23T12:00:00Z", + "first commit", + ), + ( + "2222222222222222222222222222222222222222", + "2026-04-24T12:00:00Z", + "second commit", + ), + ( + "3333333333333333333333333333333333333333", + "2026-04-25T12:00:00Z", + "third commit", + ), + ] +} + +/// Build a fixture envelope for one commit; `value_bias` is added to each +/// numeric measurement so successive commits produce a non-flat time series. +fn envelope_for(sha: &str, ts: &str, msg: &str, value_bias: i64) -> Value { + json!({ + "run_meta": { + "benchmark_id": "web-ui-fixture", + "schema_version": 1, + "started_at": ts + }, + "commit": { + "sha": sha, + "timestamp": ts, + "message": msg, + "author_name": "Test Author", + "author_email": "author@example.com", + "committer_name": "Test Committer", + "committer_email": "committer@example.com", + "tree_sha": "fedcba9876543210fedcba9876543210fedcba98", + "url": format!("https://github.com/vortex-data/vortex/commit/{sha}") + }, + "records": [ + { + "kind": "query_measurement", + "commit_sha": sha, + "dataset": "tpch", + "scale_factor": "1", + "query_idx": 1, + "storage": "nvme", + "engine": "datafusion", + "format": "vortex-file-compressed", + "value_ns": 1_000_000 + value_bias, + "all_runtimes_ns": [1_000_000 + value_bias] + }, + { + "kind": "query_measurement", + "commit_sha": sha, + "dataset": "tpch", + "scale_factor": "1", + "query_idx": 1, + "storage": "nvme", + "engine": "duckdb", + "format": "parquet", + "value_ns": 800_000 + value_bias, + "all_runtimes_ns": [800_000 + value_bias] + }, + { + "kind": "compression_time", + "commit_sha": sha, + "dataset": "tpch-lineitem", + "format": "vortex-file-compressed", + "op": "encode", + "value_ns": 9_000 + value_bias, + "all_runtimes_ns": [9_000 + value_bias] + }, + { + "kind": "compression_size", + "commit_sha": sha, + "dataset": "tpch-lineitem", + "format": 
"vortex-file-compressed", + "value_bytes": 4_000 + value_bias + }, + { + "kind": "random_access_time", + "commit_sha": sha, + "dataset": "taxi", + "format": "vortex-file-compressed", + "value_ns": 500 + value_bias, + "all_runtimes_ns": [500 + value_bias] + }, + { + "kind": "vector_search_run", + "commit_sha": sha, + "dataset": "cohere-large-10m", + "layout": "partitioned", + "flavor": "vortex-turboquant", + "threshold": 0.75, + "value_ns": 7_000 + value_bias, + "all_runtimes_ns": [7_000 + value_bias], + "matches": 42, + "rows_scanned": 1_000_000, + "bytes_scanned": 5_000_000, + "iterations": 1 + } + ] + }) +} + +async fn seed(server: &Server) -> Result<()> { + let client = reqwest::Client::new(); + for (i, (sha, ts, msg)) in commits().iter().enumerate() { + let bias = (i as i64) * 50_000; + let resp = client + .post(server.url("/api/ingest")) + .bearer_auth(TOKEN) + .json(&envelope_for(sha, ts, msg, bias)) + .send() + .await?; + anyhow::ensure!( + resp.status().is_success(), + "seed ingest #{i} failed: {}", + resp.status() + ); + } + Ok(()) +} + +fn insta_settings() -> insta::Settings { + let mut s = insta::Settings::clone_current(); + s.set_snapshot_path("snapshots"); + s.set_prepend_module_to_snapshot(false); + s +} + +#[tokio::test] +async fn landing_page_snapshot() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let client = reqwest::Client::new(); + let resp = client.get(server.url("/")).send().await?; + assert_eq!(resp.status(), 200); + let content_type = resp + .headers() + .get(reqwest::header::CONTENT_TYPE) + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + assert!( + content_type.starts_with("text/html"), + "expected text/html, got {content_type:?}" + ); + let body = resp.text().await?; + + insta_settings().bind(|| { + insta::assert_snapshot!("landing_page", body); + }); + Ok(()) +} + +#[tokio::test] +async fn chart_page_snapshot() -> Result<()> { + let server = Server::start().await?; + seed(&server).await?; + + let 
client = reqwest::Client::new(); + // Pick the query_measurements chart: it has two series (engine:format + // combinations) so the snapshot exercises multi-series rendering. + let groups: Value = client + .get(server.url("/api/groups")) + .send() + .await? + .json() + .await?; + let slug = groups["groups"] + .as_array() + .context("groups is array")? + .iter() + .find(|g| { + g["name"] + .as_str() + .map(|s| s.starts_with("tpch")) + .unwrap_or(false) + }) + .and_then(|g| g["charts"].as_array()) + .and_then(|c| c.first()) + .and_then(|c| c["slug"].as_str()) + .context("tpch chart slug")? + .to_string(); + + let resp = client + .get(server.url(&format!("/chart/{slug}"))) + .send() + .await?; + assert_eq!(resp.status(), 200); + let body = resp.text().await?; + assert!( + body.contains(r#"