Skip to content

Commit

Permalink
ci/eval: use duckdb for creating diffs
Browse files Browse the repository at this point in the history
  • Loading branch information
paparodeo committed Dec 9, 2024
1 parent c3c2032 commit d95b9cc
Show file tree
Hide file tree
Showing 7 changed files with 221 additions and 194 deletions.
31 changes: 13 additions & 18 deletions .github/workflows/eval.yml
Original file line number Diff line number Diff line change
Expand Up @@ -153,11 +153,17 @@ jobs:
outputs:
baseRunId: ${{ steps.baseRunId.outputs.baseRunId }}
steps:
- name: Download the list of all attributes
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
with:
name: paths
path: paths

- name: Download output paths and eval stats for all systems
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
with:
pattern: intermediate-*
path: intermediate
path: afterIntermediate

- name: Check out the PR at the test merge commit
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
Expand All @@ -168,18 +174,6 @@ jobs:
- name: Install Nix
uses: cachix/install-nix-action@08dcb3a5e62fa31e2da3d490afc4176ef55ecd72 # v30

- name: Combine all output paths and eval stats
run: |
nix-build nixpkgs/ci -A eval.combine \
--arg resultsDir ./intermediate \
-o prResult
- name: Upload the combined results
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
with:
name: result
path: prResult/*

- name: Get base run id
if: needs.attrs.outputs.baseSha
id: baseRunId
Expand Down Expand Up @@ -216,17 +210,18 @@ jobs:
- uses: actions/download-artifact@v4
if: steps.baseRunId.outputs.baseRunId
with:
name: result
path: baseResult
pattern: intermediate-*
path: beforeIntermediate
github-token: ${{ github.token }}
run-id: ${{ steps.baseRunId.outputs.baseRunId }}

- name: Compare against the base branch
if: steps.baseRunId.outputs.baseRunId
run: |
nix-build nixpkgs/ci -A eval.compare \
--arg beforeResultDir ./baseResult \
--arg afterResultDir ./prResult \
--arg beforeResultDir ./beforeIntermediate \
--arg afterResultDir ./afterIntermediate \
--arg systemsFile ./paths/systems.json \
-o comparison
cat comparison/step-summary.md >> "$GITHUB_STEP_SUMMARY"
# TODO: Request reviews from maintainers for packages whose files are modified in the PR
Expand Down Expand Up @@ -262,7 +257,7 @@ jobs:
| sort > before
# And the labels that should be there
jq -r '.labels[]' comparison/changed-paths.json \
jq -r '.[].label' < comparison/labels.json \
| sort > after
# Remove the ones not needed anymore
Expand Down
164 changes: 0 additions & 164 deletions ci/eval/compare.jq

This file was deleted.

112 changes: 112 additions & 0 deletions ci/eval/compare.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
-- Table macro: load every `<dir>/*/paths` file into a two-column table, so the
-- intermediate outputs can be used directly rather than needing jq to
-- preprocess outpaths.json.
--
-- Each input line is read as one string (`sep = ''`, no header) and then split
-- into fields on runs of spaces:
--   cmbAttr  first field of the line
--   paths    last field of the line, split on ';' and sorted
--
-- The split pattern must require at least one space (' +'). The previous
-- pattern ' *' can also match the empty string, so it does not reliably split
-- the line between fields only.
CREATE MACRO raw(dir) AS TABLE
    SELECT
        cols[1] AS cmbAttr,
        string_split(cols[-1], ';').list_sort() AS paths
    FROM (
        SELECT c.regexp_split_to_array(' +') AS cols
        FROM read_csv(dir || '/*/paths', header = FALSE, sep = '') AS t(c)
    );

-- Materialize the listings found under data/before and data/after, using the
-- two columns produced by the raw() table macro.
CREATE TABLE before AS
    SELECT cmbAttr, paths
    FROM raw('data/before');

CREATE TABLE after AS
    SELECT cmbAttr, paths
    FROM raw('data/after');

-- View over data/systems.json with a single column named `target`.
-- Expected file format: [ "aarch64-linux","aarch64-darwin", ... ]
CREATE VIEW systems AS
    SELECT t.target
    FROM 'data/systems.json' AS t(target);

-- Last '-'-separated component of `target` (e.g. 'linux' for 'x86_64-linux').
CREATE MACRO targetToOS(target) AS list_extract(string_split(target, '-'), -1);
-- First '-'-separated component of `target` (e.g. 'x86_64' for 'x86_64-linux').
CREATE MACRO targetToMachine(target) AS list_extract(string_split(target, '-'), 1);
-- Prefix every entry of `paths` that starts with '/' with 'out='; entries that
-- start with anything else are passed through unchanged.
CREATE MACRO outDefault(paths) AS
    list_transform(
        paths,
        entry -> CASE WHEN entry[1] = '/' THEN 'out=' || entry ELSE entry END
    );

/*
 * Build the `changed` table, one row per reported attribute and system:
 *
 *   action   'add' | 'change' | 'remove'
 *   attr     cmbAttr with its last '.'-separated component stripped
 *   machine  first '-'-separated part of that last component
 *   os       last '-'-separated part of that last component
 *   paths    map of output name -> store path, e.g. {out=/nix/store/...}
 *
 * Example rows:
 *   change  tambura         x86_64   linux   {out=/nix/store/rq...
 *   add     cargo-llvm-cov  aarch64  linux   {out=/nix/store/ri...
 *   remove  emscripten      aarch64  darwin  {out=/nix/store/mb...
 */
CREATE TABLE changed AS
-- rows of `after` whose path list does not occur anywhere in `before`
WITH freshPaths AS (
    SELECT
        a.cmbAttr,
        a.paths
    FROM after AS a
    WHERE NOT EXISTS (
        SELECT 1
        FROM before AS b
        WHERE b.paths = a.paths
    )
),
-- label fresh paths as added or changed, then append the removed attrs
labelled AS (
    SELECT
        f.cmbAttr,
        f.paths,
        CASE WHEN b.cmbAttr IS NULL THEN 'add' ELSE 'change' END AS action
    FROM freshPaths AS f
    LEFT JOIN before AS b
        ON b.cmbAttr = f.cmbAttr
    UNION ALL
    SELECT
        b.cmbAttr,
        b.paths,
        'remove' AS action
    FROM before AS b
    WHERE NOT EXISTS (
        SELECT 1
        FROM after AS a
        WHERE a.cmbAttr = b.cmbAttr
    )
),
-- pull the target out of `attr.target` and split each path entry into a
-- [name, path] pair (bare paths get the default output name `out`)
withTarget AS (
    SELECT
        action,
        cmbAttr,
        string_split(cmbAttr, '.')[-1] AS target,
        list_transform(outDefault(paths), entry -> string_split(entry, '=')) AS pathPairs
    FROM labelled
)
-- split the target into `machine` / `os`, turn the pairs into a map for the
-- json output, and keep only targets listed in the `systems` view
SELECT
    w.action,
    regexp_replace(w.cmbAttr, '\.[^.]*$', '') AS attr,
    targetToMachine(w.target) AS machine,
    targetToOS(w.target) AS os,
    map(
        list_transform(w.pathPairs, pair -> pair[1]),
        list_transform(w.pathPairs, pair -> pair[-1])
    ) AS paths
FROM withTarget AS w
WHERE EXISTS (
    SELECT 1
    FROM systems
    WHERE systems.target = w.target
);

/*
 * Build the `rebuilds` table: for every system, the number of distinct
 * added / changed path sets. Several attributes can map to the same paths,
 * so there are likely more changed attributes than rebuilds.
 *
 *   builds | machine | os
 *   -------+---------+--------
 *       64 | aarch64 | linux
 *       44 | x86_64  | darwin
 *       44 | aarch64 | darwin
 *       64 | x86_64  | linux
 */
CREATE TABLE rebuilds AS
-- rebuilds per system, counting attributes that share the same paths once
WITH counted AS (
    SELECT
        count(DISTINCT paths) AS builds,
        machine,
        os
    FROM changed
    WHERE action <> 'remove'
    GROUP BY machine, os
),
-- a zero row for every system, so systems without any rebuild still show up
baseline AS (
    SELECT
        0 AS builds,
        targetToMachine(target) AS machine,
        targetToOS(target) AS os
    FROM systems
),
perSystem AS (
    SELECT builds, machine, os FROM counted
    UNION ALL
    SELECT builds, machine, os FROM baseline
)
-- max() lets a real count win over the zero row of the same system
SELECT
    max(builds) AS builds,
    machine,
    os
FROM perSystem
GROUP BY machine, os;

-- Write both result tables out as JSON arrays, in a stable row order.
COPY (
    SELECT builds, machine, os
    FROM rebuilds
    ORDER BY os, machine
) TO 'rebuilds.json' (FORMAT JSON, ARRAY true);

COPY (
    SELECT action, attr, machine, os, paths
    FROM changed
    ORDER BY attr, machine, os
) TO 'changed.json' (FORMAT JSON, ARRAY true);
Loading

0 comments on commit d95b9cc

Please sign in to comment.