Skip to content

Commit

Permalink
Add download rates view (#181)
Browse files Browse the repository at this point in the history
* Add download_rates view for MSAK dl throughput

* Actually create view

* Fix syntax

* Rename field

* Extra \n

* Rename download_rates to throughput1_download_rates.

* Rewrite throughput1_downloads view

* Update sql filename

* Merge branch 'main' into sandbox-roberto-download-rates-view
  • Loading branch information
robertodauria authored Oct 1, 2024
1 parent eee24ce commit 2cfbc0e
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 0 deletions.
1 change: 1 addition & 0 deletions views/create_dataset_views.sh
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ create_view ${SRC_PROJECT} ${DST_PROJECT} cloudflare ./cloudflare/speedtest_spee

# MSAK
# One create_view call per view definition file under ./msak.
# NOTE(review): create_view is defined outside this excerpt; the third argument
# ("msak") is presumably the target dataset name -- confirm against its definition.
create_view ${SRC_PROJECT} ${DST_PROJECT} msak ./msak/throughput1.sql
create_view ${SRC_PROJECT} ${DST_PROJECT} msak ./msak/throughput1_downloads.sql

# stats-pipeline
create_view ${SRC_PROJECT} ${DST_PROJECT} statistics ./statistics/v0_global_asn.sql
Expand Down
65 changes: 65 additions & 0 deletions views/msak/throughput1_downloads.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
WITH
-- One row per download MeasurementID (dates 2024-01-01 through today), carrying
-- the earliest StartTime and the earliest EndTime found across that
-- measurement's rows.
test_time_range AS (
  SELECT
    raw.MeasurementID AS mid,
    MIN(raw.StartTime) AS test_start_time,
    MIN(raw.EndTime) AS test_end_time
  FROM `{{.ProjectID}}.msak.throughput1`
  WHERE
    raw.Direction = "download"
    AND date BETWEEN "2024-01-01" AND CURRENT_DATE
  GROUP BY raw.MeasurementID
  -- Keep only measurements whose window (earliest end minus earliest start)
  -- is at most 60 seconds.
  HAVING TIMESTAMP_DIFF(test_end_time, test_start_time, SECOND) <= 60
),

-- Debug subquery - Get all streams' snapshots for a single MeasurementID.
-- stream_snapshots AS (
-- SELECT raw.MeasurementID, raw.UUID, sm.TCPInfo.BytesAcked, sm.ElapsedTime
-- FROM `{{.ProjectID}}.msak.throughput1` msak
-- JOIN UNNEST(raw.ServerMeasurements) sm
-- JOIN test_time_range r ON msak.raw.MeasurementID = r.mid
-- -- Verify that the test's start time + the measurement's elapsed time doesn't exceed end_time.
-- WHERE
-- UNIX_MICROS(msak.raw.StartTime) + sm.ElapsedTime <= UNIX_MICROS(r.test_end_time)
-- AND date BETWEEN "2024-01-01" AND CURRENT_DATE
-- AND msak.raw.MeasurementID = "15614e5d-6e3b-4e60-bf33-9d488dad06b3"
-- AND raw.Direction = "download"
-- ),

-- Per-stream totals: for every (MeasurementId, UUID, date) keep the largest
-- TCPInfo.BytesAcked and the largest ElapsedTime among the server snapshots
-- taken no later than the measurement's test_end_time, i.e. between the first
-- stream that started and the first stream that terminated.
stream_bytes_acked AS (
  SELECT
    msak.a.MeasurementId,
    msak.a.UUID,
    msak.date,
    ANY_VALUE(msak.client) AS client,
    ANY_VALUE(msak.a.CongestionControl) AS cc,
    ttr.test_start_time AS StartTime,
    ttr.test_end_time AS EndTime,
    MAX(snap.ElapsedTime) AS elapsed,
    MAX(snap.TCPInfo.BytesAcked) AS max_bytes_acked
  FROM `{{.ProjectID}}.msak.throughput1` AS msak
  INNER JOIN UNNEST(msak.raw.ServerMeasurements) AS snap
  INNER JOIN test_time_range AS ttr
    ON msak.raw.MeasurementID = ttr.mid
  WHERE
    msak.raw.Direction = "download"
    AND msak.date BETWEEN "2024-01-01" AND CURRENT_DATE
    -- Discard snapshots taken after test_end_time. ElapsedTime is added to
    -- UNIX_MICROS(StartTime), so it is treated as microseconds here.
    AND UNIX_MICROS(msak.raw.StartTime) + snap.ElapsedTime <= UNIX_MICROS(ttr.test_end_time)
  GROUP BY
    msak.a.MeasurementId,
    msak.a.UUID,
    msak.date,
    ttr.test_start_time,
    ttr.test_end_time
)

-- SELECT * FROM stream_snapshots

-- Final view rows: one per (MeasurementID, date, StartTime, EndTime), folding
-- the per-stream rows of stream_bytes_acked into a single summary struct `a`.
SELECT
  MeasurementID AS id,
  date,
  STRUCT (
    StartTime,
    EndTime,
    -- Total bytes acked across streams over the longest per-stream elapsed
    -- time, times 8 for bits. elapsed is combined with UNIX_MICROS in
    -- stream_bytes_acked, so bits per microsecond reads directly as Mbit/s.
    -- SAFE_DIVIDE returns NULL instead of raising a division-by-zero error
    -- (which would fail the whole query) when MAX(elapsed) is 0.
    SAFE_DIVIDE(SUM(max_bytes_acked), MAX(elapsed)) * 8 AS ThroughputMbps,
    -- One input row per stream UUID, so the row count is the stream count.
    COUNT(*) AS NumStreams,
    ANY_VALUE(cc) AS CongestionControl
  ) AS a,
  ANY_VALUE(client) AS client
FROM stream_bytes_acked
GROUP BY MeasurementID, date, StartTime, EndTime

0 comments on commit 2cfbc0e

Please sign in to comment.