-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add support for static deployments of autojoin data JOINs and UNIONs (#184)
* Separate script from library functions * Add support for joining v2 autonode datatypes * Deploy autonode v2 datatypes * Use project parameter for views * Add TODO to remove autojoin view deployments
- Loading branch information
1 parent
ad38690
commit 846d6b4
Showing
6 changed files
with
175 additions
and
49 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
--
-- ndt7_joined - joins the raw ndt7 and annotation2 autoloaded datasets with standard columns.
--
WITH prendt7 AS (
  -- Precompute per-row flags and the values taken from the last element of
  -- ServerMeasurements (ARRAY_REVERSE + SAFE_OFFSET(0) yields NULL when the
  -- array is empty or missing).
  SELECT
    raw.Download IS NOT NULL AS isDownload,
    raw.Upload IS NOT NULL AS isUpload,
    ARRAY_REVERSE(raw.Download.ServerMeasurements)[SAFE_OFFSET(0)].BBRInfo IS NOT NULL AS isBBR,
    ARRAY_REVERSE(raw.Download.ServerMeasurements)[SAFE_OFFSET(0)].TCPInfo.BytesAcked AS downloadBytesAcked,
    ARRAY_REVERSE(raw.Download.ServerMeasurements)[SAFE_OFFSET(0)].TCPInfo.ElapsedTime AS downloadElapsedTime,
    ARRAY_REVERSE(raw.Download.ServerMeasurements)[SAFE_OFFSET(0)].TCPInfo.MinRTT AS downloadMinRTT,
    ARRAY_REVERSE(raw.Download.ServerMeasurements)[SAFE_OFFSET(0)].TCPInfo.BytesRetrans AS downloadBytesRetrans,
    ARRAY_REVERSE(raw.Download.ServerMeasurements)[SAFE_OFFSET(0)].TCPInfo.BytesSent AS downloadBytesSent,
    ARRAY_REVERSE(raw.Upload.ServerMeasurements)[SAFE_OFFSET(0)].TCPInfo.BytesReceived AS uploadBytesReceived,
    ARRAY_REVERSE(raw.Upload.ServerMeasurements)[SAFE_OFFSET(0)].TCPInfo.ElapsedTime AS uploadElapsedTime,
    ARRAY_REVERSE(raw.Upload.ServerMeasurements)[SAFE_OFFSET(0)].TCPInfo.MinRTT AS uploadMinRTT,
    *
  FROM `{{.ProjectID}}.autoload_v2_{{ORG}}_ndt.ndt7_raw`

), ndt7 AS (
  SELECT
    -- Pick the download or upload UUID per row.
    IF(isDownload, raw.Download.UUID, IF(isUpload, raw.Upload.UUID, NULL)) AS id,
    -- Construct the summary 'a' record for compatibility with standard columns.
    STRUCT (
      IF(isDownload, raw.Download.UUID, IF(isUpload, raw.Upload.UUID, NULL)) AS UUID,
      IF(isDownload, raw.Download.StartTime, IF(isUpload, raw.Upload.StartTime, NULL)) AS TestTime,
      IF(isBBR, "bbr", "unknown") AS CongestionControl,
      -- SAFE_DIVIDE yields NULL instead of failing the whole query when the
      -- elapsed time is zero.
      -- NOTE(review): Mbps assumes ElapsedTime is in microseconds -- confirm.
      8 * IF(isDownload, SAFE_DIVIDE(downloadBytesAcked, downloadElapsedTime),
          IF(isUpload, SAFE_DIVIDE(uploadBytesReceived, uploadElapsedTime), NULL)) AS MeanThroughputMbps,
      IF(isDownload, downloadMinRTT, IF(isUpload, uploadMinRTT, NULL)) / 1000 AS MinRTT, -- unit: ms
      -- Uploads report a loss rate of 0; downloads with zero bytes sent
      -- yield NULL rather than a division-by-zero error.
      IF(isDownload, SAFE_DIVIDE(downloadBytesRetrans, downloadBytesSent), IF(isUpload, 0, NULL)) AS LossRate
    ) AS a,
    *
  FROM prendt7
), ann2 AS (
  SELECT raw.UUID AS id, *
  FROM `{{.ProjectID}}.autoload_v2_{{ORG}}_ndt.annotation2_raw`
)

SELECT
  -- Standard column order.
  ndt7.id,
  ndt7.date,
  ndt7.archiver,
  ann2.raw.server,
  ann2.raw.client,
  ndt7.a,
  ndt7.raw
FROM ndt7 LEFT JOIN ann2
  ON ndt7.id = ann2.id AND ndt7.date = ann2.date
WHERE ndt7.id IS NOT NULL
|
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
#!/bin/bash
#
# create_autojoin_dataset_views.sh creates all datasets and views for the
# autojoin pipeline. Like create_dataset_views.sh, every directory is a dataset
# name and every sql file within the dataset subdirectory should be a view query
# template.
#
# For every autoloaded "<...>_ndt" dataset found in the source project, this
# script deploys a per-org ndt7_joined view and then generates and deploys
# autoload_v2_ndt/ndt7_union.sql as the UNION ALL of those views.
#
# Example usage:
#
#  ./create_autojoin_dataset_views.sh "self" mlab-sandbox
#  ./create_autojoin_dataset_views.sh "self" mlab-oti

set -eu
USAGE="$0 <key-name> <source-project>"
KEYNAME=${1:?Please provide a key name to authorize operations or "self"}
SRC_PROJECT=${2:?Please provide source project: $USAGE}

# Setup environment.
BASEDIR=$( realpath "$( dirname "${BASH_SOURCE[0]}" )" )
cd "${BASEDIR}"

# Initialize library.
source "${BASEDIR}/create_view_lib.sh"
create_view_init

UNION_SQL=./autoload_v2_ndt/ndt7_union.sql

echo "Creating autojoin views"
# TODO(soltesz): eliminate this in favor of automation within the autoloader.
# Get list of orgs with ndt autoloaded data.
# `bq ls` returns only 50 datasets unless --max_results is raised. It is run
# on its own so that a failure aborts the script under `set -e`.
all_datasets=$( bq ls --max_results 1000 --project_id "${SRC_PROJECT}" )
# grep exits non-zero when nothing matches; "|| true" keeps `set -e` from
# aborting so the "no datasets" case reaches the guard below.
datasets=$(
  grep autoload <<<"${all_datasets}" \
    | grep _ndt \
    | grep -v autoload_v2_ndt \
    || true
)

echo '-- Generated query' > "${UNION_SQL}"
union_has_select=false
# Word splitting is intentional: dataset ids contain no whitespace and the
# `bq ls` output is padded with spaces.
# shellcheck disable=SC2086
for ds in ${datasets} ; do
  # The org is the third "_"-separated field, e.g. autoload_v2_<org>_ndt.
  org=$( tr '_' ' ' <<<"${ds}" | awk '{print $3}' )
  if [[ -z "${org}" ]] ; then
    echo "Cannot derive org name from dataset '${ds}'" >&2
    exit 1
  fi
  create_org_joined_view "${SRC_PROJECT}" "${org}"
  if [[ "${union_has_select}" == true ]] ; then
    # If there is already a SELECT statement in the union, append a "UNION ALL" before the next.
    echo 'UNION ALL' >> "${UNION_SQL}"
  fi
  echo 'SELECT * FROM `{{.ProjectID}}.'"${ds}"'.ndt7_joined`' >> "${UNION_SQL}"
  union_has_select=true
done

# Only deploy view if it contains at least one SELECT.
if [[ "${union_has_select}" == true ]] ; then
  # NOTE: Must create "ndt7_union" last because it references the views above.
  create_view "${SRC_PROJECT}" "${SRC_PROJECT}" autoload_v2_ndt "${UNION_SQL}"
fi

echo "All views created successfully"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
# Library for creating dataset views. | ||
|
||
#######################################
# Prepares the environment used by create_view.
# Globals:
#   KEYNAME (read): "self", or the NAME of an environment variable holding
#     service account key JSON.
#   TAG_NAME, COMMIT_SHA (read/exported): set to "manual"/"undefined" when
#     either one is missing.
#   GOOGLE_APPLICATION_CREDENTIALS (exported when KEYNAME is not "self").
#   USER (written): the account reported by gcloud.
#   BQ_CREATE_VIEW (written): the bq_create_view command to run.
# Returns:
#   non-zero if the key variable is empty or the key file cannot be written.
#######################################
function create_view_init() {
  # Git info is nominally exported from the caller
  if [[ -z "${TAG_NAME-}" || -z "${COMMIT_SHA-}" ]]; then
    echo "Not Git"
    export TAG_NAME="manual"
    export COMMIT_SHA="undefined"
  fi

  if [[ "${KEYNAME}" != "self" ]] ; then
    # KEYNAME is used indirectly: it names the variable that holds the key.
    if [[ -z "${!KEYNAME-}" ]] ; then
      echo "Environment variable '${KEYNAME}' is empty or unset" >&2
      return 1
    fi
    # Write the key with owner-only permissions. Any stale file is removed
    # first so that the restrictive umask applies to a freshly created file.
    (
      umask 077 \
        && rm -f /tmp/sa.json \
        && printf '%s\n' "${!KEYNAME}" > /tmp/sa.json
    ) || return 1
    export GOOGLE_APPLICATION_CREDENTIALS=/tmp/sa.json
    # Guarantee that `gcloud config get-value account` works as intended.
    gcloud auth activate-service-account --key-file /tmp/sa.json
  fi
  # Extract service account user name.
  USER=$( gcloud config get-value account )

  # Prefer a bq_create_view binary in the current directory over one on PATH.
  BQ_CREATE_VIEW=bq_create_view
  if [[ -x ./bq_create_view ]] ; then
    BQ_CREATE_VIEW=./bq_create_view
  fi
}
|
||
#######################################
# Creates one view from a SQL template by invoking ${BQ_CREATE_VIEW}.
# The view name is the template's base file name without its ".sql" or
# ".SQL~" suffix. The view description is built from the template's leading
# "--" comment lines plus release and provenance information.
# Globals:
#   BQ_CREATE_VIEW, TAG_NAME, COMMIT_SHA, USER (read)
# Arguments:
#   $1 - source project
#   $2 - destination project
#   $3 - destination dataset
#   $4 - path to the SQL template
#######################################
function create_view() {
  local src_project=$1
  local dst_project=$2
  local dataset=$3
  local template=$4
  # Declared separately from the assignments so that a failing command
  # substitution is not masked by the exit status of `local`.
  local description
  local view

  # Collect lines starting with "--" (minus the "--" itself) up to the first
  # line that starts with SELECT.
  description=$(
    awk '/^--/ {print substr($0, 3)} /^SELECT/ {exit(0)}' "${template}" )
  description+=$'\n'$'\n'"Release tag: $TAG_NAME Commit: $COMMIT_SHA"
  description+=$'\n'"View of data from '${src_project}'."
  description+=$'\n'"Using: github.com/m-lab/..${template}"
  description+=$'\n'"On :$(date)"

  # Strip filename down to view name.
  # Note that _nofilter views are generated with .SQL~ suffix to prevent checkin
  view="${template%%.sql}"
  view="${view%%.SQL~}"
  view="${view##*/}"

  echo "Creating ${dst_project}.${dataset}.${view} using ${template}"

  "${BQ_CREATE_VIEW}" \
    -src-project "${src_project}" \
    -create-view "${dst_project}.${dataset}.${view}" \
    -template "${template}" \
    -description "${description}" \
    -editor "${USER}"
}
|
||
#######################################
# Renders autoload_v2_ndt/ndt7_joined.template.sql for one organization and
# deploys the result as the ndt7_joined view in autoload_v2_<org>_ndt.
# Must be run from the directory that contains autoload_v2_ndt/.
# Arguments:
#   $1 - project used as both source and destination
#   $2 - organization name substituted for {{ORG}} in the template
# Returns:
#   non-zero if the org name contains characters other than letters, digits
#   and underscores.
#######################################
function create_org_joined_view() {
  local project=$1
  local org=$2
  local dataset="autoload_v2_${org}_ndt"

  # The org is spliced into a sed replacement and into a dataset name, so
  # reject anything that could alter the sed expression ("/", "&", "\").
  if [[ ! "${org}" =~ ^[A-Za-z0-9_]+$ ]] ; then
    echo "Invalid org name: '${org}'" >&2
    return 1
  fi

  mkdir -p "${dataset}"
  sed -e "s/{{ORG}}/${org}/g" autoload_v2_ndt/ndt7_joined.template.sql \
    > "${dataset}/ndt7_joined.sql"
  create_view "${project}" "${project}" "${dataset}" "./${dataset}/ndt7_joined.sql"
}