Skip to content

Commit

Permalink
[sc-27082] Do the query text concatenation as CTE (#891)
Browse files Browse the repository at this point in the history
* [sc-27082] Do the query text concatenation as CTE

* bump version

* fix the query
  • Loading branch information
usefulalgorithm authored Jul 4, 2024
1 parent f8364b3 commit d3862cf
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 46 deletions.
69 changes: 24 additions & 45 deletions metaphor/redshift/access_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,28 @@
from asyncpg import Connection, Record

REDSHIFT_USAGE_SQL_TEMPLATE = """
WITH queries as (
WITH queries AS (
SELECT
sqt.*,
SUM(LEN(sqt.text)) OVER (PARTITION BY sqt.query) AS length
FROM stl_querytext as sqt
ORDER BY length DESC, sqt.sequence
*,
SUM(LEN(text)) OVER (PARTITION BY query) AS length
FROM
stl_querytext
), filtered AS (
SELECT
q.query,
LISTAGG(
CASE
WHEN LEN(RTRIM(q.text)) = 0 THEN q.text
ELSE RTRIM(q.text)
END,
''
) WITHIN GROUP (ORDER BY q.sequence) AS querytxt
FROM
queries AS q
WHERE
q.length < 65536
GROUP BY
q.query
)
SELECT DISTINCT
ss.userid,
Expand All @@ -19,23 +35,7 @@
ss.rows,
ss.bytes,
ss.tbl,
REPLACE(
REPLACE(
LISTAGG(
CASE
WHEN LEN(RTRIM(q.text)) = 0 THEN q.text
ELSE RTRIM(q.text)
END,
''
)
WITHIN GROUP (ORDER BY q.sequence)
OVER (
PARTITION BY q.userid, q.xid, q.pid, q.query
),
'\r', ''
),
'\\n', ''
) AS querytxt,
q.querytxt,
sti.database,
sti.schema,
sti.table,
Expand All @@ -45,36 +45,15 @@
FROM stl_scan ss
JOIN svv_table_info sti ON ss.tbl = sti.table_id
JOIN stl_query sq ON ss.query = sq.query
JOIN queries q ON ss.query = q.query
JOIN filtered q ON ss.query = q.query
JOIN svl_user_info sui ON sq.userid = sui.usesysid
WHERE ss.starttime >= '{start_time}'
AND ss.starttime < '{end_time}'
AND sq.aborted = 0
AND q.length < 65536
GROUP BY
ss.userid,
ss.query,
sui.usename,
ss.rows,
ss.bytes,
ss.tbl,
sti.database,
sti.schema,
sti.table,
sq.starttime,
sq.endtime,
sq.aborted,
ss.endtime,
q.text,
q.userid,
q.xid,
q.pid,
q.query,
q.sequence
ORDER BY ss.endtime DESC;
"""
"""
The condition `q.length < 65536` is because Redshift's LISTAGG method
The condition `length < 65536` exists because Redshift's LISTAGG function
cannot return a result longer than 65535 characters, so longer query texts are excluded.
See https://docs.aws.amazon.com/redshift/latest/dg/r_WF_LISTAGG.html#r_WF_LISTAGG-data-types
"""
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "metaphor-connectors"
version = "0.14.26"
version = "0.14.27"
license = "Apache-2.0"
description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app."
authors = ["Metaphor <[email protected]>"]
Expand Down

0 comments on commit d3862cf

Please sign in to comment.