Skip to content

Commit

Permalink
Strip sql comments before parsing procedure name (#16004)
Browse files Browse the repository at this point in the history
* Strip sql comments before parsing procedure name

* update changelog
  • Loading branch information
jmeunier28 authored Oct 12, 2023
1 parent 5ee75fd commit de09d7c
Show file tree
Hide file tree
Showing 6 changed files with 255 additions and 248 deletions.
1 change: 1 addition & 0 deletions sqlserver/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
***Fixed***:

* Properly decode query_hash when statement_text is None ([#15974](https://github.com/DataDog/integrations-core/pull/15974))
* Strip sql comments before parsing procedure name ([#16004](https://github.com/DataDog/integrations-core/pull/16004))

## 15.0.1 / 2023-10-06

Expand Down
6 changes: 3 additions & 3 deletions sqlserver/datadog_checks/sqlserver/activity.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from datadog_checks.base.utils.serialization import json
from datadog_checks.base.utils.tracking import tracked_method
from datadog_checks.sqlserver.const import STATIC_INFO_ENGINE_EDITION, STATIC_INFO_VERSION
from datadog_checks.sqlserver.utils import PROC_CHAR_LIMIT, extract_sql_comments, is_statement_proc
from datadog_checks.sqlserver.utils import PROC_CHAR_LIMIT, extract_sql_comments_and_procedure_name

try:
import datadog_agent
Expand Down Expand Up @@ -275,14 +275,14 @@ def _obfuscate_and_sanitize_row(self, row):
statement = obfuscate_sql_with_metadata(row['statement_text'], self.check.obfuscator_options)
procedure_statement = None
# sqlserver doesn't have a boolean data type so convert integer to boolean
row['is_proc'], procedure_name = is_statement_proc(row['text'])
comments, row['is_proc'], procedure_name = extract_sql_comments_and_procedure_name(row['text'])
if row['is_proc'] and 'text' in row:
procedure_statement = obfuscate_sql_with_metadata(row['text'], self.check.obfuscator_options)
obfuscated_statement = statement['query']
metadata = statement['metadata']
row['dd_commands'] = metadata.get('commands', None)
row['dd_tables'] = metadata.get('tables', None)
row['dd_comments'] = extract_sql_comments(row['text'])
row['dd_comments'] = comments
row['query_signature'] = compute_sql_signature(obfuscated_statement)
# procedure_signature is used to link this activity event with
# its related plan events
Expand Down
5 changes: 2 additions & 3 deletions sqlserver/datadog_checks/sqlserver/statements.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
)
from datadog_checks.base.utils.serialization import json
from datadog_checks.base.utils.tracking import tracked_method
from datadog_checks.sqlserver.utils import PROC_CHAR_LIMIT, extract_sql_comments, is_statement_proc
from datadog_checks.sqlserver.utils import PROC_CHAR_LIMIT, extract_sql_comments_and_procedure_name

try:
import datadog_agent
Expand Down Expand Up @@ -292,7 +292,7 @@ def _normalize_queries(self, rows):
try:
statement = obfuscate_sql_with_metadata(row['statement_text'], self.check.obfuscator_options)
procedure_statement = None
row['is_proc'], procedure_name = is_statement_proc(row['text'])
comments, row['is_proc'], procedure_name = extract_sql_comments_and_procedure_name(row['text'])
if row['is_proc']:
procedure_statement = obfuscate_sql_with_metadata(row['text'], self.check.obfuscator_options)
except Exception as e:
Expand All @@ -308,7 +308,6 @@ def _normalize_queries(self, rows):
)
continue
obfuscated_statement = statement['query']
comments = extract_sql_comments(row['text'])
row['dd_comments'] = comments
# update 'text' field to be obfuscated stmt
row['text'] = obfuscated_statement
Expand Down
15 changes: 8 additions & 7 deletions sqlserver/datadog_checks/sqlserver/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,14 +73,14 @@ def _get_index_for_keyword(text, keyword):
return -1


def extract_sql_comments(text):
def extract_sql_comments_and_procedure_name(text):
if not text:
return []
return [], False, None
in_single_line_comment = False
in_multi_line_comment = False
comment_start = None
result = []

stripped_text = ""
for i in range(len(text)):
if in_multi_line_comment:
if i < len(text) - 1 and text[i : i + 2] == '*/':
Expand All @@ -91,8 +91,7 @@ def extract_sql_comments(text):
elif in_single_line_comment:
if text[i] == '\n':
in_single_line_comment = False
# strip any extra whitespace at the end
# of the single line comment
# strip any extra whitespace at the end of the single line comment
result.append(text[comment_start:i].rstrip())
else:
if i < len(text) - 1 and text[i : i + 2] == '--':
Expand All @@ -101,8 +100,10 @@ def extract_sql_comments(text):
elif i < len(text) - 1 and text[i : i + 2] == '/*':
in_multi_line_comment = True
comment_start = i

return result
else:
stripped_text += text[i]
is_proc, name = is_statement_proc(stripped_text)
return result, is_proc, name


def parse_sqlserver_major_version(version):
Expand Down
234 changes: 0 additions & 234 deletions sqlserver/tests/test_activity.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
from datadog_checks.base.utils.db.utils import DBMAsyncJob, default_json_event_encoding
from datadog_checks.sqlserver import SQLServer
from datadog_checks.sqlserver.activity import DM_EXEC_REQUESTS_COLS, _hash_to_hex
from datadog_checks.sqlserver.utils import extract_sql_comments, is_statement_proc

from .common import CHECK_NAME
from .conftest import DEFAULT_TIMEOUT
Expand Down Expand Up @@ -594,239 +593,6 @@ def test_get_estimated_row_size_bytes(dbm_instance, file):
assert abs((actual_size - computed_size) / float(actual_size)) <= 0.10


@pytest.mark.parametrize(
"query,is_proc,expected_name",
[
[
"""\
CREATE PROCEDURE bobProcedure
BEGIN
SELECT name FROM bob
END;
""",
True,
"bobProcedure",
],
[
"""\
CREATE PROC bobProcedure
BEGIN
SELECT name FROM bob
END;
""",
True,
"bobProcedure",
],
[
"""\
create procedure bobProcedureLowercase
begin
select name from bob
end;
""",
True,
"bobProcedureLowercase",
],
[
"""\
/* my sql is very fun */
CREATE PROCEDURE bobProcedure
BEGIN
SELECT name FROM bob
END;
""",
True,
"bobProcedure",
],
[
"""\
CREATE /* this is fun! */ PROC bobProcedure
BEGIN
SELECT name FROM bob
END;
""",
True,
"bobProcedure",
],
[
"""\
-- this is a comment!
CREATE
-- additional comment here!
PROCEDURE bobProcedure
BEGIN
SELECT name FROM bob
END;
""",
True,
"bobProcedure",
],
[
"CREATE TABLE bob_table",
False,
None,
],
[
"Exec procedure",
False,
None,
],
[
"CREATEprocedure",
False,
None,
],
[
"procedure create",
False,
None,
],
],
)
def test_is_statement_procedure(query, is_proc, expected_name):
p, name = is_statement_proc(query)
assert p == is_proc
assert re.match(name, expected_name, re.IGNORECASE) if expected_name else expected_name == name


@pytest.mark.parametrize(
"query,expected_comments",
[
[
None,
[],
],
[
"",
[],
],
[
"/*",
[],
],
[
"--",
[],
],
[
"/*justonecomment*/",
["/*justonecomment*/"],
],
[
"""\
/* a comment */
-- Single comment
""",
["/* a comment */", "-- Single comment"],
],
[
"/*tag=foo*/ SELECT * FROM foo;",
["/*tag=foo*/"],
],
[
"/*tag=foo*/ SELECT * FROM /*other=tag,incomment=yes*/ foo;",
["/*tag=foo*/", "/*other=tag,incomment=yes*/"],
],
[
"/*tag=foo*/ SELECT * FROM /*other=tag,incomment=yes*/ foo /*lastword=yes*/",
["/*tag=foo*/", "/*other=tag,incomment=yes*/", "/*lastword=yes*/"],
],
[
"""\
-- My Comment
CREATE PROCEDURE bobProcedure
BEGIN
SELECT name FROM bob
END;
""",
["-- My Comment"],
],
[
"""\
-- My Comment
CREATE PROCEDURE bobProcedure
-- In the middle
BEGIN
SELECT name FROM bob
END;
""",
["-- My Comment", "-- In the middle"],
],
[
"""\
-- My Comment
CREATE PROCEDURE bobProcedure
-- In the middle
BEGIN
SELECT name FROM bob
END;
-- And at the end
""",
["-- My Comment", "-- In the middle", "-- And at the end"],
],
[
"""\
-- My Comment
CREATE PROCEDURE bobProcedure
-- In the middle
/*mixed with mult-line foo*/
BEGIN
SELECT name FROM bob
END;
-- And at the end
""",
["-- My Comment", "-- In the middle", "/*mixed with mult-line foo*/", "-- And at the end"],
],
[
"""\
/* hello
this is a mult-line-comment
tag=foo,blah=tag
*/
/*
second multi-line
comment
*/
CREATE PROCEDURE bobProcedure
BEGIN
SELECT name FROM bob
END;
-- And at the end
""",
[
"/* hello this is a mult-line-comment tag=foo,blah=tag */",
"/* second multi-line comment */",
"-- And at the end",
],
],
[
"""\
/* hello
this is a mult-line-commet
tag=foo,blah=tag
*/
CREATE PROCEDURE bobProcedure
-- In the middle
/*mixed with mult-line foo*/
BEGIN
SELECT name FROM bob
END;
-- And at the end
""",
[
"/* hello this is a mult-line-commet tag=foo,blah=tag */",
"-- In the middle",
"/*mixed with mult-line foo*/",
"-- And at the end",
],
],
],
)
def test_extract_sql_comments(query, expected_comments):
comments = extract_sql_comments(query)
assert comments == expected_comments


def test_activity_collection_rate_limit(aggregator, dd_run_check, dbm_instance):
# test the activity collection loop rate limit
collection_interval = 0.1
Expand Down
Loading

0 comments on commit de09d7c

Please sign in to comment.