Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

jwt.decode sonar codemod #326

Merged
merged 7 commits into from
Mar 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions integration_tests/test_jwt_decode_verify.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from core_codemods.jwt_decode_verify import JwtDecodeVerify
from core_codemods.jwt_decode_verify import JwtDecodeVerify, JwtDecodeVerifyTransformer
from codemodder.codemods.test import (
BaseIntegrationTest,
original_and_expected_from_code_path,
Expand All @@ -24,4 +24,4 @@ class TestJwtDecodeVerify(BaseIntegrationTest):
expected_diff = '--- \n+++ \n@@ -8,7 +8,7 @@\n \n encoded_jwt = jwt.encode(payload, SECRET_KEY, algorithm="HS256")\n \n-decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=False)\n-decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": False})\n+decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=True)\n+decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": True})\n \n var = "something"\n'
expected_line_change = "11"
num_changes = 2
change_description = JwtDecodeVerify.change_description
change_description = JwtDecodeVerifyTransformer.change_description
8 changes: 3 additions & 5 deletions integration_tests/test_sonar_exception_without_raise.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
from core_codemods.exception_without_raise import (
ExceptionWithoutRaise,
ExceptionWithoutRaiseTransformer,
)
from core_codemods.exception_without_raise import ExceptionWithoutRaiseTransformer
from core_codemods.sonar.sonar_exception_without_raise import SonarExceptionWithoutRaise
from codemodder.codemods.test import (
BaseIntegrationTest,
original_and_expected_from_code_path,
)


class TestSonarExceptionWithoutRaise(BaseIntegrationTest):
codemod = ExceptionWithoutRaise
codemod = SonarExceptionWithoutRaise
code_path = "tests/samples/exception_without_raise.py"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I realized this int test was importing the wrong codemod

original_code, expected_new_code = original_and_expected_from_code_path(
code_path,
Expand Down
32 changes: 32 additions & 0 deletions integration_tests/test_sonar_jwt_decode_verify.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from core_codemods.sonar.sonar_jwt_decode_verify import (
SonarJwtDecodeVerify,
JwtDecodeVerifySonarTransformer,
)
from codemodder.codemods.test import (
BaseIntegrationTest,
original_and_expected_from_code_path,
)


class TestJwtDecodeVerify(BaseIntegrationTest):
codemod = SonarJwtDecodeVerify
code_path = "tests/samples/jwt_decode_verify.py"
original_code, expected_new_code = original_and_expected_from_code_path(
code_path,
[
(
10,
"""decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=True)\n""",
),
(
11,
"""decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": True})\n""",
),
],
)
sonar_issues_json = "tests/samples/sonar_issues.json"

expected_diff = '--- \n+++ \n@@ -8,7 +8,7 @@\n \n encoded_jwt = jwt.encode(payload, SECRET_KEY, algorithm="HS256")\n \n-decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=False)\n-decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": False})\n+decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=True)\n+decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": True})\n \n var = "something"\n'
expected_line_change = "11"
num_changes = 2
change_description = JwtDecodeVerifySonarTransformer.change_description
2 changes: 2 additions & 0 deletions src/codemodder/codemods/base_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ def __init__(
def filter_by_result(self, node):
pos_to_match = self.node_position(node)
if self.results is None:
# Returning True here means codemods without detectors (and results)
# will still run their transformations.
return True
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I documented this because it was strange to return True here (do we filter by result or not filter by result?). Then I realized the logic here is that this method is used for all codemods - those with and those without results. We still want to analyze codemods even if there are no results. Maybe later on we can have a filter_by_result that's more relevant. The one I implemented for jwt sonar returns False here, since there should always be results if we want to run the codemod.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes I encountered this too. We're using None as a sentinel for codemods that do not have detectors, and we do not want to short-circuit those cases. If you don't mind, could you update the comment to say something along those lines?

return any(result.match_location(pos_to_match, node) for result in self.results)

Expand Down
15 changes: 13 additions & 2 deletions src/codemodder/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,19 +34,30 @@ class Result(ABCDataclass):
def match_location(self, pos: CodeRange, node: cst.CSTNode) -> bool:
del node
return any(
pos.start.line == location.start.line
same_line(pos, location)
and (
pos.start.column
in ((start_column := location.start.column) - 1, start_column)
)
and pos.end.line == location.end.line
and (
pos.end.column in ((end_column := location.end.column) - 1, end_column)
)
for location in self.locations
)


def same_line(pos: CodeRange, location: Location) -> bool:
return pos.start.line == location.start.line and pos.end.line == location.end.line


def fuzzy_column_match(pos: CodeRange, location: Location) -> bool:
"""Checks that a result location is within the range of node's `pos` position"""
return (
pos.start.column <= location.start.column <= pos.end.column + 1
and pos.start.column <= location.end.column <= pos.end.column + 1
)


class ResultSet(dict[str, dict[Path, list[Result]]]):
def add_result(self, result: Result):
for loc in result.locations:
Expand Down
5 changes: 5 additions & 0 deletions src/codemodder/scripts/generate_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,11 @@ class DocMetadata:
].guidance_explained,
need_sarif="Yes (Sonar)",
),
"jwt-decode-verify-S5659": DocMetadata(
importance=CORE_METADATA["jwt-decode-verify"].importance,
guidance_explained=CORE_METADATA["jwt-decode-verify"].guidance_explained,
need_sarif="Yes (Sonar)",
),
}


Expand Down
2 changes: 2 additions & 0 deletions src/core_codemods/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
from .str_concat_in_seq_literal import StrConcatInSeqLiteral
from .fix_async_task_instantiation import FixAsyncTaskInstantiation
from .django_model_without_dunder_str import DjangoModelWithoutDunderStr
from .sonar.sonar_jwt_decode_verify import SonarJwtDecodeVerify

registry = CodemodCollection(
origin="pixee",
Expand Down Expand Up @@ -134,5 +135,6 @@
SonarRemoveAssertionInPytestRaises,
SonarFlaskJsonResponseType,
SonarDjangoJsonResponseType,
SonarJwtDecodeVerify,
],
)
74 changes: 43 additions & 31 deletions src/core_codemods/jwt_decode_verify.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,41 +5,17 @@
Metadata,
Reference,
ReviewGuidance,
SimpleCodemod,
)
from core_codemods.api.core_codemod import CoreCodemod
from codemodder.codemods.libcst_transformer import (
LibcstTransformerPipeline,
LibcstResultTransformer,
)
from codemodder.codemods.semgrep import SemgrepRuleDetector


class JwtDecodeVerify(SimpleCodemod):
metadata = Metadata(
name="jwt-decode-verify",
summary="Verify JWT Decode",
review_guidance=ReviewGuidance.MERGE_WITHOUT_REVIEW,
references=[
Reference(url="https://pyjwt.readthedocs.io/en/stable/api.html"),
Reference(
url="https://owasp.org/www-project-web-security-testing-guide/latest/4-Web_Application_Security_Testing/06-Session_Management_Testing/10-Testing_JSON_Web_Tokens"
),
],
)
class JwtDecodeVerifyTransformer(LibcstResultTransformer):
change_description = "Enable all verifications in `jwt.decode` call."
detector_pattern = r"""
rules:
- pattern-either:
- patterns:
- pattern: jwt.decode(..., verify=False, ...)
- pattern-inside: |
import jwt
...
- patterns:
- pattern: |
jwt.decode(..., options={..., "$KEY": False, ...}, ...)
- metavariable-regex:
metavariable: $KEY
regex: verify_
- pattern-inside: |
import jwt
...
"""

def _replace_opts_dict(self, opts_dict):
new_dict_elements = []
Expand Down Expand Up @@ -100,3 +76,39 @@ def is_verify_keyword(element: cst.DictElement) -> bool:
matchers.matches(element.key, matchers.SimpleString())
and "verify" in element.key.value
)


JwtDecodeVerify = CoreCodemod(
metadata=Metadata(
name="jwt-decode-verify",
summary="Verify JWT Decode",
review_guidance=ReviewGuidance.MERGE_WITHOUT_REVIEW,
references=[
Reference(url="https://pyjwt.readthedocs.io/en/stable/api.html"),
Reference(
url="https://owasp.org/www-project-web-security-testing-guide/latest/4-Web_Application_Security_Testing/06-Session_Management_Testing/10-Testing_JSON_Web_Tokens"
),
],
),
transformer=LibcstTransformerPipeline(JwtDecodeVerifyTransformer),
detector=SemgrepRuleDetector(
r"""
rules:
- pattern-either:
- patterns:
- pattern: jwt.decode(..., verify=False, ...)
- pattern-inside: |
import jwt
...
- patterns:
- pattern: |
jwt.decode(..., options={..., "$KEY": False, ...}, ...)
- metavariable-regex:
metavariable: $KEY
regex: verify_
- pattern-inside: |
import jwt
...
"""
),
)
42 changes: 42 additions & 0 deletions src/core_codemods/sonar/sonar_jwt_decode_verify.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import libcst as cst
from codemodder.codemods.base_codemod import Reference
from codemodder.result import same_line, fuzzy_column_match
from codemodder.codemods.sonar import SonarCodemod
from codemodder.codemods.libcst_transformer import (
LibcstTransformerPipeline,
)
from core_codemods.jwt_decode_verify import JwtDecodeVerify, JwtDecodeVerifyTransformer


class JwtDecodeVerifySonarTransformer(JwtDecodeVerifyTransformer):
def filter_by_result(self, node) -> bool:
"""
Special case result-matching for this rule because the sonar
results returned have a start/end column for the verify keyword
within the `decode` call, not for the entire call like semgrep returns.
"""
match node:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This solution makes sense to me and it works for now. In the longer term, I think we should filter the Sonar/SAST results before they ever reach the transformer. This means we would add some logic to each detector that requires it. This would mean implementing some kind of visitor that validates each of the detected locations before passing the results to the transformer. There's a bit of a performance impact in this case since it effectively means another pass on the file, but it only applies to files where there are already results.

I'm saying this not because I think we need this change right now but because I've encountered similar issues with remediating another SAST tool and I think that updating the filtering logic here for each case is going to become cumbersome.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very interesting! The detector for this particular codemod is semgrep so that's also a little wrinkle.

case cst.Call():
pos_to_match = self.node_position(node)
return any(
self.match_location(pos_to_match, result)
for result in self.results or []
)
return False

def match_location(self, pos, result):
return any(
same_line(pos, location) and fuzzy_column_match(pos, location)
for location in result.locations
)
Comment on lines +29 to +31
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you encounter any examples where fuzzy_column_match is needed? From my observations most nodes reported by sonar would match libcst ones with one exception: Tuples. If you look at the match_location from SonarResult you can see the exception treated there. We should discuss how to treat location match at some point.

Copy link
Contributor Author

@clavedeluna clavedeluna Mar 4, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're going to have to give me an example because adding fuzzy_column_match is the entire point of this PR. As I stated in the PR description, sonar returns index for the keyword=value, while we need jwt.decode(...keyword=value). Unit tests have exact sonar results, too. So inspect those as well.

Or perhaps Im' not understanding your comment?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the simplest terms, does anything break if you remove fuzzy_column_match and replace it by exact match?
If so, does it break consistently? (e.g. always with the same type of node)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, there are no location matches without fuzzy matching.



SonarJwtDecodeVerify = SonarCodemod.from_core_codemod(
name="jwt-decode-verify-S5659",
other=JwtDecodeVerify,
rules=["python:S5659"],
new_references=[
Reference(url="https://rules.sonarsource.com/python/RSPEC-5659/"),
],
transformer=LibcstTransformerPipeline(JwtDecodeVerifySonarTransformer),
)
68 changes: 68 additions & 0 deletions tests/codemods/test_sonar_jwt_decode_verify.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import json
from core_codemods.sonar.sonar_jwt_decode_verify import SonarJwtDecodeVerify
from codemodder.codemods.test import BaseSASTCodemodTest


class TestSonarJwtDecodeVerify(BaseSASTCodemodTest):
codemod = SonarJwtDecodeVerify
tool = "sonar"

def test_name(self):
assert self.codemod.name == "jwt-decode-verify-S5659"

def test_simple(self, tmpdir):
input_code = """
import jwt

SECRET_KEY = "mysecretkey"
payload = {
"user_id": 123,
"username": "john",
}

encoded_jwt = jwt.encode(payload, SECRET_KEY, algorithm="HS256")
decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=False)
decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": False})
"""
expected = """
import jwt

SECRET_KEY = "mysecretkey"
payload = {
"user_id": 123,
"username": "john",
}

encoded_jwt = jwt.encode(payload, SECRET_KEY, algorithm="HS256")
decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=True)
decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": True})
"""
issues = {
"issues": [
{
"rule": "python:S5659",
"status": "OPEN",
"component": f"{tmpdir / 'code.py'}",
"textRange": {
"startLine": 11,
"endLine": 11,
"startOffset": 76,
"endOffset": 88,
},
},
{
"rule": "python:S5659",
"status": "OPEN",
"component": f"{tmpdir / 'code.py'}",
"textRange": {
"startLine": 12,
"endLine": 12,
"startOffset": 84,
"endOffset": 111,
},
},
]
}
self.run_and_assert(
tmpdir, input_code, expected, results=json.dumps(issues), num_changes=2
)
Loading