Skip to content

Commit

Permalink
Continued work on parser overhaul
Browse files Browse the repository at this point in the history
  • Loading branch information
AdnaneKhan committed Oct 4, 2024
1 parent 4aa6711 commit 7421fb2
Show file tree
Hide file tree
Showing 15 changed files with 655 additions and 430 deletions.
12 changes: 9 additions & 3 deletions gatox/enumerate/enumerate.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,13 +263,19 @@ def enumerate_organization(self, org: str):
except KeyboardInterrupt:
Output.warn("Keyboard interrupt detected, exiting enumeration!")

self.enumerate_new()

return organization

def enumerate_new(self):
    """Run the new graph-based enumeration alongside the legacy one.

    Temporary entry point used while the new enumeration functionality
    is built next to the old one; the old path will be removed once the
    cut-over happens.

    The uninitialized graph nodes are resolved first, and only then is
    the graph traversed. The earlier traversal call that ran before
    initialization (and without the API client) has been removed.
    """
    Output.info("Resolving actions!")
    WorkflowGraphBuilder().initialize_nodes(self.api)
    Output.info("Traversing graph!")
    PwnRequestVisitor.find_pwn_requests(WorkflowGraphBuilder().graph, self.api)

def enumerate_repo_only(self, repo_name: str, large_enum=False):
"""Enumerate only a single repository. No checks for org-level
Expand Down Expand Up @@ -345,7 +351,7 @@ def enumerate_repos(self, repo_names: list):
repo_wrappers.append(repo_obj)
except KeyboardInterrupt:
Output.warn("Keyboard interrupt detected, exiting enumeration!")



self.enumerate_new()

return repo_wrappers
524 changes: 224 additions & 300 deletions gatox/enumerate/repository.py

Large diffs are not rendered by default.

50 changes: 50 additions & 0 deletions gatox/models/composite.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import yaml
from yaml import CSafeLoader


class Composite:
    """
    Parse a GitHub Action metadata file (action.yml) and record whether it
    describes a composite action.

    Attributes:
        composite (bool): True when ``runs.using`` equals ``"composite"``.
        parsed_yml (dict | None): The parsed YAML document, or None when
            parsing failed.
        invalid (bool): True when the document could not be parsed or its
            top level is not a non-empty mapping.
    """

    def __init__(self, action_yml: str):
        """
        Initializes the Composite instance by parsing the provided YAML text.

        Args:
            action_yml (str): The raw contents of the action YAML file.
        """
        self.composite = False
        self.parsed_yml = None
        # Always define the flag so callers can read it on the success path.
        self.invalid = False
        try:
            # Tabs are not valid YAML indentation, so normalize them first.
            # CSafeLoader is a safe loader: it only builds plain Python types.
            self.parsed_yml = yaml.load(
                action_yml.replace("\t", " "), Loader=CSafeLoader
            )
        except (
            yaml.parser.ParserError,
            yaml.scanner.ScannerError,
            yaml.constructor.ConstructorError,
            ValueError,
        ):
            self.invalid = True
        except Exception as parse_error:
            # Best effort: report unexpected failures but keep enumerating.
            print(
                "Received an exception while parsing action contents: "
                + str(parse_error)
            )
            self.invalid = True

        if not self.parsed_yml or not isinstance(self.parsed_yml, dict):
            self.invalid = True
        else:
            self.composite = self._check_composite()

    def _check_composite(self):
        """
        Checks if the parsed YAML represents a composite GitHub Action.

        Returns:
            bool: True if ``runs.using`` is ``"composite"``, False otherwise
            (including when ``runs`` is missing or is not a mapping).
        """
        runs = self.parsed_yml.get("runs")
        if not isinstance(runs, dict):
            return False
        return runs.get("using") == "composite"
111 changes: 106 additions & 5 deletions gatox/workflow_graph/graph_builder.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import networkx as nx

from gatox.models.workflow import Workflow
from gatox.models.repository import Repository
from gatox.models.composite import Composite
from gatox.workflow_graph.node_factory import NodeFactory
from gatox.workflow_graph.graph.tagged_graph import TaggedGraph
from gatox.workflow_graph.nodes.job import JobNode
from gatox.workflow_graph.nodes.action import ActionNode
from gatox.workflow_graph.nodes.workflow import WorkflowNode
from gatox.caching.cache_manager import CacheManager


class WorkflowGraphBuilder:
Expand Down Expand Up @@ -35,7 +37,7 @@ def add_callee_job(
Adds a reference to a called workflow (reusable workflow)
"""
if not job_def or not job_node:
return
return

callee_node = NodeFactory.create_called_workflow_node(
callee, workflow_wrapper.branch, workflow_wrapper.repo_name
Expand All @@ -45,6 +47,87 @@ def add_callee_job(
self.graph.add_node(callee_node, **callee_node.get_attrs())
self.graph.add_edge(job_node, callee_node, relation="uses")

def initialize_action_node(self, node: ActionNode, api):
    """
    Initialize an ActionNode by retrieving and parsing its contents.

    When the action is a composite action, one step node per composite
    step is added to the graph: the action node "contains" the first
    step and each following step is linked from its predecessor with a
    "next" edge.

    Args:
        node (ActionNode): The action node to initialize.
        api (object): The API client used to retrieve raw action contents.

    Returns:
        False when the action contents could not be retrieved, otherwise
        None.
    """
    action_metadata = node.action_info
    # Set before retrieval, so the node is flagged as initialized even
    # when the contents turn out to be unavailable.
    node.initialized = True

    def get_action_contents(repo, path, ref):
        """
        Retrieve and cache the action contents.

        Args:
            repo (str): The repository name.
            path (str): The path to the action file.
            ref (str): The reference (e.g., branch or tag).

        Returns:
            str: The contents of the action file.
        """
        contents = CacheManager().get_action(repo, path, ref)
        if not contents:
            contents = api.retrieve_raw_action(repo, path, ref)
            if contents:
                CacheManager().set_action(repo, path, ref, contents)
        return contents

    # Local actions are read at the ref of the calling workflow.
    ref = node.caller_ref if action_metadata["local"] else action_metadata["ref"]
    contents = get_action_contents(
        action_metadata["repo"], action_metadata["path"], ref
    )

    if not contents:
        return False

    parsed_action = Composite(contents)
    if not parsed_action.composite:
        return

    # ``steps:`` may be present but empty (None) in the YAML.
    steps = parsed_action.parsed_yml["runs"].get("steps") or []
    calling_name = parsed_action.parsed_yml.get("name", "EMPTY")

    prev_step_node = None
    for index, step in enumerate(steps):
        step_node = NodeFactory.create_step_node(
            step,
            ref,
            action_metadata["repo"],
            action_metadata["path"],
            calling_name,
            index,
        )
        self.graph.add_node(step_node, **step_node.get_attrs())

        # Steps are sequential, so for reachability checks
        # the action only "contains" the first step.
        if prev_step_node is not None:
            self.graph.add_edge(prev_step_node, step_node, relation="next")
        else:
            self.graph.add_edge(node, step_node, relation="contains")
        # Advance on every iteration; otherwise each step would be
        # attached to the action as if it were the first one.
        prev_step_node = step_node

def initialize_callee_node(self, workflow: WorkflowNode, api):
    """Initialize a callee workflow with the workflow yaml.

    Does nothing unless the node carries the 'uninitialized' tag. The
    workflow is looked up in the cache first and fetched through the API
    on a miss; a fetched workflow is stored in the cache.

    Args:
        workflow (WorkflowNode): The called-workflow node to initialize.
        api (object): The API client used to retrieve the workflow file.
    """
    if "uninitialized" not in workflow.get_tags():
        return

    slug, ref, path = workflow.get_parts()
    cache_key = f"{path}:{ref}"
    callee_wf = CacheManager().get_workflow(slug, cache_key)
    if not callee_wf:
        callee_wf = api.retrieve_repo_file(slug, path, ref)
        if callee_wf:
            CacheManager().set_workflow(slug, cache_key, callee_wf)

    self.graph.remove_tags_from_node(workflow, ["uninitialized"])

    # Nothing to build when the workflow could not be retrieved; passing
    # None on would fail when its parsed YAML is accessed.
    if not callee_wf:
        return

    self.build_workflow_jobs(callee_wf, workflow)


def build_graph_from_yaml(
self, workflow_wrapper: Workflow, repo_wrapper: Repository
):
Expand All @@ -58,18 +141,24 @@ def build_graph_from_yaml(
if added:
self.graph.add_node(repo, **repo.get_attrs())

workflow = workflow_wrapper.parsed_yml

wf_node = NodeFactory.create_workflow_node(
workflow_wrapper,
workflow_wrapper.branch,
workflow_wrapper.repo_name,
workflow_wrapper.getPath(),
)

if not 'uninitialized' in wf_node.get_tags():
self.graph.remove_tags_from_node(wf_node, 'uninitialized')

self.graph.add_node(wf_node, **wf_node.get_attrs())
self.graph.add_edge(repo, wf_node, relation="contains")

self.build_workflow_jobs(workflow_wrapper, wf_node)

def build_workflow_jobs(self, workflow_wrapper: Workflow, wf_node: WorkflowNode):

workflow = workflow_wrapper.parsed_yml
jobs = workflow.get("jobs", {})

if not jobs:
Expand Down Expand Up @@ -119,6 +208,7 @@ def build_graph_from_yaml(
job_name,
iter,
)

self.graph.add_node(step_node, **step_node.get_attrs())

# Steps are sequential, so for reachability checks
Expand All @@ -143,3 +233,14 @@ def build_graph_from_yaml(
)
self.graph.add_node(action_node, **action_node.get_attrs())
self.graph.add_edge(step_node, action_node, relation="uses")

def initialize_nodes(self, api):
    """Resolve every graph node currently tagged 'uninitialized'.

    Action nodes are initialized and then have the 'uninitialized' tag
    removed here; workflow nodes are handed to initialize_callee_node.
    Nodes of any other kind are left untouched.

    Args:
        api (object): The API client passed through to the initializers.
    """
    # Work on a copy of the tagged collection, since tags are removed
    # from nodes while looping.
    pending = self.graph.get_nodes_by_tag("uninitialized").copy()
    for pending_node in pending:
        if "ActionNode" not in pending_node.get_tags():
            if "WorkflowNode" in pending_node.get_tags():
                self.initialize_callee_node(pending_node, api)
            continue

        self.initialize_action_node(pending_node, api)
        self.graph.remove_tags_from_node(pending_node, ["uninitialized"])
15 changes: 9 additions & 6 deletions gatox/workflow_graph/node_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from gatox.workflow_graph.nodes.action import ActionNode
from gatox.models.workflow import Workflow
from gatox.models.repository import Repository

from gatox.models.composite import Composite
from gatox.workflow_parser.utility import parse_github_path


Expand Down Expand Up @@ -73,7 +73,6 @@ def create_workflow_node(workflow_data: Workflow, ref, repo_name, workflow_path)
workflow_node = WorkflowNode(ref, repo_name, workflow_path)
if workflow_node.name in NodeFactory.NODE_CACHE:
NodeFactory.NODE_CACHE[workflow_node.name].initialize(workflow_data)
# Need to flip the tags.

return NodeFactory.NODE_CACHE[workflow_node.name]
else:
Expand Down Expand Up @@ -141,7 +140,11 @@ def create_action_node(action_name, ref, action_path, repo_name, params={}):
Returns:
ActionNode: The created ActionNode instance.
"""
action_node = ActionNode(action_name, ref, action_path, repo_name, params)
NodeFactory.NODE_CACHE[action_node.name] = action_node
return action_node
"""
name = f"{repo_name}:{ref}:{action_path}:{action_name}"
if name in NodeFactory.NODE_CACHE:
return NodeFactory.NODE_CACHE[name]
else:
action_node = ActionNode(action_name, ref, action_path, repo_name, params)
NodeFactory.NODE_CACHE[action_node.name] = action_node
return action_node
50 changes: 48 additions & 2 deletions gatox/workflow_graph/nodes/action.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from gatox.workflow_graph.nodes.node import Node

from gatox.workflow_parser.utility import decompose_action_ref

class ActionNode(Node):
"""
Expand All @@ -17,6 +18,22 @@ class ActionNode(Node):
type (str): The type of the action.
"""

# Set of actions that we do not need
# to pull down yamls for.
KNOWN_GOOD = {
    "azure/login",
    "github/codeql-action/analyze",
    "docker/login-action",
    "github/codeql-action",
    "github/codeql-action/init",
    "codecov/codecov-action",
    "docker/setup-buildx-action",
}

# Actions that mark the node as a gate (tagged "permission_check").
KNOWN_GATES = {"sushichop/action-repository-permission"}

def __init__(
self, action_name: str, ref: str, action_path: str, repo_name: str, params: dict
):
Expand All @@ -38,8 +55,32 @@ def __init__(
self.is_gate = False
self.metadata = False
self.initialized = False
self.caller_ref = ref
self.type = "UNK"

self.action_info = decompose_action_ref(action_name, repo_name)

if not self.action_info["local"]:
    action_key = self.action_info["key"]
    # Drop any "@ref" suffix. A key without "@" is used as-is (this
    # case previously hit a NameError because of a "-" typed in place
    # of "=").
    initial_path = action_key.split("@", 1)[0]
    caller_owner = repo_name.split("/")[0]

    # By default, we only check actions if they belong to another
    # repo in the same org. Compare against "owner/" so that an owner
    # whose name merely starts with ours is not treated as the same org.
    if not action_key.startswith(caller_owner + "/"):
        self.initialized = True
    if action_key.startswith("actions/"):
        self.initialized = True
    if initial_path in self.KNOWN_GOOD:
        self.initialized = True

    if initial_path in self.KNOWN_GATES:
        self.is_gate = True
elif self.action_info["docker"]:
    # We don't resolve docker actions
    self.initialized = True

def __hash__(self):
"""
Return the hash value of the ActionNode instance.
Expand Down Expand Up @@ -76,6 +117,12 @@ def get_tags(self):
if self.is_sink:
tags.add("sink")

if not self.initialized:
tags.add("uninitialized")

if self.is_gate:
tags.add("permission_check")

return tags

def get_attrs(self):
Expand All @@ -89,6 +136,5 @@ def get_attrs(self):
self.__class__.__name__: True,
"type": self.type,
"is_soft_gate": False,
"is_hard_gate": False,
"initialized": self.initialized,
"is_hard_gate": False
}
1 change: 1 addition & 0 deletions gatox/workflow_graph/nodes/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def __init__(self, job_name: str, ref: str, repo_name: str, workflow_path: str):
# Create a unique ID for this job.
self.name = f"{repo_name}:{ref}:{workflow_path}:{job_name}"
self.params = {}
self.repo_name = repo_name
self.if_condition = None
self.deployments = []
self.self_hosted = False
Expand Down
2 changes: 1 addition & 1 deletion gatox/workflow_graph/nodes/step.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def __process_action(self, step_data: str):
if type(ref_param) is not str:
self.is_checkout = False
elif "${{" in ref_param and "base" not in ref_param:
# self.metadata = ref_param
self.metadata = ref_param
self.is_checkout = True
elif (
"github-script" in uses
Expand Down
6 changes: 6 additions & 0 deletions gatox/workflow_graph/nodes/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ def __eq__(self, other):
def set_params(self, params):
self.params = params

def get_parts(self):
    """Split the node name into its repository, ref and path parts.

    The name is split on the first two colons only, so a workflow path
    that itself contains a colon is kept whole instead of raising on
    unpacking.

    Returns:
        tuple: ``(repo, ref, path)`` taken from ``self.name``.

    Raises:
        ValueError: If the name has fewer than three colon-separated
            parts.
    """
    repo, ref, path = self.name.split(":", 2)
    return repo, ref, path

def __get_triggers(self, workflow_data: dict):
"""Retrieve the triggers associated with the Workflow node."""
triggers = workflow_data["on"]
Expand Down
Loading

0 comments on commit 7421fb2

Please sign in to comment.