Skip to content

Commit

Permalink
Continued refactoring.
Browse files Browse the repository at this point in the history
  • Loading branch information
AdnaneKhan committed May 9, 2024
1 parent 62ef002 commit 5bce2cd
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 75 deletions.
2 changes: 0 additions & 2 deletions gato/cli/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
from gato.cli import (RED_DASH, GREEN_PLUS, GREEN_EXCLAIM, RED_EXCLAIM,
BRIGHT_DASH, YELLOW_EXCLAIM, YELLOW_DASH)



from colorama import Style, Fore


Expand Down
7 changes: 5 additions & 2 deletions gato/enumerate/enumerate.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from gato.enumerate.repository import RepositoryEnum
from gato.enumerate.organization import OrganizationEnum
from gato.enumerate.recommender import Recommender
from gato.enumerate.ingest.ingest import DataIngestor
from gato.caching import CacheManager

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -184,7 +185,7 @@ def enumerate_organization(self, org: str):
result = self.org_e.api.call_post('/graphql', wf_query)
# Sometimes we don't get a 200, fall back in this case.
if result.status_code == 200:
self.repo_e.construct_workflow_cache(result.json()['data']['nodes'])
DataIngestor.construct_workflow_cache(result.json()['data']['nodes'])
else:
Output.warn("GraphQL query failed, will revert to REST workflow query for impacted repositories!")
try:
Expand All @@ -196,6 +197,7 @@ def enumerate_organization(self, org: str):
Output.tabbed(
f"Enumerating: {Output.bright(repo.name)}!"
)

self.repo_e.enumerate_repository(repo, large_org_enum=len(enum_list) > 25)
self.repo_e.enumerate_repository_secrets(repo)

Expand Down Expand Up @@ -240,6 +242,7 @@ def enumerate_repo_only(self, repo_name: str, large_enum=False):
Output.tabbed(
f"Enumerating: {Output.bright(repo.name)}!"
)


self.repo_e.enumerate_repository(repo, large_org_enum=large_enum)
self.repo_e.enumerate_repository_secrets(repo)
Expand Down Expand Up @@ -282,7 +285,7 @@ def enumerate_repos(self, repo_names: list):
for i in range (0, 3):
result = self.repo_e.api.call_post('/graphql', wf_query)
if result.status_code == 200:
self.repo_e.construct_workflow_cache(result.json()['data'].values())
DataIngestor.construct_workflow_cache(result.json()['data'].values())
break
else:
Output.warn(f"GraphQL query failed with {result.status_code} on attempt {str(i+1)}, will try again!")
Expand Down
1 change: 1 addition & 0 deletions gato/enumerate/ingest/__init.__.py (note: this filename appears misspelled — a package init must be named __init__.py, so `from gato.enumerate.ingest import DataIngestor` will not resolve through it)
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .ingest import DataIngestor
72 changes: 72 additions & 0 deletions gato/enumerate/ingest/ingest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
from gato.caching.cache_manager import CacheManager
from gato.models import Workflow, Repository

class DataIngestor:
    """Ingests GraphQL query results into Gato's repository/workflow cache.

    Holds an optional work queue for streamed processing and provides a
    static helper to bulk-load workflow yml files and repository metadata
    into the shared ``CacheManager``.
    """

    def __init__(self, queue):
        """
        Args:
            queue (queue): Queue to use for processing data.
        """
        self.queue = queue

    @staticmethod
    def construct_workflow_cache(yml_results):
        """Creates a cache of workflow yml files retrieved from graphQL. Since
        graphql and REST do not have parity, we still need to use rest for most
        enumeration calls. This method saves off all yml files, so during org
        level enumeration if we perform yml enumeration the cached file is used
        instead of making github REST requests.

        Args:
            yml_results (list): List of results from individual GraphQL queries
                (100 nodes at a time).
        """
        cache = CacheManager()
        for result in yml_results:
            # If we get any malformed/missing data just skip it and
            # Gato will fall back to the contents API for these few cases.
            if not result or 'nameWithOwner' not in result:
                continue

            owner = result['nameWithOwner']
            cache.set_empty(owner)
            # Empty 'object' means no workflow yamls, so just skip.
            if not result['object']:
                continue

            for yml_node in result['object']['entries']:
                yml_name = yml_node['name']
                # endswith accepts a tuple of suffixes — one call covers both.
                if yml_name.lower().endswith(('yml', 'yaml')):
                    contents = yml_node['object']['text']
                    wf_wrapper = Workflow(owner, contents, yml_name)

                    cache.set_workflow(owner, yml_name, wf_wrapper)

            # GitHub's viewerPermission is a single enum value; pull access is
            # implied by any of READ/TRIAGE/WRITE/ADMIN, push by WRITE/ADMIN.
            permission = result['viewerPermission']
            repo_data = {
                'full_name': result['nameWithOwner'],
                'html_url': result['url'],
                'visibility': 'private' if result['isPrivate'] else 'public',
                'default_branch': result['defaultBranchRef']['name'],
                'fork': result['isFork'],
                'stargazers_count': result['stargazers']['totalCount'],
                'pushed_at': result['pushedAt'],
                'permissions': {
                    'pull': permission in ('READ', 'TRIAGE', 'WRITE', 'ADMIN'),
                    'push': permission in ('WRITE', 'ADMIN'),
                    'admin': permission == 'ADMIN'
                },
                'archived': result['isArchived'],
                'isFork': False
            }

            repo_wrapper = Repository(repo_data)
            cache.set_repository(repo_wrapper)
88 changes: 18 additions & 70 deletions gato/enumerate/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from gato.caching import CacheManager
from gato.notifications import send_slack_webhook


logger = logging.getLogger(__name__)

class RepositoryEnum():
Expand Down Expand Up @@ -182,20 +181,27 @@ def __perform_yml_enumeration(self, repository: Repository):

# Checks any local workflows referenced by this
self.__check_callees(parsed_yml, repository, rules)

if wf_injection and not skip_checks and not rules:
injection_package = self.__create_info_package(parsed_yml.wf_name,workflow_url, wf_injection, rules)
commit_date, author = self.api.get_file_last_updated(repository.name, ".github/workflows/" + parsed_yml.wf_name)

# We first check the result from GQL, if the last push was within 24 hours, then we check if the last
# push impacted the specific workflow.
if self.__is_within_last_day(repository.repo_data['pushed_at']):
commit_date, author = self.api.get_file_last_updated(repository.name, ".github/workflows/" + parsed_yml.wf_name)
if self.__is_within_last_day(commit_date) and '[bot]' not in author:
send_slack_webhook(injection_package)
repository.set_injection(injection_package)
if self.is_within_last_day(commit_date) and '[bot]' not in author:
send_slack_webhook(injection_package)
if pwn_reqs and not skip_checks:

if pwn_reqs and not skip_checks:
pwn_request_package = self.__create_info_package(parsed_yml.wf_name,workflow_url, pwn_reqs, rules)
commit_date, author = self.api.get_file_last_updated(repository.name, ".github/workflows/" + parsed_yml.wf_name)

if self.is_within_last_day(commit_date) and '[bot]' not in author:
send_slack_webhook(pwn_request_package)

# We first check the result from GQL, if the last push was within 24 hours, then we check if the last
# push impacted the specific workflow.
if self.__is_within_last_day(repository.repo_data['pushed_at']):
commit_date, author = self.api.get_file_last_updated(repository.name, ".github/workflows/" + parsed_yml.wf_name)
if self.__is_within_last_day(commit_date) and '[bot]' not in author:
send_slack_webhook(pwn_request_package)
repository.set_pwn_request(pwn_request_package)

if self_hosted_jobs:
Expand All @@ -216,14 +222,14 @@ def __perform_yml_enumeration(self, repository: Repository):

return runner_wfs

def is_within_last_day(self, timestamp_str, format='%Y-%m-%dT%H:%M:%SZ'):
def __is_within_last_day(self, timestamp_str, format='%Y-%m-%dT%H:%M:%SZ'):
# Convert the timestamp string to a datetime object
date = datetime.strptime(timestamp_str, format)

# Get the current date and time
now = datetime.now()

# Calculate the date 3 days ago
# Calculate the date 1 days ago
seven_days_ago = now - timedelta(days=1)

# Return True if the date is within the last day, False otherwise
Expand Down Expand Up @@ -312,61 +318,3 @@ def enumerate_repository_secrets(

if org_secrets:
repository.set_accessible_org_secrets(org_secrets)

    def construct_workflow_cache(self, yml_results):
        """Creates a cache of workflow yml files retrieved from graphQL. Since
        graphql and REST do not have parity, we still need to use rest for most
        enumeration calls. This method saves off all yml files, so during org
        level enumeration if we perform yml enumeration the cached file is used
        instead of making github REST requests.

        Args:
            yml_results (list): List of results from individual GraphQL queries
                (100 nodes at a time).
        """
        # NOTE(review): this method duplicates
        # DataIngestor.construct_workflow_cache in
        # gato/enumerate/ingest/ingest.py (minus the 'pushed_at' key);
        # callers should migrate to the DataIngestor version.
        cache = CacheManager()
        for result in yml_results:
            # If we get any malformed/missing data just skip it and
            # Gato will fall back to the contents API for these few cases.
            if not result:
                continue

            if 'nameWithOwner' not in result:
                continue

            owner = result['nameWithOwner']
            # Mark the repo as cached-but-empty up front; overwritten below
            # if workflow files are found.
            cache.set_empty(owner)
            # Empty means no yamls, so just skip.
            if not result['object']:
                continue

            for yml_node in result['object']['entries']:
                yml_name = yml_node['name']
                # NOTE(review): endswith('yml') also matches names like
                # "build-yml" with no dot — likely harmless here, but a
                # ('.yml', '.yaml') tuple check would be stricter.
                if yml_name.lower().endswith('yml') or yml_name.lower().endswith('yaml'):
                    contents = yml_node['object']['text']
                    wf_wrapper = Workflow(owner, contents, yml_name)

                    cache.set_workflow(owner, yml_name, wf_wrapper)
            # Build a REST-shaped repo dict from GraphQL fields so the
            # Repository model can consume it unchanged.
            repo_data = {
                'full_name': result['nameWithOwner'],
                'html_url': result['url'],
                'visibility': 'private' if result['isPrivate'] else 'public',
                'default_branch': result['defaultBranchRef']['name'],
                'fork': result['isFork'],
                'stargazers_count': result['stargazers']['totalCount'],
                # viewerPermission is a single enum; pull is implied by any
                # of READ/TRIAGE/WRITE/ADMIN, push by WRITE/ADMIN.
                'permissions': {
                    'pull': result['viewerPermission'] == 'READ' or \
                    result['viewerPermission'] == 'TRIAGE' or \
                    result['viewerPermission'] == 'WRITE' or \
                    result['viewerPermission'] == 'ADMIN',
                    'push': result['viewerPermission'] == 'WRITE' or \
                    result['viewerPermission'] == 'ADMIN',
                    'admin': result['viewerPermission'] == 'ADMIN'
                },
                'archived': result['isArchived'],
                'isFork': False
            }

            repo_wrapper = Repository(repo_data)
            cache.set_repository(repo_wrapper)
1 change: 0 additions & 1 deletion gato/github/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1173,7 +1173,6 @@ def get_file_last_updated(self, repo_name: str, file_path: str):
f'/repos/{repo_name}/commits',params={"path": file_path}
)


commit_date = resp.json()[0]['commit']['author']['date']
commit_author = resp.json()[0]['commit']['author']['name']

Expand Down

0 comments on commit 5bce2cd

Please sign in to comment.