Skip to content

Commit

Permalink
Merge pull request #339 from Brown-University-Library/roles_check_script
Browse files Browse the repository at this point in the history
improves roles-checking.
  • Loading branch information
birkin authored Jul 26, 2023
2 parents 40f9d54 + 60fea79 commit e56fd72
Show file tree
Hide file tree
Showing 8 changed files with 304 additions and 26 deletions.
16 changes: 8 additions & 8 deletions requirements/base.in
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
bdrxml
django==3.2.15
django-crispy-forms
django-dotenv
django-markdown-deux
django-pagedown
requests

bdrxml==1.4
django-crispy-forms==1.11.2
django-dotenv==1.4.2
django-markdown-deux==1.0.5
django-pagedown==2.2.0
django==3.2.18
pillow~=9.0.0 # older version auto-installed by django-pagedown contains vulnerability
requests==2.26.0
trio==0.22.0
32 changes: 25 additions & 7 deletions requirements/base.txt
Original file line number Diff line number Diff line change
@@ -1,17 +1,27 @@
#
# This file is autogenerated by pip-compile
# To update, run:
# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
# pip-compile ./requirements/base.in
#
asgiref==3.3.4
# via django
async-generator==1.10
# via trio
attrs==23.1.0
# via
# outcome
# trio
bdrxml==1.4
# via -r ./requirements/base.in
certifi==2020.12.5
# via requests
charset-normalizer==2.0.3
# via requests
django==3.2.18
# via
# -r ./requirements/base.in
# django-pagedown
django-crispy-forms==1.11.2
# via -r ./requirements/base.in
django-dotenv==1.4.2
Expand All @@ -20,20 +30,22 @@ django-markdown-deux==1.0.5
# via -r ./requirements/base.in
django-pagedown==2.2.0
# via -r ./requirements/base.in
django==3.2.15
# via
# -r ./requirements/base.in
# django-pagedown
eulxml==1.1.3
# via bdrxml
exceptiongroup==1.1.1
# via trio
idna==2.10
# via requests
# via
# requests
# trio
isodate==0.6.0
# via rdflib
lxml==4.7.1
# via eulxml
markdown2==2.4.0
# via django-markdown-deux
outcome==1.2.0
# via trio
pillow==9.0.1
# via
# -r ./requirements/base.in
Expand All @@ -52,8 +64,14 @@ requests==2.26.0
# via -r ./requirements/base.in
six==1.16.0
# via eulxml
sniffio==1.3.0
# via trio
sortedcontainers==2.4.0
# via trio
sqlparse==0.4.2
# via django
trio==0.22.0
# via -r ./requirements/base.in
urllib3==1.26.7
# via requests

Expand Down
2 changes: 1 addition & 1 deletion requirements/local.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
-r base.txt
responses==0.10.9
pip-tools==5.5.0
pip-tools==6.13.0
Empty file added rome_app/lib/__init__.py
Empty file.
132 changes: 132 additions & 0 deletions rome_app/lib/roles_checker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
"""
This script checks the Biography roles field for roles that are not in the Roles table.
Usage:
- cd ./ttwr
- export ROME__PROJECT_DIR_ROOT_PATH="/full/path/to/stuff/github_dir/"
- export ROME__DOTENV_PATH="/full/path/to/stuff/.env"
- export ROME__LOG_LEVEL="DEBUG"
- source ../env/bin/activate
- python ./rome_app/lib/roles_checker.py
Sample output:
Roles in Biography/Roles that are not in the Roles table:
{ 'name': 'Person_A', 'id': 1, 'invalid_roles': ['bad_role_A', 'bad_role_B'] }
{ 'name': 'Person_B', 'id': 2, 'invalid_roles': ['bad_role_C'] }
etc...
"""

import json, logging, os, pathlib, pprint, sys
import django, dotenv

""" Allows script to be run from command-line or as import. """
try:
    # Command-line path: the ROME__LOG_LEVEL envar must exist and name a known level;
    # a missing envar or an unknown level raises and drops through to the fallback.
    level_dict = { 'DEBUG': logging.DEBUG, 'INFO': logging.INFO }
    ENVAR_LOG_LEVEL = os.environ['ROME__LOG_LEVEL']
    print( f'ENVAR_LOG_LEVEL, ``{ENVAR_LOG_LEVEL}``' )
    LEVEL_OBJECT = level_dict[ ENVAR_LOG_LEVEL ]
    logging.basicConfig(
        datefmt='%d/%b/%Y %H:%M:%S',
        format='[%(asctime)s] %(levelname)s [%(module)s-%(funcName)s()::%(lineno)d] %(message)s',
        level=LEVEL_OBJECT )
    log = logging.getLogger( 'example_script' )
    log.debug( 'starting log' )
except Exception:
    # Import path: leave logging configuration to the importing application.
    log = logging.getLogger(__name__)


## run code ---------------------------------------------------------
def run_code():
    """ Reports Biography entries whose roles are not in the Roles table.
        For each Biography (ordered by name), splits the semicolon-delimited `roles` string,
        strips each piece, and checks it with `check_role()`.
        Returns a list of dicts, one per Biography having at least one invalid role:
            { 'name': <bio name>, 'id': <bio id>, 'invalid_roles': [<role>, ...] }
        Called by `__main__`. """
    from rome_app.models import Biography
    log.debug( 'starting run_code()' )
    problems = []
    for bio in Biography.objects.all().order_by( 'name' ):
        log.debug( f'checking bio.name, ``{bio.name}``' )
        roles = bio.roles
        log.debug( f'roles, ``{roles}``' )
        split_roles = roles.split( ';' ) if roles else []
        log.debug( f'split_roles, ``{split_roles}``' )
        ## each piece is stripped before the lookup; only the failures are kept
        invalid_roles = [
            role for role in ( raw_role.strip() for raw_role in split_roles )
            if check_role( role ) == 'invalid'
        ]
        if invalid_roles:
            problems.append(
                { 'name': bio.name, 'id': bio.id, 'invalid_roles': invalid_roles } )  # type: ignore -- `id` is valid.
    log.info( f'problems, ``{pprint.pformat(problems, sort_dicts=False)}``' )
    log.info( f'number of problem-entries, ``{len(problems)}``' )
    log.debug( 'end of run_code()' )
    return problems


def check_role( role_to_check: str ):
    """ Checks a (stripped) biography-role against the Roles table.
        Returns 'valid' if a Role row exists whose `text` is exactly `role_to_check`
        (compared case-sensitively in python), otherwise 'invalid'.
        Note: the python-side comparison exists because the django docs say:
            "... In MySQL, a database table's "collation" setting determines whether exact
            comparisons are case-sensitive. This is a database setting, not a Django setting. ..."
        Implementation notes:
        - The comparison is an explicit `in` test rather than an `assert`, so it still runs
          under `python -O`.
        - `filter()` is used rather than `get()`, so duplicate rows, or rows differing only
          by case under a case-insensitive collation, do not make an existing role look invalid.
        - Database errors are not caught here; they propagate instead of being reported as 'invalid'.
        Called by `run_code()`. """
    from rome_app.models import Role
    log.debug( f'role_to_check (stripped), ``{role_to_check}``' )
    ## the db may match case-insensitively, so fetch the candidates and compare exactly in python
    candidate_texts = list(
        Role.objects.filter( text__exact=role_to_check ).values_list( 'text', flat=True ) )
    log.debug( f'candidate_texts, ``{candidate_texts}``' )
    if role_to_check in candidate_texts:
        validity_check = 'valid'
    else:
        log.debug( f'role, ``{role_to_check}`` not found' )
        validity_check = 'invalid'
    log.debug( f'validity_check, ``{validity_check}``' )
    return validity_check


## helper -- setup environment --------------------------------------
def setup_environment():
    """ Prepares the process so django-model imports work when run as a script.
        - appends the ROME__PROJECT_DIR_ROOT_PATH envar value to sys.path
        - reads the .env file named by the ROME__DOTENV_PATH envar (a read failure is logged, not raised)
        - defaults DJANGO_SETTINGS_MODULE to "config.settings", appends the cwd to sys.path,
          and calls django.setup()
        Raises KeyError if either ROME__ envar is missing.
        Called by `__main__`. """

    def _append_to_sys_path( path_entry ):
        """ Appends path_entry to sys.path unless it's already there. """
        if path_entry not in sys.path:
            sys.path.append( path_entry )

    log.debug( 'setting up environment' )

    ## project root on sys.path ---------------------------------------
    project_root = os.environ['ROME__PROJECT_DIR_ROOT_PATH']
    log.debug( f'PROJECT_ROOT, ``{project_root}``' )
    _append_to_sys_path( project_root )

    ## .env settings --------------------------------------------------
    dotenv_path = os.environ['ROME__DOTENV_PATH']
    log.debug( f'DOTENV_PATH, ``{dotenv_path}``' )
    try:
        dotenv.read_dotenv( dotenv_path )
        log.debug( 'dotenv successfully read' )
    except Exception:
        log.exception( 'problem reading dotenv' )  # best-effort: setup continues

    ## django ---------------------------------------------------------
    os.environ.setdefault( "DJANGO_SETTINGS_MODULE", "config.settings" )
    _append_to_sys_path( os.getcwd() )  # assumes the cwd is the project directory
    django.setup()  # from here on, django-related imports will work
    log.debug( 'django.setup() complete' )
    return


## caller -----------------------------------------------------------
if __name__ == "__main__":
    ## command-line entry-point: the environment must be set up before
    ## run_code(), because run_code() imports django models.
    log.debug( 'starting if __name__ == "__main__"')
    setup_environment()  # sys.path, .env settings, django.setup()
    run_code()  # performs the roles check and logs the results
    log.debug( 'eof' )


## eof
84 changes: 84 additions & 0 deletions rome_app/lib/version_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import datetime, json, logging, os, pprint, subprocess

import trio
from django.conf import settings

log = logging.getLogger(__name__)


def make_context( request, rq_now, info_txt ):
    """ Builds the request/response data-dct for the version view.
        - request: object with a `scheme` attribute and a `META` mapping
          (`META` must contain 'PATH_INFO')
        - rq_now: datetime captured when the request began; used for the timestamp and time-taken
        - info_txt: version text, stored as-is under response.version
        Returns { 'request': {'url', 'timestamp'}, 'response': {'ip', 'version', 'timetaken'} }.
        Called by views.version() """
    meta = request.META
    host = meta.get( 'HTTP_HOST', '127.0.0.1' )  # HTTP_HOST doesn't exist for client-tests
    uri = meta.get( 'REQUEST_URI', meta['PATH_INFO'] )
    request_part = {
        'url': f'{request.scheme}://{host}{uri}',
        'timestamp': str( rq_now ),
    }
    response_part = {
        'ip': meta.get( 'REMOTE_ADDR', 'unknown' ),
        'version': info_txt,
        'timetaken': str( datetime.datetime.now() - rq_now ),
    }
    return { 'request': request_part, 'response': response_part }


class GatherCommitAndBranchData:
    """ Gathers the project's current git commit-line and branch-name concurrently.
        After `manage_git_calls()` completes, results are available as `self.commit` and
        `self.branch` (and mirrored to `self.commit_data` and `self.branch_data`). """

    def __init__( self ):
        self.commit_data = ''
        self.branch_data = ''
        ## populated by manage_git_calls(); initialized so reads before the call don't raise AttributeError
        self.commit = ''
        self.branch = ''

    async def manage_git_calls( self ):
        """ Triggers calling subprocess commands concurrently.
            Stores the results on the instance.
            Called by views.version() """
        log.debug( 'manage_git_calls' )
        results_holder_dct = {}  # receives git responses as they're produced
        async with trio.open_nursery() as nursery:
            nursery.start_soon( self.fetch_commit_data, results_holder_dct )
            nursery.start_soon( self.fetch_branch_data, results_holder_dct )
        ## the nursery block only exits once both tasks have finished
        log.debug( f'final results_holder_dct, ```{pprint.pformat(results_holder_dct)}```' )
        self.commit = results_holder_dct['commit']
        self.branch = results_holder_dct['branch']
        self.commit_data = self.commit
        self.branch_data = self.branch
        return

    async def _run_git( self, *git_args ) -> str:
        """ Runs `git <git_args>` in settings.BASE_DIR and returns its decoded stdout.
            The working directory is passed via `cwd=` rather than os.chdir(), because the
            process-wide cwd is shared by the concurrently-running tasks.
            Called by fetch_commit_data() and fetch_branch_data() """
        output_obj: subprocess.CompletedProcess = await trio.run_process(
            ['git', *git_args], capture_stdout=True, cwd=settings.BASE_DIR )
        return output_obj.stdout.decode( 'utf-8' )

    async def fetch_commit_data( self, results_holder_dct ):
        """ Fetches commit-data.
            Stores the first line of the most recent log-entry under results_holder_dct['commit'].
            Called by manage_git_calls() """
        log.debug( 'fetch_commit_data' )
        output: str = await self._run_git( 'log', '-1' )  # only the latest entry is needed
        lines = output.split( '\n' )
        results_holder_dct['commit'] = lines[0]
        return

    async def fetch_branch_data( self, results_holder_dct ):
        """ Fetches branch-data.
            Stores the name from the `*`-marked line of `git branch` under
            results_holder_dct['branch']; stores 'init' if no line is marked.
            Called by manage_git_calls() """
        log.debug( 'fetch_branch_data' )
        output: str = await self._run_git( 'branch' )
        branch = 'init'
        for line in output.split( '\n' ):
            if line.startswith( '*' ):
                branch = line[2:]  # drops the leading '* '
                break
        results_holder_dct['branch'] = branch
        return

## end class GatherCommitAndBranchData
6 changes: 6 additions & 0 deletions rome_app/urls_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,5 +43,11 @@
re_path(r'^biographies/new/$', views.new_biography, name='new_biography'),

re_path(r'^search/$', views.search_page, name= 'search_page'),

## helper view
re_path( r'^version/$', views.version, name='version_url' ),

## temp data-correction
re_path( r'^temp_roles_checker/$', views.temp_roles_checker, name='temp_roles_checker_url' ),
]

Loading

0 comments on commit e56fd72

Please sign in to comment.