Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add user rbac and iap #1

Merged
merged 6 commits into from
Aug 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 105 additions & 0 deletions label_studio/khan/iap/middleware.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import logging
from time import time
from typing import Dict, Optional

from django.contrib import auth
from django.core.exceptions import ObjectDoesNotExist
from django.conf import settings
from google.oauth2 import id_token
from google.auth.transport import requests

from organizations.models import Organization


logger = logging.getLogger(__name__)

User = auth.get_user_model()

# IAP configuration. Each value can be overridden from the Django settings
# module; the fallback given here is used when the setting is absent.

# Passed as `audience` when the JWT assertion is verified. None when unset.
IAP_AUDIENCE = getattr(settings, "IAP_AUDIENCE", None)
# Name of the request header the signed JWT assertion is read from.
IAP_HEADER = getattr(settings, "IAP_HEADER", "x-goog-iap-jwt-assertion")
# Passed as `certs_url` when the JWT assertion is verified.
IAP_CERT_URL = getattr(
    settings,
    "IAP_CERT_URL",
    "https://www.gstatic.com/iap/verify/public_key"
)


def _create_user(email):
"""Create a new user with the provided email as username and email.
If an organization already exists, assign the new user to it, otherwise create
a new org created by the user.
"""
user = User.objects.create_user(email=email, username=email)
user.set_unusable_password()

if Organization.objects.exists():
org = Organization.objects.first()
org.add_user(user)
else:
org = Organization.create_organization(
created_by=user, title='Label Studio')
Comment on lines +35 to +40
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this was pulled from the existing user signup logic

user.active_organization = org
user.save(update_fields=['active_organization'])
return user


def _decode_token(request) -> Optional[Dict]:
"""Decode the jwt assertion from the request header.
If the token header is missing, or the token is invalid, returns None
otherwise returns the decoded assertion as a Dict
"""
logger.debug("decoding user from jwt")
encoded_token = request.headers.get(IAP_HEADER)
if encoded_token is None:
logger.warn("no token provided in header")
return None
try:
return id_token.verify_token(
encoded_token,
requests.Request(),
audience=IAP_AUDIENCE,
certs_url=IAP_CERT_URL,
)
except ValueError as e:
logger.warn("invalid token: %s", e)
Copy link

@dat-boris dat-boris Aug 10, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

super nit: we might not want to log the exception if it contains token content? Though I guess it is only JWT signed, so no secret will be leaked, but this might still contain PII.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Digging through the code, these ValueErrors do not contain the contents of the tokens themselves, so we should be good to keep logging the exceptions.

return None


def _get_user(request):
    """Resolve the user named by the JWT assertion on the request.

    Returns None when the assertion is missing or invalid, or when the decoded
    assertion carries no (non-empty) ``email`` claim. Otherwise returns the
    existing user with that email address, creating one first if no such user
    exists yet.
    """
    claims = _decode_token(request)
    if claims is None:
        return None

    logger.debug("decoded token: %s", claims)

    email = claims.get("email")
    if not email:
        return None

    try:
        return User.objects.get(email=email)
    except ObjectDoesNotExist:
        # First time this address is seen: provision an account for it.
        return _create_user(email)


def IAPUserMiddleware(get_response):
    """Build a middleware that signs in the user named by the IAP JWT assertion.

    Requests that already carry an authenticated user pass through untouched.
    Otherwise the user is resolved with ``_get_user`` (looked up, or created
    the first time the email is seen) and logged in with Django's
    ``ModelBackend``. When no user can be resolved, the request continues
    unauthenticated.

    Args:
        get_response: The next middleware or view callable in the chain.

    Returns:
        The middleware callable; it takes a request and returns the response
        produced by ``get_response``.
    """
    def middleware(request):
        # The attribute may be missing entirely if nothing earlier in the
        # chain populated it, so read it defensively rather than letting an
        # AttributeError turn into a 500.
        current_user = getattr(request, "user", None)
        if not (current_user and current_user.is_authenticated):
            user = _get_user(request)
            if user is not None:
                request.user = user
                auth.login(
                    request,
                    user,
                    backend='django.contrib.auth.backends.ModelBackend'
                )
                # Stamp the login time only after auth.login(): login can
                # flush a session that belonged to a different user, which
                # would discard a value written beforehand.
                request.session['last_login'] = time()
        return get_response(request)
    return middleware
Empty file.
6 changes: 6 additions & 0 deletions label_studio/khan/rbac/admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from django.contrib import admin

from khan.rbac.models import UserRole


# Register UserRole with the Django admin site, using the default ModelAdmin.
admin.site.register(UserRole)
5 changes: 5 additions & 0 deletions label_studio/khan/rbac/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from django.apps import AppConfig


class RbacConfig(AppConfig):
    """Django application configuration for the ``khan.rbac`` app."""

    name = "khan.rbac"
23 changes: 23 additions & 0 deletions label_studio/khan/rbac/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Generated by Django 3.2.19 on 2023-07-26 18:29
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pardon my ignorance - what does this do and why should it be in version control?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a DB migration, so it is responsible for creating the database tables/columns/etc. It is generated by `manage.py makemigrations` and applied using `manage.py migrate`, the latter of which is done automagically when label-studio starts. (This is actually a label-studio-ism, not a django-ism, and I'm personally on the fence about it, but nobody asked me 😉.) These are basically Python DDL.


from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    # Initial migration for the rbac app: creates the table behind UserRole.

    initial = True

    # UserRole.user points at users.user, so the users app must be migrated
    # at least this far before this migration runs.
    dependencies = [
        ('users', '0006_user_allow_newsletters'),
    ]

    operations = [
        migrations.CreateModel(
            name='UserRole',
            fields=[
                # One-to-one link to the user, which is also the primary key;
                # deleting the user cascades to this row.
                ('user', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, primary_key=True, related_name='role', serialize=False, to='users.user')),
                # Integer-coded role; new rows default to 1 (Labeler).
                ('role', models.IntegerField(choices=[(1, 'Labeler'), (2, 'Labeling Coordinator'), (3, 'Labeling Infra')], default=1, verbose_name='role')),
            ],
        ),
    ]
Empty file.
40 changes: 40 additions & 0 deletions label_studio/khan/rbac/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from django.contrib.auth.models import auth
from django.db import models

from django.conf import settings
from django.db import models
from django.db.models.signals import post_save
from django.dispatch import receiver
from django.utils.translation import gettext_lazy as _

from khan.rbac.roles import Role

User = auth.get_user_model()


class UserRole(models.Model):
    """RBAC record for a single user: which ``Role`` that user holds."""

    # The user is also the primary key, so there is at most one row per user;
    # from a user instance the row is reachable as ``user.role``.
    user = models.OneToOneField(
        to=settings.AUTH_USER_MODEL,
        on_delete=models.CASCADE,
        primary_key=True,
        related_name="role",
    )
    # Stored as the integer value of a ``Role`` member.
    role = models.IntegerField(
        verbose_name=_("role"),
        choices=Role.choices,
        default=Role.LABELER,
    )

    def has_perm(self, perm: str) -> bool:
        """Return whether the role stored on this row grants ``perm``."""
        assigned = Role(self.role)
        return assigned.has_perm(perm)

    def __str__(self) -> str:
        """Render as ``<user email> - <ROLE MEMBER NAME>``."""
        email = self.user.email
        role_name = Role(self.role).name
        return email + " - " + role_name


# When a user is first created, give them a UserRole carrying the default
# UserRole.role value. The sender is named through settings.AUTH_USER_MODEL,
# the same way UserRole.user references the user model, instead of an
# import-time get_user_model() result; Django resolves the string lazily.
@receiver(post_save, sender=settings.AUTH_USER_MODEL)
def init_user(sender, instance=None, created=False, **kwargs):
    """post_save handler for the user model.

    Args:
        sender: The model class that sent the signal.
        instance: The user instance that was just saved.
        created: True when the save inserted a new row; updates are ignored.
        **kwargs: Remaining signal arguments, unused.
    """
    if created:
        UserRole.objects.create(user=instance)
29 changes: 29 additions & 0 deletions label_studio/khan/rbac/permission.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import logging
from django.core.exceptions import ObjectDoesNotExist

from rest_framework.permissions import BasePermission

logger = logging.getLogger(__name__)


class RBACPermissionClass(BasePermission):
    """DRF permission class that authorizes a request against the user's role.

    A view states what it requires through ``permission_required``: either a
    single permission string applying to every method, or an object whose
    attributes are named after HTTP methods (``GET``, ``POST``, ...) and hold
    the permission required for that method.
    """

    def has_permission(self, request, view):
        """Return True only when the requesting user's role grants the permission.

        Access is denied (False is returned) when the view declares no
        permission for the request's method, including when the view has no
        ``permission_required`` attribute at all, and when the user has no
        role assigned.
        """
        # Not every view is guaranteed to declare permission_required; a
        # missing attribute is treated like a missing permission (deny and
        # log) instead of surfacing as an AttributeError.
        required = getattr(view, "permission_required", None)

        if isinstance(required, str):
            perm = required
        else:
            perm = getattr(required, request.method, None)

        if perm is None:
            logger.warning(
                "path: %s method: %s has no perms",
                request.path,
                request.method,
            )
            return False

        try:
            return request.user.role.has_perm(perm)
        except ObjectDoesNotExist:
            # the user does not have a valid role assigned - default to no access
            logger.error("user %s has no role assigned", request.user.email)
            return False
50 changes: 50 additions & 0 deletions label_studio/khan/rbac/roles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from typing import Dict, Set

from django.db import models

from core.permissions import all_permissions


class Role(models.IntegerChoices):
    """Roles a user can hold, stored as integers.

    The labels are spelled out explicitly; they are the same strings Django
    would derive from the member names.
    """

    LABELER = 1, "Labeler"
    LABELING_COORDINATOR = 2, "Labeling Coordinator"
    LABELING_INFRA = 3, "Labeling Infra"

    def has_perm(self, perm):
        """Return whether ``perm`` is in the permission set mapped to this role."""
        granted = _roles[self]
        return perm in granted


_roles: Dict[Role, Set[str]] = {}

_roles[Role.LABELER] = {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

praise: love this! Thanks for working out so many of the required permissions.

Also love how the set union works for composing the permissions!

all_permissions.organizations_view,
all_permissions.projects_view,
all_permissions.tasks_view,
all_permissions.tasks_change,
all_permissions.annotations_create,
all_permissions.annotations_view,
all_permissions.annotations_change,
all_permissions.annotations_delete,
all_permissions.actions_perform,
all_permissions.predictions_any,
all_permissions.avatar_any,
all_permissions.labels_create,
all_permissions.labels_view,
all_permissions.labels_change,
all_permissions.labels_delete,
}

# Each role below is built as a new set: everything the previous role has,
# plus the permissions listed here.

# Coordinators: labeler permissions plus project create/change/delete and
# task create/delete.
_roles[Role.LABELING_COORDINATOR] = {
    *_roles[Role.LABELER],
    all_permissions.projects_create,
    all_permissions.projects_change,
    all_permissions.projects_delete,
    all_permissions.tasks_create,
    all_permissions.tasks_delete,
}

# Infra: coordinator permissions plus organization create/change/delete/invite.
_roles[Role.LABELING_INFRA] = {
    *_roles[Role.LABELING_COORDINATOR],
    all_permissions.organizations_create,
    all_permissions.organizations_change,
    all_permissions.organizations_delete,
    all_permissions.organizations_invite,
}
3 changes: 3 additions & 0 deletions label_studio/khan/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Khan-specific Label Studio apps

See the confluence page [here](https://khanacademy.atlassian.net/l/cp/uGiyWj2R) for more info.
49 changes: 49 additions & 0 deletions label_studio/khan/settings/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""This is the settings file that serves as the base for all other khan run environments.
It inherits from the label-studio core base settings, and all khan's settings files should inherit from this.
"""
from core.settings.base import *

# Make sure our custom django app is installed
INSTALLED_APPS.extend([

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

django n00b wondering: No action required. I wonder if this is how we get the characteristic that LS_TOKEN > iap-jwt authentication, since both the rest_framework and the label_manager (?) apps would run first?

[1] https://github.com/Khan/label-studio/blob/develop/label_studio/core/settings/base.py#L178

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIRC, the order of installed apps doesn't really matter. What does matter is the order of MIDDLEWARE, REST_FRAMEWORK['DEFAULT_PERMISSION_CLASSES'] and REST_FRAMEWORK['DEFAULT_AUTHENTICATION_CLASSES'], as well as, in the internals of Django/DRF, the order in which those two call each other and defer to one another. I.e., do the DRF internals respect the user added from the JWT, or does DRF prefer its own authentication if a token is provided? With at least anecdotal evidence, it seems that DRF prefers the user from the auth token, when provided, but will fall back to the Django user from the middleware.

"khan",
"khan.rbac"
])

# Append our RBAC permission class to DRF's default permission classes so
# that views relying on the defaults are also checked against the user's role
REST_FRAMEWORK['DEFAULT_PERMISSION_CLASSES'].append(
    "khan.rbac.permission.RBACPermissionClass"
)

# Default logging to INFO level unless LOG_LEVEL says otherwise
LOGGING['root']['level'] = get_env('LOG_LEVEL', 'INFO')

# Default to PSQL
DATABASES = {'default': DATABASES_ALL[DJANGO_DB_POSTGRESQL]}

# IAP _should_ mean users never even see the login page, but on the off chance they do,
# they shouldn't be able to create their own user accounts
DISABLE_SIGNUP_WITHOUT_LINK = True

# IAP Audience used for IAP JWT validation (the IAP middleware passes it as
# the expected audience when verifying the assertion)
IAP_AUDIENCE = get_env("IAP_AUDIENCE")

# Don't send telemetry data to tele.labelstud.io
COLLECT_ANALYTICS = False

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

praise: Thank you for adding this!


# Not sure if things below this line are needed, they came from the label-studio/settings/label-studio file
# NOTE(review): confirm which of these are still required for our deployment.
MIDDLEWARE.append('organizations.middleware.DummyGetSessionMiddleware')
MIDDLEWARE.append('core.middleware.UpdateLastActivityMiddleware')
# The inactivity timeout middleware is only installed when the feature is enabled
if INACTIVITY_SESSION_TIMEOUT_ENABLED:
    MIDDLEWARE.append('core.middleware.InactivitySessionTimeoutMiddleWare')

ADD_DEFAULT_ML_BACKENDS = False

# Use Django's signed-cookie session backend
SESSION_ENGINE = "django.contrib.sessions.backends.signed_cookies"

# No RQ queues are configured
RQ_QUEUES = {}

# in Label Studio Community version, feature flags are always ON
FEATURE_FLAGS_DEFAULT_VALUE = True

# Defaults to True unless STORAGE_PERSISTENCE is set otherwise
STORAGE_PERSISTENCE = get_bool_env('STORAGE_PERSISTENCE', True)
15 changes: 15 additions & 0 deletions label_studio/khan/settings/local.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""This is a settings file intended for use when running the label studio app locally"""
from khan.settings.base import *

# For local, we want to be able to create users since we don't have IAP
# (overrides the True set in khan.settings.base)
DISABLE_SIGNUP_WITHOUT_LINK = False

# Unless set in the env, use DEBUG level logging for local runs
LOGGING['root']['level'] = get_env('LOG_LEVEL', 'DEBUG')

# Use sqlite for local (the base settings default to PostgreSQL)
DATABASES = {'default': DATABASES_ALL[DJANGO_DB_SQLITE]}

# Unless set in the env, run in DEBUG mode
DEBUG = get_bool_env('DEBUG', True)
DEBUG_PROPAGATE_EXCEPTIONS = get_bool_env('DEBUG_PROPAGATE_EXCEPTIONS', True)
6 changes: 6 additions & 0 deletions label_studio/khan/settings/prod.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"""This is a settings file intended for use when running the label studio app in the production environment"""
from khan.settings.base import *

# Make sure our IAP user middleware is present so we populate a user
# object on the request from the IAP JWT. It is appended to the end of the
# list because it reads request.user and request.session, so whatever
# provides those must come before it.
MIDDLEWARE.append("khan.iap.middleware.IAPUserMiddleware")
13 changes: 13 additions & 0 deletions label_studio/khan/settings/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""This is a settings file intended for use when running the label studio app in the test environment"""
from khan.settings.base import *

# Make sure our IAP user middleware is present so we populate a user
# object on the request from the IAP JWT
MIDDLEWARE.append("khan.iap.middleware.IAPUserMiddleware")

# Unless set in the env, use DEBUG level logging for test-environment runs
LOGGING['root']['level'] = get_env('LOG_LEVEL', 'DEBUG')

# Unless set in the env, run in DEBUG mode
DEBUG = get_bool_env('DEBUG', True)
DEBUG_PROPAGATE_EXCEPTIONS = get_bool_env('DEBUG_PROPAGATE_EXCEPTIONS', True)