diff --git a/label_studio/khan/iap/middleware.py b/label_studio/khan/iap/middleware.py
new file mode 100644
--- /dev/null
+++ b/label_studio/khan/iap/middleware.py
@@ -0,0 +1,130 @@
+import logging
+from time import time
+from typing import Dict, Optional
+
+from django.contrib import auth
+from django.core.exceptions import ObjectDoesNotExist
+from django.conf import settings
+from django.db import transaction
+from google.auth import exceptions as google_auth_exceptions
+from google.oauth2 import id_token
+from google.auth.transport import requests
+
+from organizations.models import Organization
+
+
+logger = logging.getLogger(__name__)
+
+User = auth.get_user_model()
+
+IAP_AUDIENCE = getattr(settings, "IAP_AUDIENCE", None)
+IAP_HEADER = getattr(settings, "IAP_HEADER", "x-goog-iap-jwt-assertion")
+IAP_CERT_URL = getattr(
+    settings,
+    "IAP_CERT_URL",
+    "https://www.gstatic.com/iap/verify/public_key"
+)
+
+
+def _create_user(email):
+    """Create a new user with the provided email as username and email.
+
+    If an organization already exists, assign the new user to it, otherwise create
+    a new org created by the user. Everything runs in a single transaction so that
+    a failure while assigning the organization does not leave a user without one.
+    """
+    with transaction.atomic():
+        user = User.objects.create_user(email=email, username=email)
+        # these accounts are created from an IAP assertion, so no password
+        # should ever match
+        user.set_unusable_password()
+
+        # first() returns None when there is no organization yet, so a single
+        # query replaces the exists() + first() pair
+        org = Organization.objects.first()
+        if org is not None:
+            org.add_user(user)
+        else:
+            org = Organization.create_organization(
+                created_by=user, title='Label Studio')
+        user.active_organization = org
+        # 'password' is listed so the unusable password set above is
+        # actually written to the database
+        user.save(update_fields=['password', 'active_organization'])
+    return user
+
+
+def _decode_token(request) -> Optional[Dict]:
+    """Decode the jwt assertion from the request header.
+
+    Returns None if the token header is missing, the token is invalid or cannot be
+    verified, or IAP_AUDIENCE is not configured. Otherwise returns the decoded
+    assertion as a Dict.
+    """
+    logger.debug("decoding user from jwt")
+    encoded_token = request.headers.get(IAP_HEADER)
+    if encoded_token is None:
+        logger.warning("no token provided in header")
+        return None
+    if not IAP_AUDIENCE:
+        # verify_token does not check the audience when it is None, which
+        # would accept assertions issued for any other IAP-protected app,
+        # so fail closed instead
+        logger.error("IAP_AUDIENCE is not configured, rejecting token")
+        return None
+    try:
+        return id_token.verify_token(
+            encoded_token,
+            requests.Request(),
+            audience=IAP_AUDIENCE,
+            certs_url=IAP_CERT_URL,
+        )
+    except ValueError as e:
+        logger.warning("invalid token: %s", e)
+        return None
+    except google_auth_exceptions.GoogleAuthError as e:
+        # e.g. the signing certs could not be fetched from IAP_CERT_URL
+        logger.error("unable to verify token: %s", e)
+        return None
+
+
+def _get_user(request):
+    """Looks up a user object based on the email address present in the jwt assertion
+    on the request header. If the token is missing or invalid, or has no email claim,
+    None is returned. If the email address does not belong to an existing user, a new
+    one is created and returned.
+    """
+    token = _decode_token(request)
+    if token is None:
+        return None
+
+    logger.debug("decoded token: %s", token)
+    email = token.get("email")
+    if not email:
+        return None
+
+    try:
+        return User.objects.get(email=email)
+    except ObjectDoesNotExist:
+        return _create_user(email)
+
+
+def IAPUserMiddleware(get_response):
+    """Middleware function that looks up or creates a user based on the IAP JWT assertion
+    and then logs that user in.
+    """
+    def middleware(request):
+        # getattr: tolerate a request that has no `user` attribute at all
+        current_user = getattr(request, "user", None)
+        if not (current_user and current_user.is_authenticated):
+            # if a user either doesn't exist on the request or isn't authenticated
+            if (user := _get_user(request)) is not None:
+                request.user = user
+                request.session['last_login'] = time()
+                auth.login(
+                    request,
+                    user,
+                    backend='django.contrib.auth.backends.ModelBackend'
+                )
+        return get_response(request)
+    return middleware
diff --git a/label_studio/khan/rbac/__init__.py b/label_studio/khan/rbac/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/label_studio/khan/rbac/admin.py b/label_studio/khan/rbac/admin.py
new file mode 100644
index 000000000000..e9413e59e302
--- /dev/null
+++ b/label_studio/khan/rbac/admin.py
@@ -0,0 +1,6 @@
+from django.contrib import admin
+
+from khan.rbac.models import UserRole
+
+
+admin.site.register(UserRole)
diff --git a/label_studio/khan/rbac/apps.py b/label_studio/khan/rbac/apps.py
new file mode 100644
index 000000000000..42ef98d50ade
--- /dev/null
+++ b/label_studio/khan/rbac/apps.py
@@ -0,0 +1,5 @@
+from django.apps import AppConfig
+
+
+class RbacConfig(AppConfig):
+    name = 'khan.rbac'
diff --git a/label_studio/khan/rbac/migrations/0001_initial.py b/label_studio/khan/rbac/migrations/0001_initial.py
new file mode 100644
index 000000000000..a140fa68ffe7
--- /dev/null
+++ b/label_studio/khan/rbac/migrations/0001_initial.py
@@ -0,0 +1,23 @@
+# Generated by Django 3.2.19 on 2023-07-26 18:29
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+
+    initial = True
+
+    dependencies = [
+        ('users', '0006_user_allow_newsletters'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='UserRole',
+            fields=[
+                ('user', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, primary_key=True, related_name='role', serialize=False, to='users.user')),
+                ('role', models.IntegerField(choices=[(1, 'Labeler'), (2, 'Labeling Coordinator'), (3, 'Labeling Infra')], default=1, verbose_name='role')),
+            ],
+        ),
+    ]
diff --git a/label_studio/khan/rbac/migrations/__init__.py b/label_studio/khan/rbac/migrations/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/label_studio/khan/rbac/models.py b/label_studio/khan/rbac/models.py
new file mode 100644
--- /dev/null
+++ b/label_studio/khan/rbac/models.py
@@ -0,0 +1,45 @@
+from django.contrib import auth
+from django.db import models
+
+from django.conf import settings
+from django.db.models.signals import post_save
+from django.dispatch import receiver
+from django.utils.translation import gettext_lazy as _
+
+from khan.rbac.roles import Role
+
+User = auth.get_user_model()
+
+
+class UserRole(models.Model):
+    """The role assigned to a user; each user has at most one."""
+
+    user = models.OneToOneField(
+        settings.AUTH_USER_MODEL,
+        related_name="role",
+        on_delete=models.CASCADE,
+        primary_key=True,
+    )
+    role = models.IntegerField(
+        _("role"),
+        default=Role.LABELER,
+        choices=Role.choices,
+    )
+
+    def has_perm(self, perm: str) -> bool:
+        """Return True if this user's role grants the permission `perm`."""
+        return Role(self.role).has_perm(perm)
+
+    def __str__(self) -> str:
+        return f"{self.user.email} - {Role(self.role).name}"
+
+
+# on user save, if the user is being created, create a UserRole for them,
+# with the default UserRole.role value
+@receiver(post_save, sender=User)
+def init_user(sender, instance=None, created=False, **kwargs):
+    # raw is True while a fixture is being loaded; Django advises against
+    # modifying other records at that point
+    if created and not kwargs.get("raw", False):
+        # get_or_create keeps the handler safe to run twice for the same user
+        UserRole.objects.get_or_create(user=instance)
diff --git a/label_studio/khan/rbac/permission.py b/label_studio/khan/rbac/permission.py
new file mode 100644
--- /dev/null
+++ b/label_studio/khan/rbac/permission.py
@@ -0,0 +1,44 @@
+import logging
+from django.core.exceptions import ObjectDoesNotExist
+
+from rest_framework.permissions import BasePermission
+
+logger = logging.getLogger(__name__)
+
+
+class RBACPermissionClass(BasePermission):
+    """DRF permission class that checks the view's required permission
+    against the role assigned to the requesting user.
+
+    The view declares what it needs in `permission_required`: either a single
+    permission string, or an object with one attribute per HTTP method.
+    Anything that cannot be resolved to a permission is denied.
+    """
+
+    def has_permission(self, request, view):
+        # views without permission_required are denied rather than crashing
+        required = getattr(view, "permission_required", None)
+        if isinstance(required, str):
+            perm = required
+        else:
+            perm = getattr(required, request.method, None)
+
+        if perm is None:
+            logger.warning(
+                "path: %s method: %s has no perms",
+                request.path,
+                request.method,
+            )
+            return False
+
+        user = request.user
+        if not (user and user.is_authenticated):
+            # anonymous users have no `role` relation to look up
+            return False
+
+        try:
+            return user.role.has_perm(perm)
+        except ObjectDoesNotExist:
+            # the user does not have a valid role assigned - default to no access
+            logger.error("user %s has no role assigned", user.email)
+            return False
diff --git a/label_studio/khan/rbac/roles.py b/label_studio/khan/rbac/roles.py
new file mode 100644
--- /dev/null
+++ b/label_studio/khan/rbac/roles.py
@@ -0,0 +1,54 @@
+from typing import Dict, Set
+
+from django.db import models
+
+from core.permissions import all_permissions
+
+
+class Role(models.IntegerChoices):
+    """Roles a user can hold, each granting a superset of the previous one."""
+
+    LABELER = 1
+    LABELING_COORDINATOR = 2
+    LABELING_INFRA = 3
+
+    def has_perm(self, perm):
+        """Return True if `perm` is in the permission set of this role."""
+        return perm in _roles[self]
+
+
+# permission set granted to each role; filled in below, lowest role first
+_roles: Dict[Role, Set[str]] = {}
+
+_roles[Role.LABELER] = {
+    all_permissions.organizations_view,
+    all_permissions.projects_view,
+    all_permissions.tasks_view,
+    all_permissions.tasks_change,
+    all_permissions.annotations_create,
+    all_permissions.annotations_view,
+    all_permissions.annotations_change,
+    all_permissions.annotations_delete,
+    all_permissions.actions_perform,
+    all_permissions.predictions_any,
+    all_permissions.avatar_any,
+    all_permissions.labels_create,
+    all_permissions.labels_view,
+    all_permissions.labels_change,
+    all_permissions.labels_delete,
+}
+
+_roles[Role.LABELING_COORDINATOR] = _roles[Role.LABELER] | {
+    all_permissions.projects_create,
+    all_permissions.projects_change,
+    all_permissions.projects_delete,
+    all_permissions.tasks_create,
+    all_permissions.tasks_delete,
+}
+
+_roles[Role.LABELING_INFRA] = _roles[Role.LABELING_COORDINATOR] | {
+    all_permissions.organizations_create,
+    all_permissions.organizations_change,
+    all_permissions.organizations_delete,
+    all_permissions.organizations_invite,
+}
diff --git a/label_studio/khan/readme.md b/label_studio/khan/readme.md
new file mode 100644
index 000000000000..972ecf08b47d
--- /dev/null
+++ b/label_studio/khan/readme.md
@@ -0,0 +1,3 @@
+# Khan specific label studio apps
+
+See the confluence page [here](https://khanacademy.atlassian.net/l/cp/uGiyWj2R) for more info.
diff --git a/label_studio/khan/settings/base.py b/label_studio/khan/settings/base.py
new file mode 100644
index 000000000000..b13a92c0a709
--- /dev/null
+++ b/label_studio/khan/settings/base.py
@@ -0,0 +1,49 @@
+"""This is the settings file that serves as the base for all other khan run environments.
+It inherits from the label-studio core base settings, and all khan's settings files should inherit from this.
+"""
+from core.settings.base import *
+
+# Make sure our custom django app is installed
+INSTALLED_APPS.extend([
+    "khan",
+    "khan.rbac"
+])
+
+# Add our Rules Permissions Class to drf permissions classes so our
+# custom RBAC works
+REST_FRAMEWORK['DEFAULT_PERMISSION_CLASSES'].append(
+    "khan.rbac.permission.RBACPermissionClass"
+)
+
+# Default Logging to INFO level
+LOGGING['root']['level'] = get_env('LOG_LEVEL', 'INFO')
+
+# Default to PSQL
+DATABASES = {'default': DATABASES_ALL[DJANGO_DB_POSTGRESQL]}
+
+# IAP _should_ mean users never even see the login page, but on the off chance they do,
+# they shouldn't be able to create their own user accounts
+DISABLE_SIGNUP_WITHOUT_LINK = True
+
+# IAP Audience used for IAP JWT validation
+IAP_AUDIENCE = get_env("IAP_AUDIENCE")
+
+# Don't send telemetry data to tele.labelstud.io
+COLLECT_ANALYTICS = False
+
+# Not sure if things below this line are needed, they came from the label-studio/settings/label-studio file
+MIDDLEWARE.append('organizations.middleware.DummyGetSessionMiddleware')
+MIDDLEWARE.append('core.middleware.UpdateLastActivityMiddleware')
+if INACTIVITY_SESSION_TIMEOUT_ENABLED:
+    MIDDLEWARE.append('core.middleware.InactivitySessionTimeoutMiddleWare')
+
+ADD_DEFAULT_ML_BACKENDS = False
+
+SESSION_ENGINE = "django.contrib.sessions.backends.signed_cookies"
+
+RQ_QUEUES = {}
+
+# in Label Studio Community version, feature flags are always ON
+FEATURE_FLAGS_DEFAULT_VALUE = True
+
+STORAGE_PERSISTENCE = get_bool_env('STORAGE_PERSISTENCE', True)
diff --git a/label_studio/khan/settings/local.py b/label_studio/khan/settings/local.py
new file mode 100644
index 000000000000..a4775dcb4181
--- /dev/null
+++ b/label_studio/khan/settings/local.py
@@ -0,0 +1,15 @@
+"""This is a settings file intended for use when running the label studio app locally"""
+from khan.settings.base import *
+
+# For local, we want to be able to create users since we don't have IAP
+DISABLE_SIGNUP_WITHOUT_LINK = False
+
+# Unless set in the env, use DEBUG level logging for local runs
+LOGGING['root']['level'] = get_env('LOG_LEVEL', 'DEBUG')
+
+# Use sqlite for local
+DATABASES = {'default': DATABASES_ALL[DJANGO_DB_SQLITE]}
+
+# Unless set in the env, run in DEBUG mode
+DEBUG = get_bool_env('DEBUG', True)
+DEBUG_PROPAGATE_EXCEPTIONS = get_bool_env('DEBUG_PROPAGATE_EXCEPTIONS', True)
diff --git a/label_studio/khan/settings/prod.py b/label_studio/khan/settings/prod.py
new file mode 100644
index 000000000000..336b7292b6c1
--- /dev/null
+++ b/label_studio/khan/settings/prod.py
@@ -0,0 +1,6 @@
+"""This is a settings file intended for use when running the label studio app in the production environment"""
+from khan.settings.base import *
+
+# make sure our IAP user middleware is present so we populate a user
+# object on the request from the IAP jwt
+MIDDLEWARE.append("khan.iap.middleware.IAPUserMiddleware")
diff --git a/label_studio/khan/settings/test.py b/label_studio/khan/settings/test.py
new file mode 100644
--- /dev/null
+++ b/label_studio/khan/settings/test.py
@@ -0,0 +1,13 @@
+"""This is a settings file intended for use when running the label studio app in the test environment"""
+from khan.settings.base import *
+
+# make sure our IAP user middleware is present so we populate a user
+# object on the request from the IAP jwt
+MIDDLEWARE.append("khan.iap.middleware.IAPUserMiddleware")
+
+# Unless set in the env, use DEBUG level logging for test runs
+LOGGING['root']['level'] = get_env('LOG_LEVEL', 'DEBUG')
+
+# Unless set in the env, run in DEBUG mode
+DEBUG = get_bool_env('DEBUG', True)
+DEBUG_PROPAGATE_EXCEPTIONS = get_bool_env('DEBUG_PROPAGATE_EXCEPTIONS', True)