Skip to content

Commit

Permalink
institution-user metrics view
Browse files Browse the repository at this point in the history
- add base `ElasticsearchListView`
    - use `elasticsearch_dsl.Search` as queryset-analogue for code reuse
    - allows filtering (with 'eq' and 'ne' operators) following
      `filterable_fields` on the serializer
    - allows sorting, following `default_ordering` and `ordering_fields`
      on the view
    - allows pagination (with `page` and `page[size]` params)
- update "new" institution user metrics view with ElasticsearchListView
- add `MonthlyReport.most_recent_yearmonth`
  • Loading branch information
aaxelb committed Sep 11, 2024
1 parent e1bf00e commit 60d3f6e
Show file tree
Hide file tree
Showing 6 changed files with 333 additions and 7 deletions.
111 changes: 111 additions & 0 deletions api/base/elasticsearch_dsl_views.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
from __future__ import annotations
import abc
import typing

import elasticsearch_dsl as edsl
from rest_framework import generics
from rest_framework import exceptions as drf_exceptions
from rest_framework.settings import api_settings as drf_settings
if typing.TYPE_CHECKING:
from rest_framework import serializers

from api.base.filters import FilterMixin
from api.base.views import JSONAPIBaseView


class ElasticsearchListView(FilterMixin, JSONAPIBaseView, generics.ListAPIView, abc.ABC):
'''abstract view class using `elasticsearch_dsl.Search` as a queryset-analogue
builds a `Search` based on `self.get_default_search()` and the request's
query parameters for filtering, sorting, and pagination -- fetches only
the data required for the response, just like with a queryset!
'''
serializer_class: type[serializers.BaseSerializer] # required on subclasses

default_ordering: str | None = None # name of a serializer field, prepended with "-" for descending sort
ordering_fields: frozenset[str] = frozenset() # serializer field names

@abc.abstractmethod
def get_default_search(self) -> edsl.Search | None:
'''the base `elasticsearch_dsl.Search` for this list, based on url path
(common jsonapi query parameters will be considered automatically)
'''
...

###
# beware! inheritance shenanigans below

# override FilterMixin to disable all operators besides 'eq' and 'ne'
MATCHABLE_FIELDS = ()
COMPARABLE_FIELDS = ()
DEFAULT_OPERATOR_OVERRIDES = {}
# (if you want to add fulltext-search or range-filter support, remove the override
# and update `__add_search_filter` to handle those operators -- tho note that the
# underlying elasticsearch field mapping will need to be compatible with the query)

# override DEFAULT_FILTER_BACKENDS rest_framework setting
# (filtering handled in-view to reuse logic from FilterMixin)
filter_backends = ()

# note: because elasticsearch_dsl.Search supports slicing and gives results when iterated on,
# it works fine with default pagination

# override rest_framework.generics.GenericAPIView
def get_queryset(self):
_search = self.get_default_search()
if _search is None:
return []
# using parsing logic from FilterMixin (oddly nested dict and all)
for _parsed_param in self.parse_query_params(self.request.query_params).values():
for _parsed_filter in _parsed_param.values():
_search = self.__add_search_filter(
_search,
elastic_field_name=_parsed_filter['source_field_name'],
operator=_parsed_filter['op'],
value=_parsed_filter['value'],
)
return self.__add_sort(_search)

###
# private methods

def __add_sort(self, search: edsl.Search) -> edsl.Search:
_elastic_sort = self.__get_elastic_sort()
return (search if _elastic_sort is None else search.sort(_elastic_sort))

def __get_elastic_sort(self) -> str | None:
_sort_param = self.request.query_params.get(drf_settings.ORDERING_PARAM, self.default_ordering)
if not _sort_param:
return None
_sort_field, _ascending = (
(_sort_param[1:], False)
if _sort_param.startswith('-')
else (_sort_param, True)
)
if _sort_field not in self.ordering_fields:
raise drf_exceptions.ValidationError(
f'invalid value for {drf_settings.ORDERING_PARAM} query param (valid values: {", ".join(self.ordering_fields)})',
)
_serializer_field = self.get_serializer().fields[_sort_field]
_elastic_sort_field = _serializer_field.source
return (_elastic_sort_field if _ascending else f'-{_elastic_sort_field}')

def __add_search_filter(
self,
search: edsl.Search,
elastic_field_name: str,
operator: str,
value: str,
) -> edsl.Search:
match operator: # operators from FilterMixin
case 'eq':
if value == '':
return search.exclude('exists', field=elastic_field_name)
return search.filter('term', **{elastic_field_name: value})
case 'ne':
if value == '':
return search.filter('exists', field=elastic_field_name)
return search.exclude('term', **{elastic_field_name: value})
case _:
raise NotImplementedError(f'unsupported filter operator "{operator}"')
8 changes: 8 additions & 0 deletions api/institutions/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,11 @@ class NewInstitutionUserMetricsSerializer(JSONAPISerializer):
class Meta:
type_ = 'institution-users'

filterable_fields = frozenset({
'department',
'orcid_id',
})

id = IDField(source='meta.id', read_only=True)
user_name = ser.CharField(read_only=True)
department = ser.CharField(read_only=True, source='department_name')
Expand All @@ -349,3 +354,6 @@ class Meta:
)

links = LinksField({})

def get_absolute_url(self):
return None # there is no detail view for institution-users
31 changes: 29 additions & 2 deletions api/institutions/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@
from osf.metrics import InstitutionProjectCounts
from osf.models import OSFUser, Node, Institution, Registration
from osf.metrics import UserInstitutionProjectCounts
from osf.metrics.reports import InstitutionalUserReport
from osf.utils import permissions as osf_permissions

from api.base import permissions as base_permissions
from api.base.filters import ListFilterMixin, FilterMixin
from api.base.elasticsearch_dsl_views import ElasticsearchListView
from api.base.filters import ListFilterMixin
from api.base.views import JSONAPIBaseView
from api.base.serializers import JSONAPISerializer
from api.base.utils import get_object_or_error, get_user_auth
Expand Down Expand Up @@ -533,7 +535,7 @@ def get_default_queryset(self):
return self._make_elasticsearch_results_filterable(search, id=institution._id, department=DEFAULT_ES_NULL_VALUE)


class _NewInstitutionUserMetricsList(InstitutionMixin, FilterMixin, JSONAPIBaseView):
class _NewInstitutionUserMetricsList(InstitutionMixin, ElasticsearchListView):
'''list view for institution-users metrics
used only when the INSTITUTIONAL_DASHBOARD_2024 feature flag is active
Expand All @@ -553,6 +555,31 @@ class _NewInstitutionUserMetricsList(InstitutionMixin, FilterMixin, JSONAPIBaseV

serializer_class = NewInstitutionUserMetricsSerializer

default_ordering = '-storage_byte_count'
ordering_fields = frozenset((
'user_name',
'department',
'month_last_login',
'account_creation_date',
'public_projects',
'private_projects',
'public_registration_count',
'embargoed_registration_count',
'published_preprint_count',
'public_file_count',
'storage_byte_count',
))

def get_default_search(self):
_yearmonth = InstitutionalUserReport.most_recent_yearmonth()
if _yearmonth is None:
return None
return (
InstitutionalUserReport.search()
.filter('term', report_yearmonth=str(_yearmonth))
.filter('term', institution_id=self.get_institution()._id)
)


institution_user_metrics_list_view = toggle_view_by_flag(
flag_name=osf.features.INSTITUTIONAL_DASHBOARD_2024,
Expand Down
160 changes: 159 additions & 1 deletion api_tests/institutions/views/test_institution_user_metric_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import csv
from io import StringIO
from random import random
from urllib.parse import urlencode

import pytest
from waffle.testutils import override_flag
Expand All @@ -14,10 +15,11 @@
)

from osf.metrics import UserInstitutionProjectCounts
from osf.metrics.reports import InstitutionalUserReport

@pytest.mark.es
@pytest.mark.django_db
class TestInstitutionUserMetricList:
class TestOldInstitutionUserMetricList:

@pytest.fixture(autouse=True)
def _waffled(self):
Expand Down Expand Up @@ -262,3 +264,159 @@ def test_filter_and_sort(self, app, url, user, user2, user3, admin, user4, popul
assert data[0]['attributes']['department'] == 'Biology dept'
assert data[1]['attributes']['department'] == 'N/A'
assert data[2]['attributes']['department'] == 'Psychology dept'


@pytest.mark.django_db
class TestNewInstitutionUserMetricList:
@pytest.fixture(autouse=True)
def _waffled(self):
with override_flag(osf.features.INSTITUTIONAL_DASHBOARD_2024, active=True):
yield # these tests apply only after institution dashboard improvements

@pytest.fixture()
def institution(self):
return InstitutionFactory()

@pytest.fixture()
def rando(self):
return AuthUserFactory()

@pytest.fixture()
def institutional_admin(self, institution):
_admin_user = AuthUserFactory()
institution.get_group('institutional_admins').user_set.add(_admin_user)
return _admin_user

@pytest.fixture()
def unshown_reports(self, institution):
# unshown because another institution
_another_institution = InstitutionFactory()
_report_factory('2024-08', _another_institution, user_id='nother_inst')
# unshown because old
_report_factory('2024-07', institution, user_id='old')

@pytest.fixture()
def reports(self, institution):
return [
_report_factory(
'2024-08', institution,
user_id='u_sparse',
storage_byte_count=53,
),
_report_factory(
'2024-08', institution,
user_id='u_orc',
orcid_id='5555-4444-3333-2222',
storage_byte_count=8277,
),
_report_factory(
'2024-08', institution,
user_id='u_blargl',
department_name='blargl',
storage_byte_count=34834834,
),
_report_factory(
'2024-08', institution,
user_id='u_orcomma',
orcid_id='4444-3333-2222-1111',
department_name='a department, or so, that happens, incidentally, to have commas',
storage_byte_count=736662999298,
),
]

@pytest.fixture()
def url(self, institution):
return f'/{API_BASE}institutions/{institution._id}/metrics/users/'

def test_anon(self, app, url):
_resp = app.get(url, expect_errors=True)
assert _resp.status_code == 401

def test_rando(self, app, url, rando):
_resp = app.get(url, auth=rando.auth, expect_errors=True)
assert _resp.status_code == 403

@pytest.mark.es
def test_get_empty(self, app, url, institutional_admin):
_resp = app.get(url, auth=institutional_admin.auth)
assert _resp.status_code == 200
assert _resp.json['data'] == []

@pytest.mark.es
def test_get_reports(self, app, url, institutional_admin, institution, reports, unshown_reports):
_resp = app.get(url, auth=institutional_admin.auth)
assert _resp.status_code == 200
assert len(_resp.json['data']) == len(reports)
_expected_user_ids = {_report.user_id for _report in reports}
assert set(_user_ids(_resp)) == _expected_user_ids

@pytest.mark.es
def test_filter_reports(self, app, url, institutional_admin, institution, reports, unshown_reports):
for _query, _expected_user_ids in (
({'filter[department]': 'nunavum'}, set()),
({'filter[department]': 'incidentally'}, set()),
({'filter[department]': 'blargl'}, {'u_blargl'}),
({'filter[department]': 'a department, or so, that happens, incidentally, to have commas'}, {'u_orcomma'}),
({'filter[department][eq]': 'nunavum'}, set()),
({'filter[department][eq]': 'blargl'}, {'u_blargl'}),
({'filter[department][eq]': 'a department, or so, that happens, incidentally, to have commas'}, {'u_orcomma'}),
({'filter[department][ne]': 'nunavum'}, {'u_sparse', 'u_blargl', 'u_orc', 'u_orcomma'}),

({'filter[orcid_id][eq]': '5555-4444-3333-2222'}, {'u_orc'}),
({'filter[orcid_id][ne]': ''}, {'u_orc', 'u_orcomma'}),
({'filter[orcid_id][eq]': ''}, {'u_sparse', 'u_blargl'}),
({
'filter[orcid_id]': '',
'filter[department]': 'blargl',
}, {'u_blargl'}),
({
'filter[orcid_id]': '',
'filter[department][ne]': 'blargl',
}, {'u_sparse'}),
({
'filter[orcid_id]': '5555-4444-3333-2222',
'filter[department][ne]': 'blargl',
}, {'u_orc'}),
({
'filter[orcid_id]': '5555-4444-3333-2222',
'filter[department][ne]': '',
}, set()),
):
_resp = app.get(f'{url}?{urlencode(_query)}', auth=institutional_admin.auth)
assert _resp.status_code == 200
assert set(_user_ids(_resp)) == _expected_user_ids

@pytest.mark.es
def test_sort_reports(self, app, url, institutional_admin, institution, reports, unshown_reports):
for _query, _expected_user_id_list in (
({'sort': 'storage_byte_count'}, ['u_sparse', 'u_orc', 'u_blargl', 'u_orcomma']),
({'sort': '-storage_byte_count'}, ['u_orcomma', 'u_blargl', 'u_orc', 'u_sparse']),
):
_resp = app.get(f'{url}?{urlencode(_query)}', auth=institutional_admin.auth)
assert _resp.status_code == 200
assert list(_user_ids(_resp)) == _expected_user_id_list

@pytest.mark.es
def test_paginate_reports(self, app, url, institutional_admin, institution, reports, unshown_reports):
for _query, _expected_user_id_list in (
({'sort': 'storage_byte_count', 'page[size]': 2}, ['u_sparse', 'u_orc']),
({'sort': 'storage_byte_count', 'page[size]': 2, 'page': 2}, ['u_blargl', 'u_orcomma']),
({'sort': '-storage_byte_count', 'page[size]': 3}, ['u_orcomma', 'u_blargl', 'u_orc']),
({'sort': '-storage_byte_count', 'page[size]': 3, 'page': 2}, ['u_sparse']),
):
_resp = app.get(f'{url}?{urlencode(_query)}', auth=institutional_admin.auth)
assert _resp.status_code == 200
assert list(_user_ids(_resp)) == _expected_user_id_list

def _user_ids(api_response):
for _datum in api_response.json['data']:
yield _datum['relationships']['user']['data']['id']

def _report_factory(yearmonth, institution, **kwargs):
_report = InstitutionalUserReport(
report_yearmonth=yearmonth,
institution_id=institution._id,
**kwargs,
)
_report.save(refresh=True)
return _report
Loading

0 comments on commit 60d3f6e

Please sign in to comment.