forked from CenterForOpenScience/osf.io
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
129 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
from __future__ import annotations | ||
import typing | ||
|
||
if typing.TYPE_CHECKING: | ||
import elasticsearch_dsl | ||
|
||
from osf.metrics.counted_usage import CountedAuthUsage | ||
from osf.metrics.reports import PublicItemUsageReport | ||
from osf.metrics.utils import YearMonth | ||
from ._base import MonthlyReporter | ||
|
||
|
||
_CHUNK_SIZE = 500 | ||
|
||
|
||
class PublicItemUsageReporter(MonthlyReporter):
    '''build a PublicItemUsageReport for each public item

    includes projects, project components, registrations, registration components, and preprints
    '''

    def report(self, yearmonth: YearMonth):
        '''yield one PublicItemUsageReport per item bucket found for the given month'''
        for _itembucket in self._iter_itembuckets(yearmonth):
            yield self._report_from_itembucket(_itembucket)

    def _item_page_search(self, yearmonth) -> elasticsearch_dsl.Search:
        '''build the aggregation-only search for one page of per-item usage

        filters CountedAuthUsage to `item_public=True` with a timestamp in
        [yearmonth.target_month(), yearmonth.next_month()), then groups by
        `item_guid` with a composite aggregation of `_CHUNK_SIZE` buckets per page
        '''
        _usage_search = (
            CountedAuthUsage.search()
            .filter('term', item_public=True)
            .filter('range', timestamp={
                'gte': yearmonth.target_month(),
                'lt': yearmonth.next_month(),
            })
            .update_from_dict({'size': 0})  # only aggregations, no hits
        )
        # the main agg: use a composite aggregation to page thru *every* item
        _agg_items = _usage_search.aggs.bucket(
            'agg_items',
            'composite',
            sources=[{'item_osfid': {'terms': {'field': 'item_guid'}}}],
            size=_CHUNK_SIZE,
        )
        # nested agg: for each item, get platform_iri values
        _agg_items.bucket('agg_platform_iri', 'terms', field='platform_iri')
        # nested agg: for each item, get provider_id values
        _agg_items.bucket('agg_provider_id', 'terms', field='provider_id')
        # nested agg: for each item, get item_type values
        _agg_items.bucket('agg_item_type', 'terms', field='item_type')
        # nested agg: for each item, get view and download count
        _agg_action = _agg_items.bucket(
            'agg_action',
            'terms',
            field='action_labels',
            include=[
                CountedAuthUsage.ActionLabel.VIEW.value,
                CountedAuthUsage.ActionLabel.DOWNLOAD.value,
            ],
        )
        # nested nested agg: for each item-action pair, get a session count
        _agg_action.bucket(
            'agg_session_count',
            'cardinality',
            field='session_id',
            precision_threshold=40000,  # maximum precision
        )
        return _usage_search

    def _iter_itembuckets(self, yearmonth: YearMonth):
        '''yield every bucket of the `agg_items` composite aggregation, page by page

        one search object is reused for all pages; only the composite
        aggregation's `after` param is updated between requests
        '''
        _search = self._item_page_search(yearmonth)
        while True:
            # `Search.execute` caches its response on the search object; because
            # this search is reused (and mutated in place) for every page, the
            # cache must be bypassed or each "next page" would be page one again
            _page_response = _search.execute(ignore_cache=True)
            _agg_items = _page_response.aggregations.agg_items
            _buckets = _agg_items.buckets
            yield from _buckets
            if len(_buckets) != _CHUNK_SIZE:
                return  # a page that is not full is the last page
            # full page: there may be more -- continue after the last key seen
            _search.aggs['agg_items'].after = _agg_items.after_key

    def _report_from_itembucket(self, itembucket):
        '''convert one `agg_items` bucket into a PublicItemUsageReport

        item_type, provider_id, and platform_iri are each set to the list of
        keys from the corresponding nested terms aggregation; counts stay zero
        unless a matching view/download action bucket is present
        '''
        _report = PublicItemUsageReport(
            item_osfid=itembucket.key.item_osfid,
            item_type=_agg_keys(itembucket.agg_item_type),
            provider_id=_agg_keys(itembucket.agg_provider_id),
            platform_iri=_agg_keys(itembucket.agg_platform_iri),
            # default counts to zero, will be updated if non-zero
            view_count=0,
            view_session_count=0,
            download_count=0,
            download_session_count=0,
        )
        for _actionbucket in itembucket.agg_action:
            if _actionbucket.key == CountedAuthUsage.ActionLabel.VIEW.value:
                _report.view_count = _actionbucket.doc_count
                _report.view_session_count = _actionbucket.agg_session_count.value
            elif _actionbucket.key == CountedAuthUsage.ActionLabel.DOWNLOAD.value:
                _report.download_count = _actionbucket.doc_count
                _report.download_session_count = _actionbucket.agg_session_count.value
        return _report
|
||
|
||
### | ||
# local helpers | ||
|
||
def _agg_keys(bucket_agg_result) -> list: | ||
return [_bucket.key for _bucket in bucket_agg_result] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters