Skip to content

Commit

Permalink
osf:storageByteCount supplementary metadata [ENG-6187]
Browse files Browse the repository at this point in the history
  • Loading branch information
aaxelb committed Oct 21, 2024
1 parent 7a35afc commit e1d04c1
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 24 deletions.
67 changes: 45 additions & 22 deletions api/caching/tasks.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import logging
from urllib.parse import urlparse

from django.apps import apps
from django.contrib.contenttypes.models import ContentType
from django.db import connection
from django.db.models import Sum

import requests
import logging

from django.apps import apps
from api.caching.utils import storage_usage_cache
from framework.postcommit_tasks.handlers import enqueue_postcommit_task

Expand Down Expand Up @@ -114,32 +115,54 @@ def ban_url(instance):
def update_storage_usage_cache(target_id, target_guid, per_page=500000):
    """Recompute a target's osfstorage usage and store it in the usage cache.

    Does nothing when ``settings.ENABLE_STORAGE_USAGE_CACHE`` is falsy, or
    when ``target_guid`` cannot be resolved to an object.

    :param target_id: database id of the target. Kept so existing callers
        keep working; the target is resolved from ``target_guid`` instead.
    :param target_guid: guid string of the target, also used for the cache key.
    :param per_page: page size forwarded to ``compute_storage_usage_total``.
    """
    if not settings.ENABLE_STORAGE_USAGE_CACHE:
        return
    # Function-scope import, as in the original.
    # NOTE(review): presumably avoids a circular import -- confirm.
    from osf.models import Guid

    # The usage query filters on the pk and content type of the object it is
    # given, so it needs the guid's referent rather than the Guid row.
    # NOTE(review): relies on Guid.load returning a Guid (or None) that
    # exposes `.referent` -- confirm against osf.models.Guid.
    guid = Guid.load(target_guid)
    target_obj = guid.referent if guid is not None else None
    if target_obj is None:
        logging.getLogger(__name__).warning(
            'update_storage_usage_cache: no target found for guid %s (target_id=%s)',
            target_guid,
            target_id,
        )
        return

    # Forward per_page so the caller's page size is honored.
    storage_usage_total = compute_storage_usage_total(target_obj, per_page=per_page)
    key = cache_settings.STORAGE_USAGE_KEY.format(target_id=target_guid)
    storage_usage_cache.set(key, storage_usage_total, settings.STORAGE_USAGE_CACHE_TIMEOUT)


def compute_storage_usage_total(target_obj, per_page=500000):
    """Sum the sizes of the non-deleted osfstorage file versions on a target.

    The rows are aggregated one page at a time (``LIMIT``/``OFFSET`` over
    rows ordered by version id) and the per-page sums are added up.

    :param target_obj: model instance whose files are counted; matched by
        its ``pk`` and its content type.
    :param per_page: maximum number of file-version rows per query.
    :return: total size as an ``int`` (0 when there are no matching rows).
    """
    # count(*) rather than count(size): the row count drives the OFFSET, and
    # count(size) skips NULL sizes, which would under-advance the offset and
    # sum some rows twice.
    sql = """SELECT count(*), sum(file_page.size) from (
        SELECT version.size AS size
        FROM osf_basefileversionsthrough AS obfnv
        LEFT JOIN osf_basefilenode AS file ON obfnv.basefilenode_id = file.id
        LEFT JOIN osf_fileversion AS version ON obfnv.fileversion_id = version.id
        WHERE file.provider = 'osfstorage'
        AND file.deleted_on IS NULL
        AND file.target_object_id=%(target_id)s
        AND file.target_content_type_id = %(target_content_type_id)s
        ORDER BY version.id
        LIMIT %(per_page)s OFFSET %(offset)s
    ) file_page
    """
    # Everything except the offset is the same for every page, so build the
    # parameters (including the content-type lookup) once.
    query_params = {
        'target_id': target_obj.pk,
        'target_content_type_id': ContentType.objects.get_for_model(target_obj).pk,
        'per_page': per_page,
        'offset': 0,
    }
    storage_usage_total = 0
    with connection.cursor() as cursor:
        while True:
            cursor.execute(sql, query_params)
            # An aggregate without GROUP BY always yields exactly one row.
            row_count, size_sum = cursor.fetchone()
            # size_sum is NULL/None for an empty page or all-NULL sizes.
            storage_usage_total += int(size_sum or 0)
            # A short (or empty) page means there is nothing left to read.
            if not row_count or row_count < per_page:
                break
            query_params['offset'] += row_count
    return storage_usage_total


def get_storage_usage_total(target_obj):
    """Return the storage usage total for ``target_obj``, using the cache if enabled.

    With ``settings.ENABLE_STORAGE_USAGE_CACHE`` off, the total is always
    computed. With it on, a cached value (anything other than ``None``) is
    returned as-is; on a miss the total is computed, written to the cache
    with ``settings.STORAGE_USAGE_CACHE_TIMEOUT``, and returned.
    """
    if not settings.ENABLE_STORAGE_USAGE_CACHE:
        return compute_storage_usage_total(target_obj)

    cache_key = cache_settings.STORAGE_USAGE_KEY.format(target_id=target_obj._id)
    cached_total = storage_usage_cache.get(cache_key)
    if cached_total is not None:
        return cached_total

    fresh_total = compute_storage_usage_total(target_obj)
    storage_usage_cache.set(cache_key, fresh_total, settings.STORAGE_USAGE_CACHE_TIMEOUT)
    return fresh_total


def update_storage_usage(target):
Expand Down
16 changes: 16 additions & 0 deletions osf/metadata/osf_gathering.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from django import db
import rdflib

from api.caching.tasks import get_storage_usage_total
from osf import models as osfdb
from osf.metadata import gather
from osf.metadata.rdfutils import (
Expand Down Expand Up @@ -213,19 +214,24 @@ def pls_get_magic_metadata_basket(osf_item) -> gather.Basket:
OSFMAP_SUPPLEMENT = {
OSF.Project: {
OSF.hasOsfAddon: None,
OSF.storageByteCount: None,
OSF.storageRegion: None,
},
OSF.ProjectComponent: {
OSF.hasOsfAddon: None,
OSF.storageByteCount: None,
OSF.storageRegion: None,
},
OSF.Registration: {
OSF.storageByteCount: None,
OSF.storageRegion: None,
},
OSF.RegistrationComponent: {
OSF.storageByteCount: None,
OSF.storageRegion: None,
},
OSF.Preprint: {
OSF.storageByteCount: None,
OSF.storageRegion: None,
},
OSF.File: {
Expand Down Expand Up @@ -1149,3 +1155,13 @@ def gather_storage_region(focus):
_region_ref = rdflib.URIRef(_region.absolute_api_v2_url)
yield (OSF.storageRegion, _region_ref)
yield (_region_ref, SKOS.prefLabel, rdflib.Literal(_region.name, lang='en'))


@gather.er(
    OSF.storageByteCount,
    focustype_iris=[
        OSF.Project,
        OSF.ProjectComponent,
        OSF.Registration,
        OSF.RegistrationComponent,
        OSF.Preprint,
    ],
)
def gather_storage_byte_count(focus):
    """Yield the ``osf:storageByteCount`` of the focus's database object.

    Nothing is yielded when no total is available (``None``); a total of
    zero is still yielded.
    """
    byte_count = get_storage_usage_total(focus.dbmodel)
    if byte_count is None:
        return
    yield (OSF.storageByteCount, byte_count)
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
@prefix osf: <https://osf.io/vocab/2022/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .

<http://localhost:5000/w4ibb> osf:storageRegion <http://localhost:8000/v2/regions/us/> .
<http://localhost:5000/w4ibb> osf:storageByteCount 1337 ;
osf:storageRegion <http://localhost:8000/v2/regions/us/> .

<http://localhost:8000/v2/regions/us/> a osf:Region ;
skos:prefLabel "United States"@en .
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .

<http://localhost:5000/w2ibb> osf:hasOsfAddon <urn:osf.io:addons:gitlab> ;
osf:storageByteCount 7 ;
osf:storageRegion <http://localhost:8000/v2/regions/us/> .

<urn:osf.io:addons:gitlab> a osf:AddonImplementation ;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
@prefix osf: <https://osf.io/vocab/2022/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .

<http://localhost:5000/w5ibb> osf:storageRegion <http://localhost:8000/v2/regions/us/> .
<http://localhost:5000/w5ibb> osf:storageByteCount 17 ;
osf:storageRegion <http://localhost:8000/v2/regions/us/> .

<http://localhost:8000/v2/regions/us/> a osf:Region ;
skos:prefLabel "United States"@en .
11 changes: 11 additions & 0 deletions osf_tests/metadata/test_osf_gathering.py
Original file line number Diff line number Diff line change
Expand Up @@ -821,3 +821,14 @@ def test_gather_storage_region(self):
(self.preprintfocus.iri, OSF.storageRegion, _default_region_ref),
(_default_region_ref, SKOS.prefLabel, Literal('United States', lang='en')),
})

def test_gather_storage_byte_count(self):
    """Check the byte count gathered for the project, registration and preprint focuses."""
    expected_byte_counts = (
        (self.projectfocus, 123456),
        (self.registrationfocus, 0),
        (self.preprintfocus, 1337),
    )
    for focus, byte_count in expected_byte_counts:
        assert_triples(osf_gathering.gather_storage_byte_count(focus), {
            (focus.iri, OSF.storageByteCount, Literal(byte_count)),
        })

0 comments on commit e1d04c1

Please sign in to comment.