Skip to content

Commit

Permalink
stats: add new fields for migrated stats
Browse files Browse the repository at this point in the history
  • Loading branch information
zzacharo committed Dec 19, 2024
1 parent cf8f25d commit 0658289
Show file tree
Hide file tree
Showing 10 changed files with 253 additions and 1 deletion.
10 changes: 9 additions & 1 deletion invenio.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ from cds_rdm.permissions import (
CDSRDMPreservationSyncPermissionPolicy,
)
from cds_rdm.files import storage_factory
from invenio_app_rdm.config import CELERY_BEAT_SCHEDULE as APP_RDM_CELERY_BEAT_SCHEDULE
from celery.schedules import crontab
from invenio_app_rdm.config import STATS_EVENTS as _APP_RDM_STATS_EVENTS
from invenio_vocabularies.services.custom_fields import VocabularyCF
from invenio_records_resources.services.custom_fields import KeywordCF
from invenio_rdm_records.config import (
Expand Down Expand Up @@ -550,3 +550,11 @@ VOCABULARIES_NAMES_SCHEMES = {
"lcds": {"label": _("CDS"), "validator": schemes.is_legacy_cds, "datacite": "CDS"},
}
"""Names allowed identifier schemes."""


# Invenio Stats
# =============

# Override the search index templates of the file-download and record-view
# statistics events so that their indices accept the extra fields carried by
# the events migrated from the legacy system.
#
# The override is published as an explicit ``STATS_EVENTS`` setting built from
# shallow copies, instead of assigning into ``_APP_RDM_STATS_EVENTS``: that
# keeps the dict owned by ``invenio_app_rdm.config`` untouched and makes the
# effective configuration visible in this file rather than relying on the
# imported object being shared with the application defaults.
STATS_EVENTS = {
    **_APP_RDM_STATS_EVENTS,
    "file-download": {
        **_APP_RDM_STATS_EVENTS["file-download"],
        "templates": "cds_rdm.stats.templates.events.file_download",
    },
    "record-view": {
        **_APP_RDM_STATS_EVENTS["record-view"],
        "templates": "cds_rdm.stats.templates.events.record_view",
    },
}
8 changes: 8 additions & 0 deletions site/cds_rdm/stats/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2024 CERN.
#
# CDS-RDM is free software; you can redistribute it and/or modify it under
# the terms of the MIT License; see LICENSE file for more details.

"""CDS-RDM statistics."""
8 changes: 8 additions & 0 deletions site/cds_rdm/stats/templates/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2024 CERN.
#
# CDS-RDM is free software; you can redistribute it and/or modify it under
# the terms of the MIT License; see LICENSE file for more details.

"""CDS-RDM Statistics search index templates."""
8 changes: 8 additions & 0 deletions site/cds_rdm/stats/templates/events/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2024 CERN.
#
# CDS-RDM is free software; you can redistribute it and/or modify it under
# the terms of the MIT License; see LICENSE file for more details.

"""Statistics events search index templates."""
8 changes: 8 additions & 0 deletions site/cds_rdm/stats/templates/events/file_download/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2024 CERN.
#
# CDS-RDM is free software; you can redistribute it and/or modify it under
# the terms of the MIT License; see LICENSE file for more details.

"""File download event search index templates."""
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2024 CERN.
#
# CDS-RDM is free software; you can redistribute it and/or modify it under
# the terms of the MIT License; see LICENSE file for more details.

"""File download event OpenSearch index templates."""
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
{
  "index_patterns": ["__SEARCH_INDEX_PREFIX__events-stats-file-download-*"],
  "settings": {
    "index": {
      "refresh_interval": "5s"
    }
  },
  "mappings": {
    "_meta": {
      "field_descriptions": {
        "is_lcds": "This field marks all statistical events that have been migrated from the legacy CDS system.",
        "before_COUNTER": "This field applies to all migrated events where no information was available to determine whether they were human or robot events. This was later resolved with the implementation of a proper robot-checking mechanism, ensuring COUNTER compliance."
      }
    },
    "dynamic_templates": [
      {
        "date_fields": {
          "match_mapping_type": "date",
          "mapping": {
            "type": "date",
            "format": "strict_date_hour_minute_second"
          }
        }
      }
    ],
    "date_detection": false,
    "dynamic": "strict",
    "numeric_detection": false,
    "properties": {
      "timestamp": {
        "type": "date",
        "format": "strict_date_hour_minute_second"
      },
      "bucket_id": {
        "type": "keyword"
      },
      "file_id": {
        "type": "keyword"
      },
      "file_key": {
        "type": "keyword"
      },
      "unique_id": {
        "type": "keyword"
      },
      "country": {
        "type": "keyword"
      },
      "visitor_id": {
        "type": "keyword"
      },
      "is_machine": {
        "type": "boolean"
      },
      "is_robot": {
        "type": "boolean"
      },
      "unique_session_id": {
        "type": "keyword"
      },
      "size": {
        "type": "double"
      },
      "referrer": {
        "type": "keyword"
      },
      "ip_address": {
        "type": "keyword"
      },
      "user_agent": {
        "type": "keyword"
      },
      "user_id": {
        "type": "keyword"
      },
      "session_id": {
        "type": "keyword"
      },
      "record_id": {
        "type": "keyword"
      },
      "recid": {
        "type": "keyword"
      },
      "parent_id": {
        "type": "keyword"
      },
      "parent_recid": {
        "type": "keyword"
      },
      "via_api": {
        "type": "boolean"
      },
      "is_lcds": {
        "type": "boolean"
      },
      "before_COUNTER": {
        "type": "boolean"
      },
      "updated_timestamp": {
        "type": "date"
      }
    }
  },
  "aliases": {
    "__SEARCH_INDEX_PREFIX__events-stats-file-download": {}
  }
}
8 changes: 8 additions & 0 deletions site/cds_rdm/stats/templates/events/record_view/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2024 CERN.
#
# CDS-RDM is free software; you can redistribute it and/or modify it under
# the terms of the MIT License; see LICENSE file for more details.

"""Record view event search index templates."""
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2024 CERN.
#
# CDS-RDM is free software; you can redistribute it and/or modify it under
# the terms of the MIT License; see LICENSE file for more details.

"""Record view event OpenSearch index templates."""
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
{
  "index_patterns": ["__SEARCH_INDEX_PREFIX__events-stats-record-view-*"],
  "settings": {
    "index": {
      "refresh_interval": "5s"
    }
  },
  "mappings": {
    "_meta": {
      "field_descriptions": {
        "is_lcds": "This field marks all statistical events that have been migrated from the legacy CDS system.",
        "before_COUNTER": "This field applies to all migrated events where no information was available to determine whether they were human or robot events. This was later resolved with the implementation of a proper robot-checking mechanism, ensuring COUNTER compliance."
      }
    },
    "date_detection": false,
    "dynamic": "strict",
    "numeric_detection": false,
    "properties": {
      "timestamp": {
        "type": "date",
        "format": "strict_date_hour_minute_second"
      },
      "labels": {
        "type": "keyword"
      },
      "country": {
        "type": "keyword"
      },
      "visitor_id": {
        "type": "keyword"
      },
      "is_machine": {
        "type": "boolean"
      },
      "is_robot": {
        "type": "boolean"
      },
      "unique_id": {
        "type": "keyword"
      },
      "unique_session_id": {
        "type": "keyword"
      },
      "referrer": {
        "type": "keyword"
      },
      "ip_address": {
        "type": "keyword"
      },
      "user_agent": {
        "type": "keyword"
      },
      "user_id": {
        "type": "keyword"
      },
      "session_id": {
        "type": "keyword"
      },
      "record_id": {
        "type": "keyword"
      },
      "recid": {
        "type": "keyword"
      },
      "parent_id": {
        "type": "keyword"
      },
      "parent_recid": {
        "type": "keyword"
      },
      "via_api": {
        "type": "boolean"
      },
      "is_lcds": {
        "type": "boolean"
      },
      "before_COUNTER": {
        "type": "boolean"
      },
      "updated_timestamp": {
        "type": "date"
      }
    }
  },
  "aliases": {
    "__SEARCH_INDEX_PREFIX__events-stats-record-view": {}
  }
}

0 comments on commit 0658289

Please sign in to comment.