Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

apiharvester: cantook harvesting #3802

Merged
merged 1 commit into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions data/apisources.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# RERO ILS
# Copyright (C) 2019 RERO
# Copyright (C) 2024 RERO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
Expand All @@ -16,8 +16,13 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.


# OAI-PMH harvester configuration.
mef:
url: http://mef.test.rero.ch/api/mef
comment: 'mef persons'
size: 1000
# API harvester configuration.
VS-CANTOOK:
url: https://mediatheque-valais.cantookstation.eu
classname: 'rero_ils.modules.api_harvester.cantook.api.ApiCantook'
code: 'mv-cantook'

NJ-CANTOOK:
url: https://bm.ebibliomedia.ch
classname: 'rero_ils.modules.api_harvester.cantook.api.ApiCantook'
code: 'ebibliomedia'
6 changes: 2 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,6 @@ reverse = "rero_ils.dojson.cli:reverse"
pjson = "rero_ils.dojson.cli:pretty_json_dump"

[tool.poetry.plugins."dojson.cli.rule"]
marc21_ebooks_to_json = "rero_ils.modules.ebooks.dojson.contrib.marc21:marc21"
marc21_dnb_to_json = "rero_ils.modules.documents.dojson.contrib.marc21tojson:marc21_dnb"
marc21_kul_to_json = "rero_ils.modules.documents.dojson.contrib.marc21tojson:marc21_kul"
marc21_loc_to_json = "rero_ils.modules.documents.dojson.contrib.marc21tojson:marc21_loc"
Expand Down Expand Up @@ -223,11 +222,10 @@ users = "rero_ils.modules.users.views:blueprint"
nooppid = "rero_ils.converters:NoopPIDConverter"

[tool.poetry.plugins."invenio_celery.tasks"]
apiharvester = "rero_ils.modules.apiharvester.tasks"
api_harvester = "rero_ils.modules.api_harvester.tasks"
collections = "rero_ils.modules.collections.tasks"
documents = "rero_ils.modules.documents.tasks"
remote_entities = "rero_ils.modules.entities.remote_entities.tasks"
ebooks = "rero_ils.modules.ebooks.tasks"
holdings = "rero_ils.modules.holdings.tasks"
items = "rero_ils.modules.items.tasks"
loans = "rero_ils.modules.loans.tasks"
Expand All @@ -252,7 +250,7 @@ acq_order_lines = "rero_ils.modules.acquisition.acq_order_lines.models"
acq_orders = "rero_ils.modules.acquisition.acq_orders.models"
acq_receipt_lines = "rero_ils.modules.acquisition.acq_receipt_lines.models"
acq_receipts = "rero_ils.modules.acquisition.acq_receipts.models"
apiharvester = "rero_ils.modules.apiharvester.models"
api_harvester = "rero_ils.modules.api_harvester.models"
budgets = "rero_ils.modules.acquisition.budgets.models"
circ_policies = "rero_ils.modules.circ_policies.models"
collections = "rero_ils.modules.collections.models"
Expand Down
24 changes: 12 additions & 12 deletions rero_ils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,12 +392,6 @@ def _(x):
"schedule": timedelta(minutes=60),
"enabled": False,
},
"ebooks-harvester": {
"task": "invenio_oaiharvester.tasks.list_records_from_dates",
"schedule": crontab(minute=22, hour=22),
"kwargs": {"name": "ebooks"},
"enabled": False,
},
"notification-creation": {
"task": "rero_ils.modules.notifications.tasks.create_notifications",
"schedule": crontab(minute=0, hour=3), # Every day at 03:00 UTC,
Expand Down Expand Up @@ -526,12 +520,18 @@ def _(x):
"kwargs": {"delete": True},
"enabled": False,
},
# "mef-harvester": {
# "task": "rero_ils.modules.apiharvester.tasks.harvest_records",
# "schedule": timedelta(minutes=60),
# "kwargs": {"name": "mef", "enabled": False),
# "enabled": False,
# },
"harvest-vs-cantook": {
"task": "rero_ils.modules.api_harvester.tasks.harvest_records",
"schedule": crontab(minute=33, hour=3), # Every day at 03:33 UTC,
"kwargs": {"name": "VS-CANTOOK"},
"enabled": False,
},
"harvest-nj-cantook": {
"task": "rero_ils.modules.api_harvester.tasks.harvest_records",
"schedule": crontab(minute=44, hour=4), # Every day at 04:44 UTC,
"kwargs": {"name": "NJ-CANTOOK"},
"enabled": False,
},
}

CELERY_BROKER_HEARTBEAT = 0
Expand Down
166 changes: 166 additions & 0 deletions rero_ils/modules/api_harvester/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
# -*- coding: utf-8 -*-
#
# RERO ILS
# Copyright (C) 2024 RERO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Base API for harvesting records from external sources."""

from __future__ import absolute_import, print_function

import click

from rero_ils.modules.api_harvester.models import ApiHarvestConfig
from rero_ils.modules.locations.api import Location
from rero_ils.modules.organisations.api import Organisation


class ApiHarvest:
    """Base class for harvesting records from an external API.

    ``get_request_url`` and ``create_update_record`` raise
    ``NotImplementedError`` here; concrete harvesters are expected to
    provide them.
    """

    def __init__(
        self, name, file_name=None, process=False, harvest_count=-1, verbose=False
    ):
        """Class init.

        :param name: name of API config
        :param file_name: object with a ``write`` method used to save the
            harvested records (nothing is saved if it is falsy)
        :param process: create harvested records
        :param harvest_count: how many records to harvest
            (a negative value means no limit)
        :param verbose: print verbose messages
        :raises NameError: if no API config with the given name exists
        """
        config = self.get_config(name)
        if not config:
            raise NameError(f"API Config not found: {name}")
        self.config = config
        self.file = file_name
        self.process = process
        self.harvest_count = harvest_count
        self.verbose = verbose
        self._vendor = None
        self._url = self.config.url
        self._code = self.config.code
        self._count = 0
        self._count_new = 0
        self._count_upd = 0
        self._count_del = 0
        # Collect, per organisation using this source, the circulation
        # category and the harvested source URL of each online location.
        info = {}
        for organisation in Organisation.get_records_by_online_harvested_source(
            self._code
        ):
            locations = {}
            for location_pid in organisation.get_online_locations():
                # Every online location gets an entry; it stays ``None``
                # when its library has no URL for this source.
                locations[location_pid] = None
                location = Location.get_record_by_pid(location_pid)
                library = location.get_library()
                if url := library.get_online_harvested_source_url(source=self._code):
                    locations[location_pid] = url
            info[organisation.pid] = {
                "item_type_pid": organisation.online_circulation_category(),
                "locations": locations,
            }
        self._info = info

    @classmethod
    def get_config(cls, name):
        """Get config.

        :param name: name of config
        :returns: first API config with this name, as returned by the query
        """
        return ApiHarvestConfig.query.filter_by(name=name).first()

    def get_request_url(self, start_date="1990-01-01", page=1):
        """Get request URL.

        :param start_date: date from where records have to be harvested
        :param page: page from where records have to be harvested
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError()

    def create_update_record(self, record):
        """Create new record or update record.

        ``process_records`` unpacks the result as ``(pid, status)`` and
        reads ``status.value``.

        :param record: record to create or update
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError()

    def save_record(self, record):
        """Save record to file.

        Does nothing when no file object was given at init.

        :param record: record to write to file
        """
        if self.file:
            self.file.write(record)

    def msg_text(self, pid, msg):
        """Logging message text.

        :param pid: pid for message text
        :param msg: msg text for message
        :returns: string message
        """
        return f"{self._count}: {self._vendor}:{self._code} {pid} = {msg}"

    def process_records(self, records):
        """Process records.

        Each record is counted and saved; it is created or updated only
        when ``self.process`` is set.

        :param records: records to process
        """
        for record in records:
            # A negative harvest_count disables the limit.
            if self.harvest_count >= 0 and self._count >= self.harvest_count:
                break
            self._count += 1
            self.save_record(record)
            if self.process:
                pid, status = self.create_update_record(record)
                self.verbose_print(self.msg_text(pid=pid, msg=status.value))

    def verbose_print(self, msg):
        """Print verbose message.

        :param msg: message to print if verbose
        """
        if self.verbose:
            click.echo(msg)

    def harvest_records(self, from_date):
        """Harvest records from servers.

        This base implementation fetches nothing and processes an empty
        list; ``from_date`` is not used here.

        :param from_date: records changed after this date to harvest
        :returns: tuple of the total processed count and the number of
            records fetched by this call
        """
        records = []
        self.process_records(records=records)
        return self._count, len(records)

    @property
    def count(self):
        """Get count."""
        return self._count

    @property
    def count_new(self):
        """Get new count."""
        return self._count_new

    @property
    def count_upd(self):
        """Get updated count."""
        return self._count_upd

    @property
    def count_del(self):
        """Get deleted count."""
        return self._count_del
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
#
# RERO ILS
# Copyright (C) 2019-2022 RERO
# Copyright (C) 2024 RERO
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
Expand All @@ -15,4 +15,4 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""JSON schemas."""
"""ApiCantook."""
Loading
Loading