# Source code for swh.coarnotify.server.handlers
# Copyright (C) 2025 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""Module dedicated to the handlers of COAR Notifications."""
from importlib.metadata import version
import json
from typing import Any, Callable
from django.conf import settings
from pyld import jsonld
from swh.model.exceptions import ValidationError as SWHValidationError
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
RawExtrinsicMetadata,
)
from swh.model.swhids import ExtendedSWHID, QualifiedSWHID
from swh.storage import get_storage
from swh.storage.algos.swhid import swhid_is_known
from .models import InboundNotification, Statuses
from .utils import create_accept_cn, reject, send_cn, to_sorted_tuple, unprocessable
# Signature of a COAR Notification handler: a callable that receives the inbound
# notification and returns nothing.
CNHandler = Callable[[InboundNotification], None]
# [docs]
def get_handler(notification: InboundNotification) -> CNHandler | None:
    """Look up the handler matching the type of an inbound COAR Notification.

    The ``type`` found in the notification payload is normalised with
    ``to_sorted_tuple`` and used as a key in the module-level ``handlers`` dict.

    Args:
        notification: an inbound CN

    Raises:
        UnprocessableException: no handler available for cn (reported through
            ``unprocessable``)

    Returns:
        The COAR Notification handler registered for this type, or ``None`` when
        there is none and ``unprocessable`` returned normally
    """
    cn_type = to_sorted_tuple(notification.payload["type"])
    if cn_type in handlers:
        return handlers[cn_type]
    # Nothing registered for this combination of types: flag the notification.
    unprocessable(
        notification,
        f"Unable to process {', '.join(cn_type)} COAR Notifications",
    )
    return None
# [docs]
def mention(notification: InboundNotification) -> None:
    """Process a mention COAR Notification.

    ``object.as:object`` carries the software identifier, which is either an Origin
    URL or a SWHID. Before recording the Raw Extrinsic Metadata (REM) we make sure
    the software is present in the storage:

    - a SWHID (anything starting with ``swh:``) is parsed as a QualifiedSWHID, its
      core part is converted to an ExtendedSWHID and checked with
      ``swhid_is_known``;
    - anything else is treated as an Origin URL and looked up with
      ``storage.origin_get``, both with and without a trailing slash; the first
      origin found provides the ExtendedSWHID used as the REM target.

    The mention is rejected when the context type lacks ``sorg:AboutPage``, when
    the SWHID is invalid, when the software is not archived or when the REM object
    cannot be built. Otherwise the expanded CN is stored as REM, the notification
    is marked as accepted and an Accept CN is sent.

    Args:
        notification: an inbound CN
    """
    payload = notification.payload
    paper = payload["context"]  # describes the paper
    relationship = payload["object"]  # describes the relationship

    # FIXME: CN specs (1.0.1) are a bit unclear about what context should contain,
    # especially the id. It would be more logical to find the paper URI in the id
    # and then some metadata about it, but instead we might find the software URI
    # in the id and then metadata about the paper. We are trying to make some
    # changes on the specs but meanwhile we'll skip verifying that
    # context.id == object.as:subject
    if "sorg:AboutPage" not in to_sorted_tuple(paper["type"]):
        reject(notification, "Context type does not contain sorg:AboutPage")
        return

    storage = get_storage(**settings.SWH_CONF["storage"])
    identifier = relationship["as:object"]

    archived = False
    qualified: QualifiedSWHID | None = None

    if not identifier.startswith("swh:"):
        # Origin URL: try the URL as given and its trailing-slash variant.
        if identifier.endswith("/"):
            alternate = identifier[:-1]
        else:
            alternate = f"{identifier}/"
        found = [o for o in storage.origin_get([identifier, alternate]) if o]
        if found:
            archived = True
            target = found[0].swhid()
    else:
        try:
            qualified = QualifiedSWHID.from_string(identifier)
            core_part = qualified.to_dict()["swhid"]
            target = ExtendedSWHID.from_string(str(core_part))
        except SWHValidationError:
            reject(notification, f"{identifier} is not a valid SWHID")
            return
        archived = swhid_is_known(storage, target)

    # TODO: at some point we should trigger a SCN and reprocess the mention
    # instead of rejecting it because the software is missing from the archive
    if not archived:
        reject(
            notification,
            f"It looks like {identifier} has not yet been archived "
            "by Software Heritage. Please request a Save Code Now on it.",
        )
        return

    fetcher = MetadataFetcher(
        name="swh-coarnotify", version=version("swh-coarnotify")
    )
    storage.metadata_fetcher_add([fetcher])

    authority = MetadataAuthority(
        type=MetadataAuthorityType.REGISTRY,
        url=payload["origin"]["id"],
    )
    storage.metadata_authority_add([authority])

    # TODO: we extract context infos from the QualifiedSWHID here, but this should
    # be done in RawExtrinsicMetadata itself
    rem_context: dict[str, Any] = {}
    if qualified:
        anchor = qualified.anchor
        if anchor:
            # The anchor is stored under the lowercase name of its object type.
            rem_context[anchor.object_type.name.lower()] = anchor
        rem_context.update(
            (prop, getattr(qualified, prop, None))
            for prop in ("origin", "visit", "path")
        )

    expanded = jsonld.expand(payload)
    try:
        rem = RawExtrinsicMetadata(
            target=target,
            discovery_date=notification.created_at,
            authority=authority,
            fetcher=fetcher,
            format="coarnotify-mention-v1",
            metadata=json.dumps(expanded).encode(),
            **rem_context,
        )
    except ValueError as exc:
        reject(
            notification,
            f"Something went wrong while storing the mention: {exc}.",
        )
        return

    storage.raw_extrinsic_metadata_add([rem])

    notification.status = Statuses.ACCEPTED
    notification.save()

    send_cn(
        create_accept_cn(notification, summary=f"Stored mention for {identifier}")
    )
# Registry of the available CN handlers, used by ``get_handler``. Keys are the
# tuples that ``to_sorted_tuple`` builds from a notification payload's ``type``.
handlers = {
("Announce", "RelationshipAction"): mention,
}