Source code for swh.coarnotify.server.handlers

# Copyright (C) 2025  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""Module dedicated to the handlers of COAR Notifications."""

from importlib.metadata import version
import json
from typing import Any, Callable

from django.conf import settings
from pyld import jsonld

from swh.model.exceptions import ValidationError as SWHValidationError
from swh.model.model import (
    MetadataAuthority,
    MetadataAuthorityType,
    MetadataFetcher,
    RawExtrinsicMetadata,
)
from swh.model.swhids import ExtendedSWHID, QualifiedSWHID
from swh.storage import get_storage
from swh.storage.algos.swhid import swhid_is_known

from .models import InboundNotification, Statuses
from .utils import create_accept_cn, reject, send_cn, to_sorted_tuple, unprocessable

CNHandler = Callable[[InboundNotification], None]


def get_handler(notification: InboundNotification) -> CNHandler | None:
    """Look up the handler matching an inbound COAR Notification's type.

    The ``type`` entry of the payload is normalised with ``to_sorted_tuple``
    and used as a key into the module-level ``handlers`` registry.

    Args:
        notification: an inbound CN

    Raises:
        UnprocessableException: expected to be raised by ``unprocessable``
            when no handler is registered for the type (that helper lives in
            ``.utils`` and is not visible from this module -- confirm there)

    Returns:
        The registered handler, or ``None`` if the type is unknown and
        ``unprocessable`` returned instead of raising
    """
    cn_type = to_sorted_tuple(notification.payload["type"])
    try:
        handler = handlers[cn_type]
    except KeyError:
        # Unknown type: flag the notification as unprocessable.
        unprocessable(
            notification,
            f"Unable to process {', '.join(cn_type)} COAR Notifications",
        )
        return None
    return handler


def mention(notification: InboundNotification) -> None:
    """Handle a mention COAR Notification.

    The software identifier found in ``object.as:object`` is either a SWHID
    (anything starting with ``swh:``) or an Origin URL. Before storing
    anything we make sure the software is present in the archive:

    - a SWHID is parsed as a ``QualifiedSWHID``, its core part is converted
      to an ``ExtendedSWHID`` and checked with ``swhid_is_known``;
    - an Origin URL is looked up with ``storage.origin_get``, both with and
      without a trailing slash, and the first match provides the target
      SWHID.

    If the context type is wrong, the SWHID is invalid or the software is not
    archived, the notification is rejected. Otherwise the JSON-LD expanded
    payload is stored as a Raw Extrinsic Metadata object and an Accept CN is
    sent back.

    Args:
        notification: an inbound CN
    """
    payload = notification.payload
    paper = payload["context"]  # describes the paper
    relationship = payload["object"]  # describes the relationship

    # FIXME: CN specs (1.0.1) are a bit unclear about what should context_data
    # contains, especially the id. It would be more logical to find the paper
    # URI in the id and then some metadata about it, but instead we might find
    # the software URI in the id and then metadata about the paper. We are
    # trying to make some changes on the specs but meanwhile we'll skip
    # verifying that context.id == object.as:subject
    if "sorg:AboutPage" not in to_sorted_tuple(paper["type"]):
        reject(notification, "Context type does not contain sorg:AboutPage")
        return

    storage = get_storage(**settings.SWH_CONF["storage"])

    target = relationship["as:object"]
    archived = False
    qualified: QualifiedSWHID | None = None

    if target.startswith("swh:"):
        try:
            qualified = QualifiedSWHID.from_string(target)
            core_part = str(qualified.to_dict()["swhid"])
            extended = ExtendedSWHID.from_string(core_part)
        except SWHValidationError:
            reject(notification, f"{target} is not a valid SWHID")
            return
        archived = swhid_is_known(storage, extended)
    else:
        # Try the URL as given plus its variant with/without a trailing slash.
        if target.endswith("/"):
            alternate = target[:-1]
        else:
            alternate = f"{target}/"
        found = [
            origin for origin in storage.origin_get([target, alternate]) if origin
        ]
        if found:
            archived = True
            extended = found[0].swhid()

    # TODO: at some point we should trigger a SCN and reprocess the mention
    # instead of rejecting it because the software is missing from the archive
    if not archived:
        reject(
            notification,
            f"It looks like {target} has not yet been archived "
            "by Software Heritage. Please request a Save Code Now on it.",
        )
        return

    fetcher = MetadataFetcher(
        name="swh-coarnotify", version=version("swh-coarnotify")
    )
    storage.metadata_fetcher_add([fetcher])

    authority = MetadataAuthority(
        type=MetadataAuthorityType.REGISTRY,
        url=payload["origin"]["id"],
    )
    storage.metadata_authority_add([authority])

    # TODO: we extract context infos from the QualifiedSWHID here, but this
    # should be done in RawExtrinsicMetadata itself
    rem_context: dict[str, Any] = {}
    if qualified:
        anchor = qualified.anchor
        if anchor:
            # Keyed by the anchor's object type name, lowercased.
            rem_context[anchor.object_type.name.lower()] = anchor
        rem_context.update(
            (qualifier, getattr(qualified, qualifier, None))
            for qualifier in ("origin", "visit", "path")
        )

    expanded = jsonld.expand(payload)
    try:
        rem = RawExtrinsicMetadata(
            target=extended,
            discovery_date=notification.created_at,
            authority=authority,
            fetcher=fetcher,
            format="coarnotify-mention-v1",
            metadata=json.dumps(expanded).encode(),
            **rem_context,
        )
    except ValueError as exc:
        reject(
            notification,
            f"Something went wrong while storing the mention: {exc}.",
        )
        return
    storage.raw_extrinsic_metadata_add([rem])

    notification.status = Statuses.ACCEPTED
    notification.save()
    send_cn(
        create_accept_cn(notification, summary=f"Stored mention for {target}")
    )


# Registry used by ``get_handler``: maps the sorted tuple of a notification's
# types to the function in charge of processing it.
handlers: dict[tuple[str, ...], CNHandler] = {
    ("Announce", "RelationshipAction"): mention,
}