Source code for swh.coarnotify.server.handlers

# Copyright (C) 2025 - 2026  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""Module dedicated to the handlers of COAR Notifications."""

from importlib.metadata import version
import json
from typing import Any

from django.conf import settings
from rdflib import Graph, URIRef

from swh.model.exceptions import ValidationError as SWHValidationError
from swh.model.model import (
    MetadataAuthority,
    MetadataAuthorityType,
    MetadataFetcher,
    RawExtrinsicMetadata,
)
from swh.model.swhids import ExtendedSWHID, QualifiedSWHID
from swh.storage import get_storage
from swh.storage.algos.swhid import swhid_is_known

from .models import Handlers, InboundNotification, Statuses
from .utils import create_accept_cn, reject, send_cn
from .validators import validate_mention


def mention(notification: InboundNotification) -> None:
    """Handle a mention COAR Notification.

    The software identifier sent in ``object.as:object`` could be an Origin URL
    or a SWHID. We need to check if it exists in the storage before saving the
    Raw Extrinsic Metadata.

    For an Origin URL we use ``storage.origin_get`` with and without a trailing
    slash; the SWHID of the first origin found becomes the metadata target.

    For a SWHID we first check it's valid (it must be a CoreSWHID or
    QualifiedSWHID) then we convert it to an ExtendedSWHID so we can use it
    with ``swhid_is_known``.

    If these checks fail the mention is rejected. Otherwise we send the CN with
    its context to the REM storage and send an Accept CN.

    Args:
        notification: an inbound CN

    Raises:
        AssertionError: if the notification has no sender once the software
            has been found in the archive.
    """
    graph = Graph()
    graph.parse(data=json.dumps(notification.payload), format="json-ld")
    root_id = URIRef(f"urn:uuid:{notification.id}")

    try:
        software_identifier = validate_mention(graph, root_id)
    except ValueError as exc:
        reject(notification, str(exc))
        return

    software_is_archived = False
    qualified_swhid: QualifiedSWHID | None = None
    storage = get_storage(**settings.SWH_CONF["storage"])

    if software_identifier.startswith("swh:"):
        try:
            qualified_swhid = QualifiedSWHID.from_string(software_identifier)
            # NOTE(review): the "swhid" entry is presumably the identifier
            # without its qualifiers -- confirm against swh.model.
            extended_swhid = ExtendedSWHID.from_string(
                str(qualified_swhid.to_dict()["swhid"])
            )
        except SWHValidationError:
            reject(notification, f"{software_identifier} is not a valid SWHID")
            return
        software_is_archived = swhid_is_known(storage, extended_swhid)
    else:
        # Look the origin up under both spellings (with and without the
        # trailing slash), the one received in the notification first.
        alternate_url = (
            software_identifier[:-1]
            if software_identifier.endswith("/")
            else f"{software_identifier}/"
        )
        origin_urls = [software_identifier, alternate_url]
        if results := [o for o in storage.origin_get(origin_urls) if o]:
            software_is_archived = True
            extended_swhid = results[0].swhid()

    # TODO: at some point we should trigger a SCN and reprocess the mention
    # instead of rejecting it because the software is missing from the archive
    if not software_is_archived:
        error_message = (
            f"It looks like {software_identifier} has not yet been archived "
            "by Software Heritage. Please request a Save Code Now on it."
        )
        reject(notification, error_message)
        return

    metadata_fetcher = MetadataFetcher(
        name="swh-coarnotify", version=version("swh-coarnotify")
    )
    storage.metadata_fetcher_add([metadata_fetcher])

    # Explicit check instead of a bare ``assert``: it is not stripped by
    # ``python -O`` and still raises the same exception type as before.
    if not notification.sender:
        raise AssertionError(f"Notification {notification.id} has no sender")
    metadata_authority = MetadataAuthority(
        type=MetadataAuthorityType.REGISTRY,
        url=notification.sender.url,
    )
    storage.metadata_authority_add([metadata_authority])

    # TODO: we extract context infos from the QualifiedSWHID here, but this should
    # be done in RawExtrinsicMetadata itself
    context: dict[str, Any] = {}
    if qualified_swhid:
        if qualified_swhid.anchor:
            # The keyword is the anchor's object type name in lowercase.
            context[qualified_swhid.anchor.object_type.name.lower()] = (
                qualified_swhid.anchor
            )
        for prop in ("origin", "visit", "path"):
            context[prop] = getattr(qualified_swhid, prop, None)

    try:
        metadata_object = RawExtrinsicMetadata(
            target=extended_swhid,
            discovery_date=notification.created_at,
            authority=metadata_authority,
            fetcher=metadata_fetcher,
            format="coarnotify-mention-v1",
            metadata=json.dumps(notification.payload).encode(),
            **context,
        )
    except ValueError as exc:
        reject(notification, f"Something went wrong while storing the mention: {exc}.")
        return

    storage.raw_extrinsic_metadata_add([metadata_object])

    notification.status = Statuses.ACCEPTED
    notification.save()

    accepted_cn = create_accept_cn(
        notification, summary=f"Stored mention for {software_identifier}"
    )
    send_cn(accepted_cn)
# Dispatch table: maps a ``Handlers`` member to the function that processes
# inbound notifications of that kind.
handlers = {
    Handlers.MENTION: mention,
}