# Source code for swh.coarnotify.server.validators
# Copyright (C) 2026 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""Validators."""
from django.core.exceptions import ValidationError
from django.core.validators import URLValidator
from rdflib import Graph, Node, URIRef
from rdflib.exceptions import UniquenessError
from rdflib.namespace import RDF
from rest_framework import serializers
from swh.coarnotify.namespaces import AS, AS_ACTIVITIES, AS_OBJECTS, LDP
from swh.coarnotify.server.models import HANDLER_BY_TYPES
[docs]
def url_match(url1: str, url2: str) -> bool:
    """Tell whether two URLs are equal once trailing slashes are dropped.

    Args:
        url1: an URL
        url2: another URL

    Returns:
        True if both URLs are identical after stripping any trailing ``/``
    """
    first, second = (url.rstrip("/") for url in (url1, url2))
    return first == second
[docs]
def simple_url_validator(url: str) -> bool:
    """Check that ``url`` is a well-formed ``http`` or ``https`` URL.

    Thin wrapper around django's ``URLValidator`` restricted to the ``http`` and
    ``https`` schemes, reporting the outcome as a boolean instead of letting the
    ``ValidationError`` propagate.

    Args:
        url: an URL to validate

    Returns:
        True if the URL is valid and its protocol is HTTP or HTTPS.
    """
    check_http_url = URLValidator(schemes=["http", "https"])
    try:
        check_http_url(url)
    except ValidationError:
        is_valid = False
    else:
        is_valid = True
    return is_valid
[docs]
def validate_notification(
    graph: Graph, root_id: URIRef, inbox_url: str
) -> frozenset[Node]:
    """Validate the generic COAR Notification shape.

    https://coar-notify.net/specification/1.0.1/

    Every check is run and each failure is collected, so that all the problems
    found in the payload are reported together in a single exception.

    Args:
        graph: RDF graph
        root_id: the document ID
        inbox_url: our Inbox URL

    Raises:
        serializers.ValidationError: payload does not match the specs

    Returns:
        a set of types for the notification (to identify handler for it)
    """
    errors: list[serializers.ValidationError] = []

    def add_error(field: str, message: str) -> None:
        # each failure is stored as its own ValidationError keyed by field name
        errors.append(serializers.ValidationError({field: message}))

    # type is REQUIRED.
    types = frozenset(graph.objects(root_id, URIRef(RDF.type)))
    if not types:
        add_error("type", "The notification must have a RDF `type`")
    # It MUST include one of the Activity Stream 2.0 Activity Types.
    if not types & AS_ACTIVITIES:
        add_error(
            "type",
            "The notification must have a RDF `type` Activity Streams "
            "Activity `type`",
        )
    # The exact combination of types is what selects the handler.
    if types not in HANDLER_BY_TYPES:
        add_error(
            "type",
            "The notification RDF `type` is not handled by this application",
        )

    # origin is REQUIRED. It describes the system which has sent the notification.
    try:
        # any=False makes rdflib raise if more than one value is found
        origin = graph.value(root_id, URIRef(AS.origin), any=False)
    except UniquenessError:
        origin = None
    if not origin:
        add_error("origin", "The notification must have one Activity Streams `origin`")
    else:
        # it MUST have an id which MUST be an HTTP URI identifying the sending system
        if not isinstance(origin, URIRef):  # will be a BNode if no `id` is defined
            add_error("origin", "The Activity Streams `origin` must have an `id`")
        # it MUST have a type
        if not graph.value(origin, URIRef(RDF.type)):
            add_error("origin", "The Activity Streams `origin` must have a RDF `type`")
        # SHOULD have an inbox which, when present, MUST have the HTTP URI of the inbox
        origin_inbox = graph.value(origin, URIRef(LDP.inbox))
        if origin_inbox and not simple_url_validator(str(origin_inbox)):
            add_error(
                "origin",
                "The Activity Streams `origin` LDN `inbox` must be a valid URL",
            )

    # target is REQUIRED. It describes the system which is intended to receive the
    # notification.
    try:
        target = graph.value(root_id, URIRef(AS.target), any=False)
    except UniquenessError:
        target = None
    if not target:
        add_error("target", "The notification must have one Activity Streams `target`")
    else:
        # MUST have an id which MUST be an HTTP URI identifying the receiving system
        if not isinstance(target, URIRef):  # will be a BNode if no `id` is defined
            add_error("target", "The Activity Streams `target` must have an `id`")
        # MUST have a type
        if not graph.value(target, URIRef(RDF.type)):
            add_error("target", "The Activity Streams `target` must have a RDF `type`")
        # MUST have an inbox which MUST have the HTTP URI of the inbox
        target_inbox = graph.value(target, URIRef(LDP.inbox))
        if not target_inbox:
            add_error("target", "The Activity Streams `target` must have a LDN `inbox`")
        # NOTE(review): this is a strict string comparison, a trailing slash
        # difference between the two URLs is reported as a mismatch.
        elif str(target_inbox) != inbox_url:
            add_error(
                "target",
                f"Software Heritage inbox URL {inbox_url} does not match "
                f"the Activity Streams `target` `inbox` {target_inbox}",
            )

    # object is REQUIRED. It is the focus of the activity.
    try:
        obj = graph.value(root_id, URIRef(AS.object), any=False)
    except UniquenessError:
        obj = None
    if not obj:
        add_error("object", "The notification must have one Activity Streams `object`")
    # it MUST have an id which MUST be a URI identifying the object.
    elif not isinstance(obj, URIRef):  # will be a BNode if no `id` is defined
        add_error("object", "The Activity Streams `object` must have an `id`")

    if errors:
        # mypy complains about the type here but it matches the docs
        # https://www.django-rest-framework.org/api-guide/exceptions/#validationerror
        raise serializers.ValidationError(errors)  # type: ignore
    return types
[docs]
def validate_mention(graph: Graph, root_id: URIRef) -> str:
    """Validate a CN Announce Relationship.

    https://coar-notify.net/specification/1.0.1/announce-relationship/

    This validation happens outside the Inbox API (it is triggered by the background
    tasks) so instead of using ``serializers.ValidationError`` we simply raise a
    ``ValueError`` whose message is the list of all the problems found, joined
    with ``". "``.

    Args:
        graph: RDF graph
        root_id: the document ID

    Returns:
        The SWHID or Origin URL mentioned

    Raises:
        ValueError: the mention is not valid
    """
    problems: list[str] = []

    # context is REQUIRED and MUST have an id which MUST be the HTTP URI of the
    # "landing page" for the resource
    try:
        # any=False: more than one `context` raises and is treated as missing
        context = graph.value(root_id, URIRef(AS.context), any=False)
    except UniquenessError:
        context = None
    if context:
        if not isinstance(context, URIRef):  # a BNode when no `id` is defined
            problems.append("The Activity Streams `context` must have an `id`")
        elif not simple_url_validator(str(context)):
            problems.append(
                "The Activity Streams `context` `id` is not a valid URL for the "
                "resource"
            )
    else:
        problems.append("The notification must have one Activity Streams `context`")

    # ``object`` is expected to have been checked already by
    # ``validate_notification``: exactly one of it, and it has an id
    object_id = graph.value(root_id, URIRef(AS.object))

    # object MUST have a type describing the relationship, which MUST include an
    # Activity Streams 2.0 object type, and MAY include other values.
    declared_types = set(graph.objects(object_id, URIRef(RDF.type)))
    if declared_types.isdisjoint(AS_OBJECTS):
        problems.append(
            "The Activity Streams `object` must have an Activity Streams Object `type`"
        )

    # it also MUST have a 'triple' describing the relationship, in the form:
    # as:subject: containing the URI for the local resource (The paper)
    # as:relationship: containing a relationship URI
    # as:object: containing the URI for the remote resource (a SWHID or an Origin URL)
    for predicate in ("subject", "relationship", "object"):
        try:
            value = graph.value(object_id, AS[predicate], any=False)
        except UniquenessError:
            value = None
        if not value:
            problems.append(
                "The Activity Streams `object` must have one Activity Streams "
                f"`{predicate}`"
            )
        elif predicate == "object":
            # only bound when as:object is present; otherwise an error has been
            # recorded and we raise before reaching the return statement
            software_identifier = str(value)

    # FIXME: CN specs (1.0.1) are a bit unclear about what should context_data contains,
    # especially the id. It would be more logical to find the paper URI in the id and
    # then some metadata about it, but instead we might find the software URI in the id
    # and then metadata about the paper. We are trying to make some changes on the
    # specs but meanwhile we'll skip verifying that context.id == object.as:subject
    if problems:
        raise ValueError(". ".join(problems))
    return software_identifier