Source code for swh.coarnotify.parsers

# Copyright (C) 2025 - 2026  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from functools import lru_cache
import json
from pathlib import Path
from typing import Any

from pyld import jsonld
from pyld.documentloader.requests import requests_document_loader
from ratelimit import limits
from rest_framework.parsers import JSONParser

from .renderers import JSONLDRenderer


class JSONLDParser(JSONParser):
    """JSON parser variant declared for the JSON-LD media type.

    All parsing behaviour is inherited from DRF's ``JSONParser``; this
    subclass only overrides the two class attributes below.
    """

    # Renderer class associated with this parser, replacing the one set
    # on the parent JSONParser.
    renderer_class = JSONLDRenderer
    # Content type this parser is declared for.
    media_type = "application/ld+json"
# Maps the file name of a JSON-LD context shipped with this package to the
# remote URLs it stands in for.
#
# ``_document_loader`` compares ``url.lower().rstrip("/")`` against these
# entries, so every URL listed here must be lowercase and must not end with
# a slash, otherwise it can never match.
LOCAL_SCHEMAS: dict[str, list[str]] = {
    "codemeta-2.0.jsonld": [
        "https://doi.org/10.5063/schema/codemeta-2.0",
        "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld",
        "https://raw.githubusercontent.com/codemeta/codemeta/2.0/codemeta.jsonld",
        "https://doi.org/doi:10.5063/schema/codemeta-2.0",
        "http://purl.org/codemeta/2.0",
    ],
    "codemeta-3.0.jsonld": [
        "https://w3id.org/codemeta/3.0",
        "https://raw.githubusercontent.com/codemeta/codemeta/3.0/codemeta.jsonld",
    ],
    "codemeta-3.1.jsonld": ["https://w3id.org/codemeta/3.1"],
    "activitystreams2-20200921.jsonld": [
        "https://www.w3.org/ns/activitystreams",
        "http://www.w3.org/ns/activitystreams",
    ],
    "coarnotify-1.0.1.jsonld": [
        "https://coar-notify.net",
        "http://coar-notify.net",
        "https://purl.org/coar/notify",
    ],
    "schemadotorg-29.4.jsonld": ["http://schema.org", "https://schema.org"],
    # TODO: ietf
}
def safe_jsonld_expander(doc: dict) -> list[dict]:
    """Expand a JSON-LD document with pyld, using this module's loader.

    The custom document loader handed to pyld:

    - serves the codemeta, activitystreams, schema.org & coarnotify
      contexts from local copies
    - rate limits the retrieval of any other context (on top of pyld's own
      limit of 10 ``@context``)

    Args:
        doc: the JSON-LD document to expand.

    Returns:
        The expanded form of ``doc`` as produced by ``jsonld.expand``.
    """
    expand_options = {"documentLoader": _document_loader}
    return jsonld.expand(doc, options=expand_options)
@lru_cache
def _read_local_context(name: str) -> Any:
    """Load and decode a JSON-LD context stored next to this module.

    Args:
        name: file name inside the ``contexts`` directory.

    Returns:
        The decoded JSON content. Results are memoized by ``lru_cache``, so
        repeated calls with the same name return the same object.
    """
    context_path = Path(__file__).parent.joinpath("contexts", name)
    raw_content = context_path.read_bytes()
    return json.loads(raw_content)


@lru_cache
@limits(calls=60, period=60)
def _fetch_remote_context(url: str) -> dict[str, Any]:
    """Fetch an external JSON-LD context, rate limited and memoized.

    Args:
        url: address of the remote context.

    Returns:
        Whatever pyld's requests-based document loader returns for ``url``.
    """
    # ``secure=True`` is forwarded to pyld's loader factory as-is.
    remote_loader = requests_document_loader(secure=True)
    return remote_loader(url)


def _document_loader(url: str, options=None) -> dict[str, Any]:
    """Document loader for pyld.

    Well-known JSON-LD documents are served from local files instead of
    being downloaded every single time; anything else goes through the
    rate limited remote fetcher.

    Args:
        url: URL of the document requested by pyld.
        options: accepted as part of the loader signature, not used here.

    Raises:
        RateLimitException: too many external HTTP calls.

    Returns:
        A dict containing the document as expected by pyld.
    """
    # Known URLs are matched case-insensitively and regardless of trailing
    # slashes; the original ``url`` is still what gets reported or fetched.
    normalized_url = url.lower().rstrip("/")
    local_name = next(
        (
            file_name
            for file_name, aliases in LOCAL_SCHEMAS.items()
            if normalized_url in aliases
        ),
        None,
    )
    if local_name is None:
        return _fetch_remote_context(url)
    return {
        "contextUrl": None,
        "documentUrl": url,
        "document": _read_local_context(local_name),
    }