Source code for swh.coarnotify.parsers
# Copyright (C) 2025 - 2026 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from functools import lru_cache
import json
from pathlib import Path
from typing import Any
from pyld import jsonld
from pyld.documentloader.requests import requests_document_loader
from ratelimit import limits
from rest_framework.parsers import JSONParser
from .renderers import JSONLDRenderer
[docs]
class JSONLDParser(JSONParser):
    """JSON parser declared for the JSON-LD media type.

    Subclasses ``JSONParser`` and only overrides the two class attributes
    below; no parsing method is redefined here.
    """

    # Media type advertised by this parser (JSON-LD instead of plain JSON).
    media_type = "application/ld+json"
    # Renderer class associated with this parser.
    renderer_class = JSONLDRenderer
# Maps the file name of a JSON-LD context shipped with this package to the
# remote URLs it stands in for. URLs are stored lowercase and without a
# trailing slash because lookups normalise the requested URL that way before
# testing membership.
LOCAL_SCHEMAS: dict[str, list[str]] = {
    # CodeMeta contexts, one local file per published version.
    "codemeta-2.0.jsonld": [
        "https://doi.org/10.5063/schema/codemeta-2.0",
        "https://raw.githubusercontent.com/codemeta/codemeta/master/codemeta.jsonld",
        "https://raw.githubusercontent.com/codemeta/codemeta/2.0/codemeta.jsonld",
        "https://doi.org/doi:10.5063/schema/codemeta-2.0",
        "http://purl.org/codemeta/2.0",
    ],
    "codemeta-3.0.jsonld": [
        "https://w3id.org/codemeta/3.0",
        "https://raw.githubusercontent.com/codemeta/codemeta/3.0/codemeta.jsonld",
    ],
    "codemeta-3.1.jsonld": ["https://w3id.org/codemeta/3.1"],
    # ActivityStreams 2.0 (both URL schemes).
    "activitystreams2-20200921.jsonld": [
        "https://www.w3.org/ns/activitystreams",
        "http://www.w3.org/ns/activitystreams",
    ],
    # COAR Notify.
    "coarnotify-1.0.1.jsonld": [
        "https://coar-notify.net",
        "http://coar-notify.net",
        "https://purl.org/coar/notify",
    ],
    # schema.org (both URL schemes).
    "schemadotorg-29.4.jsonld": ["http://schema.org", "https://schema.org"],
    # TODO: ietf
}
[docs]
def safe_jsonld_expander(doc: dict) -> list[dict]:
    """Expand a JSON-LD document with pyld and this module's document loader.

    The custom loader:

    - serves the codemeta, activitystreams, schema.org & coarnotify contexts
      from local files
    - rate limits calls for any other schema (in addition to pyld's 10
      @context limit)

    Args:
        doc: the JSON-LD document to expand.

    Returns:
        The value returned by ``jsonld.expand`` for ``doc``.
    """
    expand_options = {"documentLoader": _document_loader}
    return jsonld.expand(doc, options=expand_options)
@lru_cache
def _read_local_context(name: str) -> Any:
    """Load and parse a JSON-LD context from the ``contexts`` directory.

    The directory is looked up next to this module. Results are memoised per
    ``name`` by ``lru_cache``, so each file is read and parsed once as long as
    its entry stays in the cache.

    Args:
        name: file name of the context inside the ``contexts`` directory.

    Returns:
        The decoded JSON content of the file.
    """
    context_path = Path(__file__).parent.joinpath("contexts", name)
    raw_content = context_path.read_bytes()
    return json.loads(raw_content)
# Decorator order matters: ``lru_cache`` is outermost, so a cached URL is
# returned without entering the rate-limited function at all.
@lru_cache
@limits(calls=60, period=60)
def _fetch_remote_context(url: str) -> dict[str, Any]:
    """Fetch an external JSON-LD context through pyld's requests loader.

    Calls are limited by the ``limits`` decorator (60 calls per period of 60)
    and successful results are memoised per URL.

    Args:
        url: URL of the remote context to download.

    Returns:
        Whatever the pyld requests document loader returns for ``url``.
    """
    # ``secure=True`` is forwarded to pyld; presumably it makes the loader
    # refuse non-HTTPS URLs -- confirm against the pyld documentation.
    remote_loader = requests_document_loader(secure=True)
    return remote_loader(url)
def _document_loader(url: str, options=None) -> dict[str, Any]:
    """Document loader for pyld.

    Serves well-known JSON-LD documents from local files instead of fetching
    them from the Internet every single time, and delegates every other URL
    to the rate limited remote fetcher.

    Args:
        url: URL of the requested JSON-LD document.
        options: accepted for compatibility with pyld's loader signature;
            not used here.

    Raises:
        RateLimitException: too many external HTTP calls.

    Return:
        A dict containing the document as expected by pyld, with the
        ``contextUrl``, ``documentUrl`` and ``document`` keys when the
        document is served locally.
    """
    # Known URLs are stored lowercase and without a trailing slash.
    normalized_url = url.lower().rstrip("/")
    local_name = next(
        (
            filename
            for filename, known_urls in LOCAL_SCHEMAS.items()
            if normalized_url in known_urls
        ),
        None,
    )
    if local_name is None:
        # Not a bundled context: fall back to the (cached, rate limited) fetch.
        return _fetch_remote_context(url)
    return {
        "contextUrl": None,
        # The caller's original URL is reported, not the normalised one.
        "documentUrl": url,
        "document": _read_local_context(local_name),
    }