Source code for swh.indexer.citation.csl
# Copyright (C) 2026 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import json
import iso8601
from swh.indexer.citation.codemeta_data import CodeMetaData, CodeMetaPerson
from swh.indexer.namespaces import SPDX_LICENSES
[docs]
def codemeta_person_to_csl(person: CodeMetaPerson) -> dict[str, str]:
csl_person: dict[str, str] = {}
if person.is_organization:
csl_person["family"] = "".join(person.names)
if person.given_names:
csl_person["given"] = "".join(person.given_names)
if person.family_names:
csl_person["family"] = csl_person.get("family", "") + "".join(
person.family_names
)
if not person.is_organization and person.names and not csl_person:
# CSL expects separated name parts; fall back to a simple split when
# codemeta provides only `name`.
full_name = person.names[-1].strip()
name_parts = full_name.split()
if len(name_parts) == 1:
csl_person["family"] = name_parts[0]
elif name_parts:
csl_person["given"] = " ".join(name_parts[:-1])
csl_person["family"] = name_parts[-1]
return csl_person
[docs]
def codemeta_data_to_csl(
codemeta_data: CodeMetaData,
) -> str:
swhid = codemeta_data.swhid
csl: dict = {"type": "software"}
if codemeta_data.name:
csl["title"] = codemeta_data.name
if codemeta_data.description:
csl["abstract"] = codemeta_data.description
authors: list[dict[str, str]] = []
for author in codemeta_data.author or []:
csl_author = codemeta_person_to_csl(author)
if csl_author and csl_author not in authors:
authors.append(csl_author)
if authors:
csl["author"] = authors
date = (
codemeta_data.datePublished
or codemeta_data.dateCreated
or codemeta_data.dateModified
)
if date:
try:
parsed = iso8601.parse_date(date)
csl["issued"] = {"date-parts": [[parsed.year, parsed.month, parsed.day]]}
except iso8601.ParseError:
pass
for identifier in codemeta_data.identifier or []:
if identifier.startswith("https://doi.org/"):
csl["DOI"] = identifier
if swhid:
csl["id"] = str(swhid)
if codemeta_data.publisher:
p = codemeta_data.publisher[0]
if p.names:
publisher = "".join(p.names)
else:
given = "".join(p.given_names)
family = "".join(p.family_names)
publisher = f"{given} {family}" if given and family else given or family
if publisher:
csl["publisher"] = publisher
if codemeta_data.codeRepository:
csl["source"] = codemeta_data.codeRepository
csl_url_candidates = (
codemeta_data.url,
codemeta_data.relatedLink,
codemeta_data.downloadUrl,
codemeta_data.installUrl,
)
for csl_url in csl_url_candidates:
if csl_url is not None:
csl["URL"] = csl_url
break
licenses = []
for license in codemeta_data.license or []:
if license.startswith(str(SPDX_LICENSES)):
license_name = license[len(str(SPDX_LICENSES)) :]
if license_name.endswith(".html"):
license_name = license_name[:-5]
licenses.append(license_name)
if licenses:
csl["license"] = " and ".join(licenses)
version = codemeta_data.softwareVersion or codemeta_data.version
if version:
csl["version"] = version
return json.dumps(csl, indent=2)