# Copyright (C) 2024 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from typing import Any, Dict, List, Optional
import yaml
from swh.indexer.bibtex import cff_to_bibtex, codemeta_to_bibtex
from swh.model.hashutil import hash_to_bytes
from swh.model.swhids import CoreSWHID, ObjectType, QualifiedSWHID
from swh.web.browse.snapshot_context import get_snapshot_context
from swh.web.utils.archive import (
lookup_intrinsic_citation_metadata_by_target_swhid,
lookup_origin_intrinsic_citation_metadata,
lookup_snapshot,
lookup_snapshot_alias,
)
from swh.web.utils.identifiers import get_swhids_info
from swh.web.utils.typing import (
Citation,
IntrinsicMetadataFile,
IntrinsicMetadataFiletype,
SWHObjectInfo,
)
def _get_bibtex_from_intrinsic_citation_metadata(
    raw_intrinsic_metadata: List[IntrinsicMetadataFile],
    swhid: Optional[QualifiedSWHID] = None,
) -> Citation:
    """
    Build a BibTeX citation from the first intrinsic citation metadata file.

    Only the first entry of ``raw_intrinsic_metadata`` is used. Besides the
    BibTeX text, the returned citation carries a qualified SWHID pointing at
    the metadata file the citation was generated from.

    Args:
        raw_intrinsic_metadata: non-empty list of metadata files; the first
            entry is read through its ``type``, ``id``, ``name``, ``content``
            and ``parsing_error`` keys
        swhid: optional SWHID of the cited object; its origin, visit and anchor
            qualifiers (or an anchor derived from the object itself) are
            copied to the SWHID of the metadata file

    Returns:
        a citation with ``format`` set to ``"bibtex"``; ``content`` is left
        empty when the conversion raised or when the file type is neither
        codemeta nor CFF, and ``error`` holds either the file parsing error or
        the message of the exception raised by the conversion
    """
    # Should not happen: metadata contains at least one of codemeta.json or citation.cff
    assert len(raw_intrinsic_metadata) > 0, (
        "lookup_origin_raw_intrinsic_metadata returned neither codemeta.json "
        f"nor citation.cff: {raw_intrinsic_metadata}"
    )

    metadata_file = raw_intrinsic_metadata[0]
    metadata_file_origin_type = metadata_file["type"]
    metadata_file_id = metadata_file["id"]

    # The source SWHID always targets the metadata file content itself; the
    # qualifiers below only add context on where that file was found.
    source_swhid_params: Dict[str, Any] = {
        "object_type": ObjectType.CONTENT,
        "object_id": hash_to_bytes(metadata_file_id),
    }

    if swhid is not None:
        if swhid.origin:
            source_swhid_params["origin"] = swhid.origin
        if swhid.visit:
            source_swhid_params["visit"] = swhid.visit

        if swhid.anchor:
            source_swhid_params["anchor"] = swhid.anchor
        elif swhid.object_type in (
            ObjectType.DIRECTORY,
            ObjectType.RELEASE,
            ObjectType.REVISION,
        ):
            # No explicit anchor: the cited object itself becomes the anchor.
            source_swhid_params["anchor"] = CoreSWHID(
                object_type=swhid.object_type, object_id=swhid.object_id
            )
        elif swhid.object_type == ObjectType.SNAPSHOT:
            snapshot_id = swhid.object_id.hex()
            source_swhid_params["visit"] = str(
                CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=swhid.object_id)
            )
            snapshot = lookup_snapshot(
                snapshot_id, branches_from="HEAD", branches_count=1
            )
            if "HEAD" in snapshot["branches"]:
                head_branch = snapshot["branches"]["HEAD"]
                branch_target = head_branch["target"]
                branch_target_type = head_branch["target_type"]
                if branch_target_type == "alias":
                    branch = lookup_snapshot_alias(snapshot_id, "HEAD")
                    if branch:
                        branch_target = branch["target"]
                        branch_target_type = branch["target_type"]
                # When the alias could not be resolved the target type is still
                # "alias", which does not name an ObjectType member: looking it
                # up would raise KeyError, so the anchor is left out instead.
                if branch_target_type != "alias":
                    source_swhid_params["anchor"] = str(
                        CoreSWHID(
                            object_type=ObjectType[branch_target_type.upper()],
                            object_id=hash_to_bytes(branch_target),
                        )
                    )

    # NOTE(review): the path only depends on the metadata file, so it is set
    # whether or not a target SWHID was supplied -- confirm against upstream.
    source_swhid_params["path"] = "/" + metadata_file["name"]

    citation = Citation(
        format="bibtex",
        content="",
        source_swhid=str(QualifiedSWHID(**source_swhid_params)),
        error=metadata_file["parsing_error"],
    )

    # Best effort: a conversion failure is reported through the "error" field
    # of the citation rather than propagated to the caller.
    try:
        if metadata_file_origin_type == IntrinsicMetadataFiletype.CODEMETA.value:
            citation["content"] = codemeta_to_bibtex(metadata_file["content"], swhid)
        elif metadata_file_origin_type == IntrinsicMetadataFiletype.CFF.value:
            # The file content is serialized back to a YAML string before
            # being handed to the converter.
            citation["content"] = cff_to_bibtex(
                yaml.dump(metadata_file["content"], default_flow_style=False), swhid
            )
    except Exception as e:
        citation["error"] = str(e)

    return citation
[docs]
def get_bibtex_from_origin(
    origin_url: str,
) -> Citation:
    """
    Produce a BibTeX citation for a software origin.

    The citation is generated from the intrinsic citation metadata found in
    the repository, i.e. the original codemeta.json and citation.cff, for the
    root directory of the main branch of the latest visit snapshot.

    Args:
        origin_url: origin url

    Returns:
        the software citation in BibTeX format

    Raises:
        swh.web.utils.exc.NotFoundExc: when snapshot, branch or directory is missing,
            no metadata could be found or the metadata files could not be decoded
        BadInputExc: when the origin does not allow to find metadata
    """
    citation_files = lookup_origin_intrinsic_citation_metadata(origin_url)

    # compute a target SWHID from latest origin snapshot
    context = get_snapshot_context(origin_url=origin_url)

    # Snapshot context entries are checked in this order; the first one that
    # is set determines the type of the cited object.
    candidates = (
        (ObjectType.REVISION, "revision_id"),
        (ObjectType.RELEASE, "release_id"),
        (ObjectType.DIRECTORY, "root_directory"),
    )
    target_info = None
    for kind, context_key in candidates:
        if context[context_key] is not None:
            target_info = SWHObjectInfo(
                object_type=kind,
                object_id=context[context_key],
            )
            break

    qualified_target = None
    if target_info:
        first_entry = get_swhids_info([target_info], snapshot_context=context)[0]
        contextual_swhid = first_entry["swhid_with_context"]
        if contextual_swhid:
            qualified_target = QualifiedSWHID.from_string(contextual_swhid)

    return _get_bibtex_from_intrinsic_citation_metadata(
        citation_files, qualified_target
    )
[docs]
def get_bibtex_from_swhid(
    target_swhid: str,
) -> Citation:
    """
    Produce a BibTeX citation for the object designated by a SWHID.

    The citation is generated from the intrinsic citation metadata found in
    the repository, i.e. the original codemeta.json and citation.cff, for the
    target object.

    Args:
        target_swhid: SWHID which can be qualified or not, if the target object is
            of type Content, it must be qualified with an anchor

    Returns:
        the software citation in BibTeX format

    Raises:
        swh.web.utils.exc.NotFoundExc: when snapshot, branch or directory is missing,
            no metadata could be found or the metadata files could not be decoded
        BadInputExc: when the origin does not allow to find metadata
    """
    # The metadata lookup runs before the SWHID string is parsed, so a lookup
    # failure is raised first.
    citation_files = lookup_intrinsic_citation_metadata_by_target_swhid(target_swhid)
    return _get_bibtex_from_intrinsic_citation_metadata(
        citation_files, QualifiedSWHID.from_string(target_swhid)
    )