# Copyright (C) 2022-2024 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import base64
import json
import logging
from pathlib import Path
import re
from typing import Any, Dict, Iterator, Optional, Sequence, Tuple
import attr
import requests
from requests.structures import CaseInsensitiveDict
from swh.loader.core.utils import (
DEFAULT_PARAMS,
EMPTY_AUTHOR,
Person,
get_url_body,
release_name,
)
from swh.loader.package.loader import BasePackageInfo, PackageLoader
from swh.model.hashutil import hash_to_hex
from swh.model.model import ObjectType, Release, Sha1Git, TimestampWithTimezone
from swh.storage.interface import StorageInterface
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
@attr.s
class HackagePackageInfo(BasePackageInfo):
    """Description of one version of a Hackage package.

    Instances are built by ``HackageLoader.get_package_info`` and consumed by
    ``HackageLoader.build_release``.
    """

    # Filled with the last path segment of the loader's origin URL.
    name = attr.ib(type=str)
    """Name of the package"""
    # The version string this package info was requested for.
    version = attr.ib(type=str)
    """Current version"""
    # Greatest ``time`` value found among the version's revisions; parsed with
    # ``TimestampWithTimezone.from_iso8601`` when the release is built.
    last_modified = attr.ib(type=str)
    """File last modified date as release date"""
    # Built from the ``user`` field of the last revision entry, or
    # ``EMPTY_AUTHOR`` when that field is absent. ``build_release`` only falls
    # back to it when the intrinsic metadata carries no author.
    author = attr.ib(type=Person)
    """Author"""
class HackageLoader(PackageLoader[HackagePackageInfo]):
    """Package loader for Haskell packages hosted on Hackage.

    The origin ``url`` is expected to be the package page
    (``https://hackage.haskell.org/package/{pkgname}``); the package name is
    taken from its last path segment.
    """

    visit_type = "hackage"

    def __init__(
        self,
        storage: StorageInterface,
        url: str,
        **kwargs,
    ):
        """Initialize the loader.

        Args:
            storage: storage instance forwarded to the base loader
            url: origin url of the package
            **kwargs: extra arguments forwarded untouched to the base loader
        """
        super().__init__(storage=storage, url=url, **kwargs)
        # Kept on the instance: read by info_versions() and get_package_info().
        self.url = url

    def _raw_info(self, url: str, **extra_params) -> bytes:
        """Return the raw body fetched from ``url``.

        ``extra_params`` (e.g. ``headers``) are forwarded to ``get_url_body``.
        """
        return get_url_body(url=url, **extra_params)

    def _fetch_json(self, url: str) -> Any:
        """Fetch ``url`` while asking for JSON and return the decoded payload."""
        return json.loads(
            self._raw_info(url=url, headers={"Accept": "application/json"})
        )

    def info_versions(self) -> Dict:
        """Return the package versions (fetched from
        https://hackage.haskell.org/package/{pkgname})

        Api documentation https://hackage.haskell.org/api
        """
        return self._fetch_json(self.url)

    def info_revisions(self, url: str) -> Sequence[Dict[str, Any]]:
        """Return the package version revisions (fetched from
        https://hackage.haskell.org/package/{pkgname}-{version}/revisions/)

        Api documentation https://hackage.haskell.org/api

        Args:
            url: revisions url of one package version

        Returns:
            The decoded JSON payload. Callers in this class treat it as a
            sequence of mappings, reading the ``time`` key and the optional
            ``user`` key of its entries.
        """
        return self._fetch_json(url)

    def get_versions(self) -> Sequence[str]:
        """Get all released versions of a Haskell package

        Returns:
            A sequence of versions

            Example::

                ["0.1.1", "0.10.2"]
        """
        # Iterating the decoded mapping yields its keys, i.e. the versions.
        return list(self.info_versions())

    def _md5_checksums(self, url: str) -> Dict[str, str]:
        """Return ``{"md5": <hex digest>}`` read from the ``Content-MD5`` header
        of a HEAD request on ``url``, or an empty dict when the header is
        missing, empty or cannot be decoded.
        """
        # NOTE(review): ``head_url_headers`` is not defined in the part of the
        # class visible here; presumably provided elsewhere -- confirm.
        headers = self.head_url_headers(url=url)
        content_md5 = headers.get("Content-MD5") if headers else None
        if not content_md5:
            return {}
        try:
            # validate=True rejects non-base64 input with binascii.Error, a
            # ValueError subclass (as are the Unicode errors); a malformed
            # header must only cost us the checksum, not the whole load.
            md5 = base64.b64decode(content_md5.encode(), validate=True)
            return {"md5": hash_to_hex(md5)}
        except ValueError:
            logger.warning("Can not decode md5 checksum %r for %r", content_md5, url)
            return {}

    def get_package_info(
        self, version: str
    ) -> Iterator[Tuple[str, HackagePackageInfo]]:
        """Get release name and package information from version

        Args:
            version: Package version (e.g: "0.1.0")

        Yields:
            A single tuple (release_name, p_info)
        """
        # Tolerate a trailing slash in the origin url, which would otherwise
        # produce an empty package name.
        pkgname: str = self.url.rstrip("/").split("/")[-1]
        url: str = (
            f"https://hackage.haskell.org/package/"
            f"{pkgname}-{version}/{pkgname}-{version}.tar.gz"
        )
        filename: str = url.split("/")[-1]

        # Retrieve version revisions
        revisions_url: str = (
            f"https://hackage.haskell.org/package/{pkgname}-{version}/revisions/"
        )
        revisions = self.info_revisions(revisions_url)
        # The most recent revision time is used as the release date.
        last_modified = max(item["time"] for item in revisions)

        # Here we get a 'user' which in most case corresponds to the maintainer.
        # We use that value as 'author' in case it is missing from intrinsic
        # metadata
        last_entry = revisions[-1]
        author = EMPTY_AUTHOR
        if "user" in last_entry:
            author = Person.from_fullname(last_entry["user"].encode())

        # Get md5 checksums with a HEAD request to archive url
        checksums = self._md5_checksums(url)

        p_info = HackagePackageInfo(
            name=pkgname,
            filename=filename,
            url=url,
            version=version,
            last_modified=last_modified,
            author=author,
            checksums=checksums,
        )
        yield release_name(version), p_info

    def build_release(
        self, p_info: HackagePackageInfo, uncompressed_path: str, directory: Sha1Git
    ) -> Optional[Release]:
        """Build the synthetic release for one package version.

        Args:
            p_info: package information yielded by ``get_package_info``
            uncompressed_path: directory where the archive was uncompressed
            directory: identifier of the loaded directory, used as release
                target

        Returns:
            A synthetic release named after the version, dated with
            ``p_info.last_modified`` and targeting ``directory``.
        """
        # ``extract_intrinsic_metadata`` is defined outside this class (not
        # visible here). It receives the ``{name}-{version}`` directory of the
        # uncompressed archive plus the package name; presumably it reads the
        # package's .cabal file there -- confirm against the helper.
        intrinsic_metadata = extract_intrinsic_metadata(
            Path(uncompressed_path) / f"{p_info.name}-{p_info.version}", p_info.name
        )

        # Prefer the author declared by the package itself; otherwise fall
        # back to the one carried by ``p_info``.
        author_str = intrinsic_metadata.get("author")
        author = (
            Person.from_fullname(author_str.encode()) if author_str else p_info.author
        )

        message = (
            f"Synthetic release for Haskell source package {p_info.name} "
            f"version {p_info.version}\n"
        )

        return Release(
            name=p_info.version.encode(),
            author=author,
            date=TimestampWithTimezone.from_iso8601(p_info.last_modified),
            message=message.encode(),
            target_type=ObjectType.DIRECTORY,
            target=directory,
            synthetic=True,
        )