Source code for swh.loader.package.golang.loader

# Copyright (C) 2022-2024  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import json
import logging
import re
from typing import Iterator, List, Optional, Sequence, Tuple

import attr
from looseversion import LooseVersion2
from requests import HTTPError

from swh.loader.core.utils import (
    EMPTY_AUTHOR,
    cached_method,
    get_url_body,
    release_name,
)
from swh.loader.exception import NotFound
from swh.loader.package.loader import BasePackageInfo, PackageLoader
from swh.model.model import ObjectType, Release, Sha1Git, TimestampWithTimezone
from swh.storage.interface import StorageInterface

logger = logging.getLogger(__name__)


def _uppercase_encode(url: str) -> str:
    return re.sub("([A-Z]{1})", r"!\1", url).lower()


[docs] @attr.s class GolangPackageInfo(BasePackageInfo): name = attr.ib(type=str) timestamp = attr.ib(type=Optional[TimestampWithTimezone])
[docs] class GolangLoader(PackageLoader[GolangPackageInfo]): """Load Golang module zip file into SWH archive.""" visit_type = "golang" GOLANG_PKG_DEV_URL = "https://pkg.go.dev" GOLANG_PROXY_URL = "https://proxy.golang.org" def __init__( self, storage: StorageInterface, url: str, max_content_size: Optional[int] = None, **kwargs, ): super().__init__(storage, url, max_content_size=max_content_size, **kwargs) # The lister saves human-usable URLs, so we translate them to proxy URLs # for use in the loader. # This URL format is detailed in https://go.dev/ref/mod#goproxy-protocol assert url.startswith( self.GOLANG_PKG_DEV_URL ), "Go package URL (%s) not from %s" % (url, self.GOLANG_PKG_DEV_URL) self.name = url[len(self.GOLANG_PKG_DEV_URL) + 1 :] self.url = url.replace(self.GOLANG_PKG_DEV_URL, self.GOLANG_PROXY_URL) self.url = _uppercase_encode(self.url) @cached_method def _get_versions(self) -> List[str]: return get_url_body(f"{self.url}/@v/list").decode().splitlines()
[docs] def get_versions(self) -> Sequence[str]: versions = self._get_versions() # some go packages only have a development version not listed by the endpoint above, # so ensure to return it or it will be missed by the golang loader default_version = self.get_default_version() if default_version not in versions: versions.append(default_version) return versions
[docs] @cached_method def get_default_version(self) -> str: try: latest = get_url_body(f"{self.url}/@latest") return json.loads(latest)["Version"] except (NotFound, HTTPError, json.JSONDecodeError): return max(self._get_versions(), key=LooseVersion2)
def _raw_info(self, version: str) -> dict: url = f"{self.url}/@v/{_uppercase_encode(version)}.info" return json.loads(get_url_body(url))
[docs] def get_package_info(self, version: str) -> Iterator[Tuple[str, GolangPackageInfo]]: # Encode the name because creating nested folders can become problematic encoded_name = self.name.replace("/", "__") filename = f"{encoded_name}-{version}.zip" timestamp = TimestampWithTimezone.from_iso8601(self._raw_info(version)["Time"]) p_info = GolangPackageInfo( url=f"{self.url}/@v/{_uppercase_encode(version)}.zip", filename=filename, version=version, timestamp=timestamp, name=self.name, ) yield release_name(version), p_info
[docs] def build_release( self, p_info: GolangPackageInfo, uncompressed_path: str, directory: Sha1Git ) -> Optional[Release]: msg = ( f"Synthetic release for Golang source package {p_info.name} " f"version {p_info.version}\n" ) return Release( name=p_info.version.encode(), message=msg.encode(), date=p_info.timestamp, author=EMPTY_AUTHOR, # Go modules offer very little metadata target_type=ObjectType.DIRECTORY, target=directory, synthetic=True, )