Source code for swh.loader.cli
# Copyright (C) 2019-2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from importlib.metadata import entry_points
# WARNING: do not import unnecessary things here to keep cli startup time under
# control
import logging
from typing import Any
import click
from swh.core.cli import CONTEXT_SETTINGS
from swh.core.cli import swh as swh_cli_group
logger = logging.getLogger(__name__)
LOADERS = {
entry_point.name.split(".", 1)[1]: entry_point
for entry_point in entry_points(group="swh.workers")
if entry_point.name.split(".", 1)[0] == "loader"
}
SUPPORTED_LOADERS = sorted(list(LOADERS))
[docs]
def get_loader(name: str, **kwargs) -> Any:
"""Given a loader name, instantiate it.
Args:
name: Loader's name
kwargs: Configuration dict (url...)
Returns:
An instantiated loader
"""
if name not in LOADERS:
raise ValueError(
"Invalid loader %s: only supported loaders are %s"
% (name, SUPPORTED_LOADERS)
)
registry_entry = LOADERS[name].load()()
logger.debug(f"registry: {registry_entry}")
loader_cls = registry_entry["loader"]
logger.debug(f"loader class: {loader_cls}")
return loader_cls.from_config(**kwargs)
@swh_cli_group.group(name="loader", context_settings=CONTEXT_SETTINGS)
@click.option(
"--config-file",
"-C",
default=None,
type=click.Path(
exists=True,
dir_okay=False,
),
help="Configuration file.",
)
@click.pass_context
def loader(ctx, config_file):
"""Loader cli tools"""
from os import environ
from swh.core.config import read
ctx.ensure_object(dict)
logger.debug("ctx: %s", ctx)
if not config_file:
config_file = environ.get("SWH_CONFIG_FILENAME")
ctx.obj["config"] = read(config_file)
logger.debug("config_file: %s", config_file)
logger.debug("config: %s", ctx.obj["config"])
@loader.command(name="run", context_settings=CONTEXT_SETTINGS)
@click.argument("type", type=click.Choice(SUPPORTED_LOADERS))
@click.argument("url")
@click.argument("options", nargs=-1)
@click.pass_context
def run(ctx, type, url, options):
"""Ingest with loader <type> the origin located at <url>
Expected configuration:
\b
* :ref:`cli-config-storage`
* :ref:`cli-config-metadata_fetcher_credentials`
"""
import iso8601
from swh.scheduler.cli.utils import parse_options
conf = ctx.obj.get("config", {})
if "storage" not in conf:
logger.warning(
"No storage configuration detected, using an in-memory storage instead."
)
conf["storage"] = {"cls": "memory"}
(_, kw) = parse_options(options)
logger.debug(f"kw: {kw}")
visit_date = kw.get("visit_date")
if visit_date and isinstance(visit_date, str):
visit_date = iso8601.parse_date(visit_date)
kw["visit_date"] = visit_date
loader = get_loader(
type,
url=url,
storage=conf["storage"],
metadata_fetcher_credentials=conf.get("metadata_fetcher_credentials"),
**kw,
)
result = loader.load()
visit_status = loader.storage.origin_visit_status_get_latest(
url, loader.visit.visit
)
msg = f"{result} for origin '{url}'"
directory = kw.get("directory")
if directory:
msg = msg + f" and directory '{directory}'"
click.echo(msg)
ctx.exit(code=0 if (visit_status and visit_status.status == "full") else 1)
@loader.command(name="list", context_settings=CONTEXT_SETTINGS)
@click.argument("type", default="all", type=click.Choice(["all"] + SUPPORTED_LOADERS))
@click.pass_context
def list(ctx, type):
"""List supported loaders and optionally their arguments"""
import inspect
if type == "all":
loaders = ", ".join(SUPPORTED_LOADERS)
click.echo(f"Supported loaders: {loaders}")
else:
registry_entry = LOADERS[type].load()()
loader_cls = registry_entry["loader"]
doc = inspect.getdoc(loader_cls).strip()
# Hack to get the signature of the class even though it subclasses
# Generic, which reimplements __new__.
# See <https://bugs.python.org/issue40897>
signature = inspect.signature(loader_cls.__init__)
signature_str = str(signature).replace("self, ", "")
click.echo(f"Loader: {doc}\nsignature: {signature_str}")