# Source code for swh.core.cli.db

#!/usr/bin/env python3
# Copyright (C) 2018-2022  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import logging
from os import environ
import warnings

import click

from swh.core.cli import CONTEXT_SETTINGS
from swh.core.cli import swh as swh_cli_group

# Install a blanket "ignore" filter: no category or module restriction is
# given, so this applies to every warning raised after this module is imported.
warnings.filterwarnings("ignore")  # noqa prevent psycopg from side-tracking us


# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)


@swh_cli_group.group(name="db", context_settings=CONTEXT_SETTINGS)
@click.option(
    "--config-file",
    "-C",
    type=click.Path(exists=True, dir_okay=False),
    default=None,
    help="Configuration file.",
)
@click.pass_context
def db(ctx, config_file):
    """Software Heritage database generic tools."""
    # Imported inside the command rather than at module level.
    from swh.core.config import read as config_read

    # Sub-commands read the loaded configuration from ctx.obj["config"].
    ctx.ensure_object(dict)

    # Without -C/--config-file, fall back on the SWH_CONFIG_FILENAME
    # environment variable (which may itself be unset, yielding None).
    path = (
        config_file
        if config_file is not None
        else environ.get("SWH_CONFIG_FILENAME")
    )
    ctx.obj["config"] = config_read(path)


@db.command(name="create", context_settings=CONTEXT_SETTINGS)
@click.argument("module", required=True)
@click.option(
    "--dbname",
    "--db-name",
    "-d",
    help="Database name.",
    default="softwareheritage-dev",
    show_default=True,
)
@click.option(
    "--template",
    "-T",
    help="Template database from which to build this database.",
    default="template1",
    show_default=True,
)
def db_create(module, dbname, template):
    """Create a database for the Software Heritage <module>.

    This may also execute superuser-level initialization steps.

    Example::

        swh db create -d swh-test storage

    If you want to specify non-default postgresql connection parameters, please
    provide them using standard environment variables or by means of a
    properly crafted libpq connection URI. See psql(1) man page (section
    ENVIRONMENTS) for details.

    Note: this command requires a postgresql connection with superuser permissions.

    Example::

        \b
        PGPORT=5434 swh db create indexer
        swh db create -d postgresql://superuser:passwd@pghost:5433/swh-storage storage

    """
    # Imported inside the command rather than at module level.
    from swh.core.db.db_utils import create_database_for_package

    # The label used to read "dn_name", which made the log line hard to grep.
    logger.debug("db_create %s dbname=%s", module, dbname)
    create_database_for_package(module, dbname, template)


@db.command(name="init-admin", context_settings=CONTEXT_SETTINGS)
@click.argument("module", required=True)
@click.option(
    "--dbname",
    "--db-name",
    "-d",
    help="Database name.",
    default="softwareheritage-dev",
    show_default=True,
)
def db_init_admin(module: str, dbname: str) -> None:
    """Execute superuser-level initialization steps (e.g. pg extensions, admin
    functions, ...)

    Example::

        PGPASSWORD=... swh db init-admin -d swh-test scheduler

    If you want to specify non-default postgresql connection parameters, please
    provide them using standard environment variables or by means of a
    properly crafted libpq connection URI. See psql(1) man page (section
    ENVIRONMENTS) for details.

    Note: this command requires a postgresql connection with superuser permissions
    (e.g. postgres, swh-admin, ...)

    Example::

        \b
        PGPORT=5434 swh db init-admin scheduler
        swh db init-admin -d postgresql://superuser:passwd@pghost:5433/swh-scheduler \\
          scheduler

    """
    # NOTE: the docstring is a regular (non-raw) string because click needs the
    # real "\b" control character; the shell line-continuation backslash above
    # must therefore be escaped, otherwise Python swallows it with the newline.
    from swh.core.db.db_utils import init_admin_extensions

    logger.debug("db_init_admin %s dbname=%s", module, dbname)
    init_admin_extensions(module, dbname)


@db.command(name="init", context_settings=CONTEXT_SETTINGS)
@click.argument("module", required=True)
@click.option(
    "--dbname",
    "--db-name",
    "-d",
    help="Database name or connection URI.",
    default=None,
    show_default=False,
)
@click.option(
    "--flavor",
    help="Database flavor.",
    default=None,
)
@click.pass_context
def db_init(ctx, module, dbname, flavor):
    """Initialize a database for the Software Heritage <module>.

    The database connection string can come from the --dbname option, or from
    the configuration file (see option ``--config-file`` in ``swh db --help``)
    in the section named after the MODULE argument.

    Example::

        \b
        $ cat conf.yml
        storage:
          cls: postgresql
          db: postgresql://user:passwd@pghost:5433/swh-storage
          objstorage:
            cls: memory

        \b
        $ swh db -C conf.yml init storage  # or
        $ SWH_CONFIG_FILENAME=conf.yml swh db init storage
        $ # or
        $ swh db init --dbname postgresql://user:passwd@pghost:5433/swh-storage storage

    """
    from swh.core.db.db_utils import (
        get_database_info,
        import_swhmodule,
        populate_database_for_package,
        swh_set_db_version,
    )

    # Without --dbname, the connection string is looked up in the section of
    # the configuration file that bears the module's name.
    module_cfg = None
    if dbname is None:
        module_cfg = ctx.obj["config"].get(module, {})
        dbname = get_dburl_from_config(module_cfg)

    if not dbname:
        raise click.BadParameter(
            "Missing the postgresql connection configuration. Either fix your "
            "configuration file or use the --dbname option."
        )

    logger.debug("db_init %s flavor=%s dbname=%s", module, flavor, dbname)

    populated = populate_database_for_package(module, dbname, flavor)
    initialized, dbversion, dbflavor = populated

    if dbversion is None:
        # New style: the sql init scripts did not record a version, so take it
        # from the module's datastore (when the module exposes a factory).
        if module_cfg is None:
            module_cfg = {"cls": "postgresql", "db": dbname}
        get_datastore = getattr(import_swhmodule(module), "get_datastore", None)
        if get_datastore:
            datastore = get_datastore(**module_cfg)
            if hasattr(datastore, "current_version"):
                expected_version = datastore.current_version
                logger.info(
                    "Initializing database version to %s from the %s datastore",
                    expected_version,
                    module,
                )
                swh_set_db_version(
                    dbname, expected_version, desc="DB initialization"
                )
            else:
                logger.warning(
                    "Datastore %s does not declare the 'current_version' attribute",
                    datastore,
                )
    else:
        # Old style: the version was written by the sql init scripts.
        click.secho(
            "ERROR: the database version has been populated by sql init scripts. "
            "This is now deprecated and should not happen any more"
        )

    # Read the version back from the database to report what was really stored.
    dbversion = get_database_info(dbname)[1]
    state = "initialized" if initialized else "exists"
    flavor_note = "" if dbflavor is None else f" (flavor {dbflavor})"

    if dbversion is not None:
        click.secho(
            f"DONE database for {module} {state}{flavor_note} at version {dbversion}",
            fg="green",
            bold=True,
        )
    else:
        click.secho(
            f"ERROR: database for {module} {state}{flavor_note} "
            "BUT db version could not be set",
            fg="red",
            bold=True,
        )

    flavor_mismatch = flavor is not None and dbflavor != flavor
    if flavor_mismatch:
        click.secho(
            f"WARNING requested flavor '{flavor}' != recorded flavor '{dbflavor}'",
            fg="red",
            bold=True,
        )


@db.command(name="version", context_settings=CONTEXT_SETTINGS)
@click.argument("module", required=True)
@click.option(
    "--all/--no-all",
    "show_all",
    help="Show version history.",
    default=False,
    show_default=True,
)
@click.option("--module-config-key", help="Module config key to lookup.", default=None)
@click.pass_context
def db_version(ctx, module, show_all, module_config_key=None):
    """Print the database version for the Software Heritage <module>.

    The database connection string is read from the configuration file (see
    option ``--config-file`` in ``swh db --help``), in the section named after
    ``--module-config-key`` if given, or after the MODULE argument otherwise.

    Example::

        \b
        swh db -C conf.yml version storage
        swh db version scheduler
        swh db version scrubber --module-config-key=scrubber_db

    """
    from swh.core.db.db_utils import get_database_info, import_swhmodule

    # use the db cnx from the config file; the expected config entry is either the given
    # module_config_key or defaulting to the module name (if module_config_key is not
    # provided)
    cfg = ctx.obj["config"].get(module_config_key or module, {})
    dbname = get_dburl_from_config(cfg)

    if not dbname:
        # This command has no --dbname option, so only point at the config file.
        raise click.BadParameter(
            "Missing the postgresql connection configuration. Please fix your "
            "configuration file."
        )

    logger.debug("db_version dbname=%s", dbname)

    db_module, db_version, db_flavor = get_database_info(dbname)
    if db_module is None:
        click.secho(
            "WARNING the database does not have a dbmodule table.", fg="red", bold=True
        )
        db_module = module

    # Explicit check instead of ``assert``: assertions are stripped under
    # ``python -O`` and would surface as a traceback rather than a CLI error.
    if db_module != module:
        raise click.BadParameter(
            f"Error: the given module ({module}) does not match the value "
            f"stored in the database ({db_module})."
        )

    click.secho(f"module: {db_module}", fg="green", bold=True)

    if db_flavor is not None:
        click.secho(f"flavor: {db_flavor}", fg="green", bold=True)

    # instantiate the data source to retrieve the current (expected) db version
    datastore_factory = getattr(import_swhmodule(db_module), "get_datastore", None)
    if datastore_factory:
        datastore = datastore_factory(**cfg)
        code_version = datastore.current_version
        # Shown in red when the code and the database disagree on the version.
        click.secho(
            f"current code version: {code_version}",
            fg="green" if code_version == db_version else "red",
            bold=True,
        )

    if not show_all:
        click.secho(f"version: {db_version}", fg="green", bold=True)
    else:
        from swh.core.db.db_utils import swh_db_versions

        # One line per recorded version: number, timestamp and description.
        for version, tstamp, desc in swh_db_versions(dbname):
            click.echo(f"{version} [{tstamp}] {desc}")


@db.command(name="upgrade", context_settings=CONTEXT_SETTINGS)
@click.argument("module", required=True)
@click.option(
    "--to-version",
    type=int,
    help="Upgrade up to version VERSION",
    metavar="VERSION",
    default=None,
)
@click.option(
    "--interactive/--non-interactive",
    help="Do not ask questions (use default answer to all questions)",
    default=True,
)
@click.option(
    "--module-config-key", help="Module configuration key to lookup.", default=None
)
@click.pass_context
def db_upgrade(ctx, module, to_version, interactive, module_config_key):
    """Upgrade the database for given module (to a given version if specified).

    The database connection string is read from the configuration file (see
    option ``--config-file`` in ``swh db --help``), in the section named after
    ``--module-config-key`` if given, or after the MODULE argument otherwise.

    Examples::

        \b
        swh db upgrade storage
        swh db upgrade scheduler --to-version=10
        swh db upgrade scrubber --to-version=10 --module-config-key=scrubber_db

    """
    from swh.core.db.db_utils import (
        get_database_info,
        import_swhmodule,
        swh_db_upgrade,
        swh_set_db_module,
    )

    # use the db cnx from the config file; the expected config entry is either the given
    # module_config_key or defaulting to the module name (if module_config_key is not
    # provided)
    cfg = ctx.obj["config"].get(module_config_key or module, {})
    dbname = get_dburl_from_config(cfg)

    if not dbname:
        # This command has no --dbname option, so only point at the config file.
        raise click.BadParameter(
            "Missing the postgresql connection configuration. Please fix your "
            "configuration file."
        )

    logger.debug("db_upgrade dbname=%s", dbname)

    db_module, db_version, db_flavor = get_database_info(dbname)
    if db_module is None:
        click.secho(
            "Warning: the database does not have a dbmodule table.",
            fg="yellow",
            bold=True,
        )
        # In non-interactive mode the default answer (yes) is assumed.
        if interactive and not click.confirm(
            f"Write the module information ({module}) in the database?", default=True
        ):
            raise click.BadParameter("Migration aborted.")
        swh_set_db_module(dbname, module)
        db_module = module

    if db_module != module:
        raise click.BadParameter(
            f"Error: the given module ({module}) does not match the value "
            f"stored in the database ({db_module})."
        )

    # instantiate the data source to retrieve the current (expected) db version
    datastore_factory = getattr(import_swhmodule(db_module), "get_datastore", None)
    if not datastore_factory:
        # The f prefix was missing here, so users saw a literal "{db_module}".
        raise click.UsageError(
            f"You cannot use this command on old-style datastore backend {db_module}"
        )
    datastore = datastore_factory(**cfg)
    ds_version = datastore.current_version
    if to_version is None:
        # Default target: the version the code currently expects.
        to_version = ds_version
    if to_version > ds_version:
        raise click.UsageError(
            f"The target version {to_version} is larger than the current version "
            f"{ds_version} of the datastore backend {db_module}"
        )

    if to_version == db_version:
        click.secho(
            f"No migration needed: the current version is {db_version}",
            fg="yellow",
        )
    else:
        new_db_version = swh_db_upgrade(dbname, module, to_version)
        click.secho(f"Migration to version {new_db_version} done", fg="green")
        if new_db_version < ds_version:
            click.secho(
                "Warning: migration was not complete: "
                f"the current version is {ds_version}",
                fg="yellow",
            )


def get_dburl_from_config(cfg):
    """Return the postgresql connection string found in a module configuration.

    Args:
        cfg: configuration mapping of one module; its ``cls`` entry must be
            ``"postgresql"``. The connection string is read from the ``db``
            entry, either at the top level or nested under ``args``.

    Returns:
        The value of the ``db`` entry, or None when there is none.

    Raises:
        click.BadParameter: if ``cls`` is missing or is not ``"postgresql"``.

    """
    if cfg.get("cls") != "postgresql":
        raise click.BadParameter(
            "Configuration cls must be set to 'postgresql' for this command."
        )
    if "args" in cfg:
        # for bw compat: settings nested under an "args" entry
        cfg = cfg["args"]
    return cfg.get("db")