# Source code for swh.deposit.cli.client
# Copyright (C) 2017-2024 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from __future__ import annotations
from contextlib import contextmanager
from datetime import datetime, timezone
import logging
# WARNING: do not import unnecessary things here to keep cli startup time under
# control
import os
import sys
from typing import TYPE_CHECKING, Any, Collection, Dict, List, Optional
import warnings
import xml.etree.ElementTree as ET
import click
from swh.deposit.cli import deposit
logger = logging.getLogger(__name__)
if TYPE_CHECKING:
from swh.deposit.client import PublicApiDepositClient
[docs]
@contextmanager
def trap_and_report_exceptions():
    """Run the wrapped block, reporting known failures in a unified way.

    An ``InputError`` is logged as an option parsing problem, a
    ``MaintenanceError`` is logged as is. In both cases the process then
    exits with status code 1. Any other exception propagates untouched.

    """
    # Function-level import, in line with the module policy of keeping the
    # cli startup time under control
    from swh.deposit.client import MaintenanceError

    try:
        yield
    except (InputError, MaintenanceError) as exc:
        if isinstance(exc, InputError):
            logger.error("Problem during parsing options: %s", exc)
        else:
            logger.error(exc)
        sys.exit(1)
def _url(url: str) -> str:
"""Force the /1 api version at the end of the url (avoiding confusing
issues without it).
Args:
url (str): api url used by cli users
Returns:
Top level api url to actually request
"""
if not url.endswith("/1"):
url = "%s/1" % url
return url
def _init_django():
    """Populate the django application registry, leaving the logging
    configuration set by swh CLI untouched.

    ``DJANGO_SETTINGS_MODULE`` is only given a default value: a value already
    present in the environment is kept.

    """
    import os

    default_settings = "swh.deposit.settings.development"
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", default_settings)

    # The settings module must be selected before django is imported and used
    from django.apps import apps
    from django.conf import settings

    installed_apps = settings.INSTALLED_APPS
    apps.populate(installed_apps)
[docs]
def generate_metadata(
    deposit_client: str,
    name: str,
    authors: List[str],
    external_id: Optional[str] = None,
    create_origin: Optional[str] = None,
    metadata_provenance_url: Optional[str] = None,
) -> str:
    """Generate sword compliant xml metadata with the minimum required metadata.

    The Atom spec, https://tools.ietf.org/html/rfc4287, says that:

    - atom:entry elements MUST contain one or more atom:author elements
    - atom:entry elements MUST contain exactly one atom:title element.
    - atom:entry elements MUST contain exactly one atom:updated element.

    However, we are also using CodeMeta, so we want some basic information to be
    mandatory.

    Therefore, we generate the following mandatory fields:

    - http://www.w3.org/2005/Atom#updated
    - http://www.w3.org/2005/Atom#author
    - http://www.w3.org/2005/Atom#title
    - https://doi.org/10.5063/SCHEMA/CODEMETA-2.0#name (yes, in addition to
      http://www.w3.org/2005/Atom#title, even if they have somewhat the same
      meaning)
    - https://doi.org/10.5063/SCHEMA/CODEMETA-2.0#author

    Args:
        deposit_client: Deposit client username,
        name: Software name
        authors: List of author names
        external_id: Optional external identifier, written as a
            codemeta:identifier element when provided
        create_origin: Origin concerned by the deposit
        metadata_provenance_url: Provenance metadata url

    Returns:
        metadata xml string

    """
    from swh.deposit.utils import NAMESPACES as NS

    # generate a metadata file with the minimum required metadata
    document = ET.Element(f"{{{NS['atom']}}}entry")
    now = datetime.now(tz=timezone.utc)
    # Atom date constructs must follow the RFC 3339 date-time production
    # (uppercase "T" separator); str(datetime) would use a space instead.
    ET.SubElement(document, f"{{{NS['atom']}}}updated").text = now.isoformat()
    ET.SubElement(document, f"{{{NS['atom']}}}author").text = deposit_client
    ET.SubElement(document, f"{{{NS['atom']}}}title").text = name
    ET.SubElement(document, f"{{{NS['codemeta']}}}name").text = name
    for author_name in authors:
        author = ET.SubElement(document, f"{{{NS['codemeta']}}}author")
        ET.SubElement(author, f"{{{NS['codemeta']}}}name").text = author_name

    if external_id:
        ET.SubElement(document, f"{{{NS['codemeta']}}}identifier").text = external_id

    # The swh:deposit element is built detached so that it can be left out
    # entirely when it ends up with no child.
    swh_deposit_elt = ET.Element(f"{{{NS['swh']}}}deposit")

    if create_origin:
        elt = ET.SubElement(swh_deposit_elt, f"{{{NS['swh']}}}create_origin")
        ET.SubElement(elt, f"{{{NS['swh']}}}origin").set("url", create_origin)

    if metadata_provenance_url:
        elt = ET.SubElement(swh_deposit_elt, f"{{{NS['swh']}}}metadata-provenance")
        ET.SubElement(elt, f"{{{NS['schema']}}}url").text = metadata_provenance_url

    if len(swh_deposit_elt):
        document.append(swh_deposit_elt)

    s = ET.tostring(document, encoding="utf-8").decode()

    # Use the module logger (not the root logger) like the rest of the module
    logger.debug("Atom entry dict to generate as xml: %s", s)
    return s
def _collection(client: PublicApiDepositClient) -> str:
"""Retrieve the client's collection"""
# retrieve user's collection
sd_content = client.service_document()
if "error" in sd_content:
msg = sd_content["error"]
raise InputError(f"Service document retrieval: {msg}")
collection = sd_content["app:service"]["app:workspace"][0]["app:collection"][
"sword:name"
]
return collection
[docs]
def client_command_parse_input(
    client,
    username: str,
    archive: Optional[str],
    metadata: Optional[str],
    collection: Optional[str],
    slug: Optional[str],
    create_origin: Optional[str],
    metadata_provenance_url: Optional[str],
    partial: bool,
    deposit_id: Optional[int],
    swhid: Optional[str],
    replace: bool,
    url: str,
    name: Optional[str],
    authors: List[str],
    temp_dir: str,
) -> Dict[str, Any]:
    """Parse the client subcommand options and make sure the combination
    is acceptable. If not, an InputError exception is raised
    explaining the issue.

    By acceptable, we mean:

        - A multipart deposit (create or update) requires:

          - an existing software archive
          - an existing metadata file or author(s) and name provided in
            params

        - A binary deposit (create/update) requires an existing software
          archive

        - A metadata deposit (create/update) requires an existing metadata
          file or author(s) and name provided in params

        - A deposit update requires a deposit_id

    This will not prevent all failure cases though. The remaining
    errors are already dealt with by the underlying api client.

    When no metadata file is given but both name and authors are, a
    ``metadata.xml`` file is generated inside ``temp_dir`` and used as the
    metadata file. When no collection is given, it is retrieved through
    ``client``.

    Raises:
        InputError explaining the user input related issue
        MaintenanceError explaining the api status

    Returns:
        dict with the following keys:

            "archive": the software archive to deposit
            "username": username
            "metadata": the metadata file to deposit
            "collection": the user's collection under which to put the deposit
            "slug": the (deprecated) external identifier, as provided
            "in_progress": if the deposit is partial or not
            "url": deposit's server main entry point
            "deposit_id": optional deposit identifier
            "swhid": optional deposit swhid
            "replace": whether the given deposit is to be replaced or not

    """
    if not metadata:
        if name and authors:
            metadata_path = os.path.join(temp_dir, "metadata.xml")
            logger.debug("Temporary file: %s", metadata_path)
            metadata_xml = generate_metadata(
                username,
                name,
                authors,
                external_id=slug,
                create_origin=create_origin,
                metadata_provenance_url=metadata_provenance_url,
            )
            logger.debug("Metadata xml generated: %s", metadata_xml)
            # The xml carries no encoding declaration, so readers will assume
            # utf-8: do not let the locale pick another encoding on write.
            with open(metadata_path, "w", encoding="utf-8") as f:
                f.write(metadata_xml)
            metadata = metadata_path
        elif archive is not None and not partial and not deposit_id:
            # All of the following hold:
            # * an archive is provided, for a deposit which is neither
            #   partial nor the update of an existing deposit
            # * there is no metadata file
            # * either name or authors is missing, so none can be generated
            raise InputError(
                "For metadata deposit request, either a metadata file with "
                "--metadata or both --author and --name must be provided. "
            )
        elif name or authors:
            # If we are generating metadata, then all mandatory metadata
            # must be present
            raise InputError(
                "For metadata deposit request, either a metadata file with "
                "--metadata or both --author and --name must be provided."
            )
        else:
            # TODO: this is a multipart deposit, we might want to check that
            # metadata are deposited at some point
            pass
    elif name or authors or create_origin:
        raise InputError(
            "Using --metadata flag is incompatible with "
            "--author and --name and --create-origin (those are used to generate one "
            "metadata file)."
        )

    if not archive and not metadata:
        raise InputError(
            "Please provide an actionable command. See --help for more information"
        )

    if metadata:
        from xml.etree import ElementTree

        from swh.deposit.utils import (
            parse_swh_deposit_origin,
            parse_swh_metadata_provenance,
        )

        # Read bytes within a context manager: the handle is closed right
        # away and the xml parser resolves the document encoding itself.
        with open(metadata, "rb") as metadata_file:
            metadata_tree = ElementTree.fromstring(metadata_file.read())

        # Problems found in the metadata file are only reported through the
        # logger, they do not abort the command.
        (create_origin, add_to_origin) = parse_swh_deposit_origin(metadata_tree)
        if create_origin and add_to_origin:
            logger.error(
                "The metadata file provided must not contain both "
                '"<swh:create_origin>" and "<swh:add_to_origin>" tags',
            )
        elif not create_origin and not add_to_origin:
            logger.warning(
                "The metadata file provided should contain "
                '"<swh:create_origin>" or "<swh:add_to_origin>" tag',
            )

        meta_prov_url = parse_swh_metadata_provenance(metadata_tree)
        if not meta_prov_url:
            logger.warning(
                "The metadata file provided should contain "
                '"<swh:metadata-provenance>" tag'
            )

    if replace and not deposit_id:
        raise InputError("To update an existing deposit, you must provide its id")

    if not collection:
        collection = _collection(client)

    return {
        "archive": archive,
        "username": username,
        "metadata": metadata,
        "collection": collection,
        "slug": slug,
        "in_progress": partial,
        "url": url,
        "deposit_id": deposit_id,
        "swhid": swhid,
        "replace": replace,
    }
def _subdict(d: Dict[str, Any], keys: Collection[str]) -> Dict[str, Any]:
"return a dict from d with only given keys"
return {k: v for k, v in d.items() if k in keys}
[docs]
def credentials_decorator(f):
    """Decorate a cli command with the --url, --username and --password flags.

    Both --username and --password are required, --url has a default value.

    """
    password_option = click.option(
        "--password", required=True, help="(Mandatory) User's associated password"
    )
    username_option = click.option(
        "--username", required=True, help="(Mandatory) User's name"
    )
    url_option = click.option(
        "--url",
        default="https://deposit.softwareheritage.org",
        help=(
            "(Optional) Deposit server api endpoint. By default, "
            "https://deposit.softwareheritage.org/1"
        ),
    )
    # Applied innermost first: password, then username, then url
    return url_option(username_option(password_option(f)))
[docs]
def output_format_decorator(f):
    """Decorate a cli command with the -f/--format output flag.

    The chosen value is handed to the command as ``output_format``; it is
    one of "logging" (the default), "yaml" or "json".

    """
    supported_formats = click.Choice(["logging", "yaml", "json"])
    format_option = click.option(
        "-f",
        "--format",
        "output_format",
        default="logging",
        type=supported_formats,
        help="Output format results.",
    )
    return format_option(f)
@deposit.command()
@credentials_decorator
@click.option(
    "--archive",
    type=click.Path(exists=True),
    help="(Optional) Software archive to deposit",
)
@click.option(
    "--metadata",
    type=click.Path(exists=True),
    help=(
        "(Optional) Path to xml metadata file. If not provided, "
        "this will use a file named <archive>.metadata.xml"
    ),
)
@click.option(
    "--archive-deposit/--no-archive-deposit",
    default=False,
    help="Deprecated (ignored)",
)
@click.option(
    "--metadata-deposit/--no-metadata-deposit",
    default=False,
    help="Deprecated (ignored)",
)
@click.option(
    "--collection",
    help="(Optional) User's collection. If not provided, this will be fetched.",
)
@click.option(
    "--slug",
    help=(
        "(Deprecated) (Optional) External system information identifier. "
        "If not provided, it will be generated"
    ),
)
@click.option(
    "--create-origin",
    help=(
        "(Optional) Origin url to attach information to. To be used alongside "
        "--name and --author. This will be generated alongside the metadata to "
        "provide to the deposit server."
    ),
)
@click.option(
    "--metadata-provenance-url",
    help=(
        "(Optional) Provenance metadata url to indicate from where the metadata is "
        "coming from."
    ),
)
@click.option(
    "--partial/--no-partial",
    default=False,
    help=(
        "(Optional) The deposit will be partial, other deposits "
        "will have to take place to finalize it."
    ),
)
@click.option(
    "--deposit-id",
    default=None,
    help="(Optional) Update an existing partial deposit with its identifier",
)
@click.option(
    "--swhid",
    default=None,
    help="(Optional) Update existing completed deposit (status done) with new metadata",
)
@click.option(
    "--replace/--no-replace",
    default=False,
    help="(Optional) Update by replacing existing metadata to a deposit",
)
@click.option("--verbose/--no-verbose", default=False, help="Verbose mode")
@click.option("--name", help="Software name")
@click.option(
    "--author",
    multiple=True,
    help="Software author(s), this can be repeated as many times"
    " as there are authors",
)
@output_format_decorator
@click.pass_context
def upload(
    ctx,
    username: str,
    password: str,
    archive: Optional[str],
    metadata: Optional[str],
    archive_deposit: bool,
    metadata_deposit: bool,
    collection: Optional[str],
    slug: Optional[str],
    create_origin: Optional[str],
    metadata_provenance_url: Optional[str],
    partial: bool,
    deposit_id: Optional[int],
    swhid: Optional[str],
    replace: bool,
    url: str,
    verbose: bool,
    name: Optional[str],
    author: List[str],
    output_format: Optional[str],
):
    """Software Heritage Public Deposit Client

    Create/Update deposit through the command line.

    More documentation can be found at
    https://docs.softwareheritage.org/devel/swh-deposit/getting-started.html.

    """
    # NOTE: the docstring above is the help text of the command, keep any
    # developer-oriented documentation in comments.
    #
    # Flow: warn about deprecated flags, validate the options with
    # client_command_parse_input, then create a deposit, or update one when
    # a deposit id is provided, and finally print the api result.
    #
    # NOTE(review): --deposit-id declares no click type, so the value
    # presumably arrives as a string despite the Optional[int] annotation
    # -- to be confirmed.
    import tempfile

    _init_django()

    from swh.deposit.client import PublicApiDepositClient

    if archive_deposit or metadata_deposit:
        warnings.warn(
            '"archive_deposit" and "metadata_deposit" option arguments are '
            "deprecated and have no effect; simply do not provide the archive "
            "for a metadata-only deposit, and do not provide a metadata for an "
            "archive-only deposit.",
            DeprecationWarning,
        )

    if slug:
        # Raised outside of trap_and_report_exceptions: this InputError
        # propagates to the caller as is.
        if create_origin and slug != create_origin:
            raise InputError(
                '"--slug" flag has been deprecated in favor of "--create-origin" flag. '
                "You mentioned both with different values, please only "
                'use "--create-origin".'
            )

        warnings.warn(
            '"--slug" flag has been deprecated in favor of "--create-origin" flag. '
            'Please, start using "--create-origin" instead of "--slug"',
            DeprecationWarning,
        )

    url = _url(url)

    client = PublicApiDepositClient(url=url, auth=(username, password))
    # The temporary directory hosts the generated metadata file, if any. It
    # has to outlive the api call which uploads that file.
    with tempfile.TemporaryDirectory() as temp_dir:
        with trap_and_report_exceptions():
            logger.debug("Parsing cli options")
            config = client_command_parse_input(
                client,
                username,
                archive,
                metadata,
                collection,
                slug,
                create_origin,
                metadata_provenance_url,
                partial,
                deposit_id,
                swhid,
                replace,
                url,
                name,
                author,
                temp_dir,
            )

        if verbose:
            logger.info("Parsed configuration: %s", config)

        # Only this subset of the configuration is passed to the api client
        keys = [
            "archive",
            "collection",
            "in_progress",
            "metadata",
            "slug",
        ]
        if config["deposit_id"]:
            keys += ["deposit_id", "replace", "swhid"]
            data = client.deposit_update(**_subdict(config, keys))
        else:
            data = client.deposit_create(**_subdict(config, keys))

        print_result(data, output_format)
@deposit.command()
@credentials_decorator
@click.option("--deposit-id", default=None, required=True, help="Deposit identifier.")
@output_format_decorator
@click.pass_context
def status(ctx, url, username, password, deposit_id, output_format):
    """Deposit's status"""
    # NOTE: the docstring above is the help text of the command.
    #
    # Fetch the status of the deposit ``deposit_id`` from the client's
    # collection and print it in the requested output format.
    _init_django()

    from swh.deposit.client import PublicApiDepositClient

    logger.debug("Status deposit")
    with trap_and_report_exceptions():
        # The api url is normalized once, here
        client = PublicApiDepositClient(url=_url(url), auth=(username, password))
        collection = _collection(client)

    print_result(
        client.deposit_status(collection=collection, deposit_id=deposit_id),
        output_format,
    )
[docs]
def print_result(data: Dict[str, Any], output_format: Optional[str]) -> None:
    """Render the result data according to the requested output format.

    "json" and "yaml" are serialized and echoed through click; any other
    value sends the data to the module logger at info level.

    """
    import json

    import yaml

    if output_format == "json":
        click.echo(json.dumps(data))
        return

    if output_format == "yaml":
        click.echo(yaml.dump(data))
        return

    logger.info(data)
@deposit.command("metadata-only")
@credentials_decorator
@click.option(
    "--metadata",
    "metadata_path",
    type=click.Path(exists=True),
    required=True,
    help="Path to xml metadata file",
)
@output_format_decorator
@click.pass_context
def metadata_only(ctx, url, username, password, metadata_path, output_format):
    """Deposit metadata only upload"""
    # NOTE: the docstring above is the help text of the command.
    from xml.etree import ElementTree

    _init_django()

    from swh.deposit.client import PublicApiDepositClient
    from swh.deposit.utils import parse_swh_metadata_provenance, parse_swh_reference

    # The metadata file is parsed upfront: it must reference a swhid
    with open(metadata_path, "r") as metadata_file:
        tree = ElementTree.fromstring(metadata_file.read())

    if not parse_swh_reference(tree):
        raise InputError("A SWHID must be provided for a metadata-only deposit")

    # A missing provenance is only worth a warning, the deposit goes on
    if not parse_swh_metadata_provenance(tree):
        logger.warning(
            "A '<swh:metadata-provenance>' should be provided for a metadata-only "
            "deposit"
        )

    with trap_and_report_exceptions():
        api_client = PublicApiDepositClient(
            url=_url(url), auth=(username, password)
        )
        collection_name = _collection(api_client)
        outcome = api_client.deposit_metadata_only(collection_name, metadata_path)
        print_result(outcome, output_format)
@deposit.command("list")
@credentials_decorator
@output_format_decorator
@click.option(
    "--page",
    default=1,
    help="Page number when requesting more information",
)
@click.option(
    "--page-size",
    default=100,
    help="Number of results per page when requesting more information",
)
@click.pass_context
def deposit_list(ctx, url, username, password, output_format, page, page_size):
    """Client deposit listing"""
    # NOTE: the docstring above is the help text of the command.
    #
    # List the deposits of the client's collection, one page at a time, and
    # print the result in the requested output format.
    _init_django()

    from swh.deposit.client import PublicApiDepositClient

    logger.debug("List deposits for user %s", username)
    with trap_and_report_exceptions():
        # The api url is normalized once, here
        client = PublicApiDepositClient(url=_url(url), auth=(username, password))
        collection = _collection(client)
        result = client.deposit_list(collection, page=page, page_size=page_size)
        print_result(result, output_format)