Source code for swh.graph.config

# Copyright (C) 2019-2025  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import logging
import os
from pathlib import Path
from typing import Any, Optional

# WARNING: do not import unnecessary things here to keep cli startup time under
# control
import psutil

logger = logging.getLogger(__name__)


def check_config(
    conf: dict[str, Any], base_rust_executable_dir: Optional[Path] = None
) -> dict[str, Any]:
    """Check configuration and propagate defaults.

    The mapping passed in is left untouched: a shallow copy is filled in and
    returned.

    Arguments:
        conf: the configuration entries to check.
        base_rust_executable_dir: path to the directory that contains the local
            project's Rust build artifact, ie. :file:`target/`.

    Returns:
        a copy of ``conf`` in which ``batch_size``, ``llp_gammas``, ``profile``,
        ``rust_executable_dir`` and ``object_types`` are always set.
        ``rust_executable_dir`` is always a string ending with ``/``.
    """
    conf = conf.copy()

    if "batch_size" not in conf:
        # Use 0.1% of the RAM as a batch size:
        # ~1 billion for big servers, ~10 million for small desktop machines
        conf["batch_size"] = min(int(psutil.virtual_memory().total / 1000), 2**30 - 1)
        logger.debug("batch_size not configured, defaulting to %s", conf["batch_size"])

    if "llp_gammas" not in conf:
        conf["llp_gammas"] = "-1,-2,-3,-4"
        logger.debug("llp_gammas not configured, defaulting to %s", conf["llp_gammas"])

    # rust related config entries
    # Debug mode is selected when running under pytest (PYTEST_VERSION is set in
    # the environment), when explicitly requested through the "profile" entry,
    # or when the configured executable directory is itself named "debug".
    debug_mode = (
        os.environ.get("PYTEST_VERSION") is not None
        or conf.get("profile") == "debug"
        or (
            "rust_executable_dir" in conf
            and Path(conf["rust_executable_dir"]).name == "debug"
        )
    )
    if "profile" not in conf:
        conf["profile"] = "debug" if debug_mode else "release"

    if "rust_executable_dir" not in conf:
        # look for a target/ directory in the sources root directory
        if base_rust_executable_dir is None:
            # in editable installs, __file__ is a symlink to the original file in
            # the source directory, which is where in the end the rust sources and
            # executable are. So resolve the symlink before looking for the target/
            # directory relative to the actual python file.
            path = Path(__file__).resolve()
            base_rust_executable_dir = path.parent.parent.parent / "target"
        conf["rust_executable_dir"] = str(base_rust_executable_dir / conf["profile"])

    # The entry may have been supplied as a path-like object rather than a
    # string; convert it first so the trailing-slash normalisation below cannot
    # fail on a missing ``endswith`` attribute.
    rust_executable_dir = str(conf["rust_executable_dir"])
    if not rust_executable_dir.endswith("/"):
        rust_executable_dir += "/"
    conf["rust_executable_dir"] = rust_executable_dir

    if "object_types" not in conf:
        conf["object_types"] = "*"

    return conf
def check_config_compress(
    config,
    graph_name,
    in_dir,
    out_dir,
    sensitive_in_dir,
    sensitive_out_dir,
    check_flavor,
):
    """Check compression-specific configuration and initialize its execution
    environment.

    Each of ``graph_name``, ``in_dir``, ``out_dir`` and ``check_flavor`` is taken
    from the matching argument when it is not ``None``, otherwise from the entry
    of the same name in ``config``. ``check_flavor`` falls back to ``"full"``
    when given in neither place.

    The output directory, the temporary directory (``tmp_dir`` entry, defaulting
    to ``<out_dir>/tmp``) and, when given, the sensitive output directory are
    created if they do not exist.

    Arguments:
        config: base configuration, first passed through :func:`check_config`.
        graph_name: name of the graph, or ``None`` to use the configured one.
        in_dir: input directory, or ``None`` to use the configured one.
        out_dir: output directory, or ``None`` to use the configured one.
        sensitive_in_dir: optional input directory for sensitive data.
        sensitive_out_dir: optional output directory for sensitive data.
        check_flavor: one of ``full``, ``history_hosting``, ``staging``,
            ``example`` or ``none``; ``None`` to use the configured one.

    Returns:
        the checked configuration, with the resolved values stored under
        ``graph_name``, ``in_dir``, ``out_dir``, ``check_flavor`` and ``tmp_dir``
        (plus ``sensitive_in_dir`` / ``sensitive_out_dir`` when provided).

    Raises:
        ValueError: if ``graph_name``, ``in_dir`` or ``out_dir`` is provided
            neither as argument nor in ``config``, or if the check flavor is not
            a supported one.
    """
    conf = check_config(config)

    def _retrieve_value(value, name, default_value=None, is_path=False):
        # Precedence: explicit argument, then configuration entry, then default.
        # Path objects are stored in the configuration as plain strings.
        if value is not None:
            value = str(value) if isinstance(value, Path) else value
        elif name in conf:
            value = conf[name]
        elif default_value is not None:
            value = (
                str(default_value) if isinstance(default_value, Path) else default_value
            )
        else:
            raise ValueError(f"No {name} provided.")
        conf[name] = value
        return Path(value) if is_path else value

    graph_name = _retrieve_value(graph_name, "graph_name")
    in_dir = _retrieve_value(in_dir, "in_dir", is_path=True)
    out_dir = _retrieve_value(out_dir, "out_dir", is_path=True)
    # Without an explicit default the lookup would raise before the documented
    # "full" fallback could ever apply.
    check_flavor = _retrieve_value(check_flavor, "check_flavor", default_value="full")

    out_dir.mkdir(parents=True, exist_ok=True)

    if sensitive_in_dir is not None:
        conf["sensitive_in_dir"] = str(sensitive_in_dir)
    if sensitive_out_dir is not None:
        Path(sensitive_out_dir).mkdir(parents=True, exist_ok=True)
        conf["sensitive_out_dir"] = str(sensitive_out_dir)

    if "tmp_dir" not in conf:
        tmp_dir = out_dir / "tmp"
        conf["tmp_dir"] = str(tmp_dir)
    else:
        tmp_dir = Path(conf["tmp_dir"])
    tmp_dir.mkdir(parents=True, exist_ok=True)

    if conf["check_flavor"] not in [
        "full",
        "history_hosting",
        "staging",
        "example",
        "none",
    ]:
        raise ValueError(
            f"Unsupported check flavor: {check_flavor}. "
            "Must be one of full, history_hosting, staging, example or none."
        )

    return conf