Source code for swh.storage.algos.directory
# Copyright (C) 2022-2023 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from typing import Iterable, List, Optional, Tuple
from swh.core.api.classes import stream_results_optional
from swh.model.model import Directory, DirectoryEntry, Sha1Git
from swh.storage.interface import StorageInterface
[docs]
def directory_get(
    storage: StorageInterface, directory_id: Sha1Git
) -> Optional[Directory]:
    """Rebuild a single directory from its entries and its raw manifest.

    Args:
        storage: the storage instance to query
        directory_id: identifier of the directory to fetch

    Returns:
        ``None`` when listing the entries through
        ``storage.directory_get_entries`` gives ``None``; otherwise a
        :class:`Directory` made of all the listed entries and of the raw
        manifest that ``storage.directory_get_raw_manifest`` reports for
        ``directory_id``.
    """
    listing: Optional[Iterable[DirectoryEntry]] = stream_results_optional(
        storage.directory_get_entries,
        directory_id=directory_id,
    )
    if listing is not None:
        # Consume the whole entries stream first, then ask for the manifest.
        all_entries = tuple(listing)
        manifests = storage.directory_get_raw_manifest([directory_id])
        return Directory(
            id=directory_id,
            entries=all_entries,
            raw_manifest=manifests[directory_id],
        )
    return None
[docs]
def directory_get_many(
    storage: StorageInterface, directory_ids: List[Sha1Git]
) -> Iterable[Optional[Directory]]:
    """Same as :func:`directory_get`, but fetches directories slightly more
    effectively by batching requests to ``directory_get_raw_manifest``.

    Args:
        storage: the storage instance
        directory_ids: the directories' identifiers

    Yields:
        One item per identifier, in the order of ``directory_ids``: ``None``
        when the identifier is absent from the mapping returned by
        ``directory_get_raw_manifest``, else the directory put back together
        from its entries and its raw manifest.

    Raises:
        AssertionError: if an identifier is present in the raw manifests but
            listing its entries gives ``None``
    """
    # A single batched call for all the raw manifests.
    raw_manifests = storage.directory_get_raw_manifest(directory_ids)
    for directory_id in directory_ids:
        if directory_id not in raw_manifests:
            yield None
            continue

        entries = stream_results_optional(
            storage.directory_get_entries,
            directory_id=directory_id,
        )
        # Explicit ``is None`` test and explicit raise, instead of
        # ``assert entries``: the check must survive ``python -O`` and must not
        # mistake an empty (falsy) entries container for a missing directory.
        if entries is None:
            raise AssertionError(
                f"Directory {directory_id.hex()} stopped existing"
            )
        yield Directory(
            id=directory_id,
            entries=tuple(entries),
            raw_manifest=raw_manifests[directory_id],
        )
[docs]
def directory_get_many_with_possibly_duplicated_entries(
    storage: StorageInterface, directory_ids: List[Sha1Git]
) -> Iterable[Optional[Tuple[bool, Directory]]]:
    """Same as :func:`directory_get_many`, but does not error on directories whose
    entries may contain duplicated names.

    See :meth:`swh.model.model.Directory.from_possibly_duplicated_entries`.

    Args:
        storage: the storage instance
        directory_ids: the directories' identifiers

    Yields:
        One item per identifier, in the order of ``directory_ids``: ``None``
        when the identifier is absent from the mapping returned by
        ``directory_get_raw_manifest``, else ``(is_corrupt, directory)`` where
        ``is_corrupt`` is True iff some entry names were indeed duplicated

    Raises:
        AssertionError: if an identifier is present in the raw manifests but
            listing its entries gives ``None``
    """
    # A single batched call for all the raw manifests.
    raw_manifests = storage.directory_get_raw_manifest(directory_ids)
    for directory_id in directory_ids:
        if directory_id not in raw_manifests:
            yield None
            continue

        entries = stream_results_optional(
            storage.directory_get_entries,
            directory_id=directory_id,
        )
        # Explicit ``is None`` test and explicit raise, instead of
        # ``assert entries``: the check must survive ``python -O`` and must not
        # mistake an empty (falsy) entries container for a missing directory.
        if entries is None:
            raise AssertionError(
                f"Directory {directory_id.hex()} stopped existing"
            )
        yield Directory.from_possibly_duplicated_entries(
            id=directory_id,
            entries=tuple(entries),
            raw_manifest=raw_manifests[directory_id],
        )