Source code for swh.vault.cookers.utils

# Copyright (C) 2019  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

from swh.storage.algos.revisions_walker import get_revisions_walker


def revision_log(storage, rev_id, per_page=1000):
    """Retrieve a revision log in a paginated way in order to avoid storage
    timeouts when the total number of revisions to fetch is large.

    The log is fetched through successive revisions walkers, each one being
    resumed from the exported state of the previous one.

    Args:
        storage (swh.storage.storage.Storage): instance of swh storage
            (either local or remote)
        rev_id (bytes): a revision identifier
        per_page (Optional[int]): the maximum number of revisions to return
            in each page, must be at least 1

    Yields:
        dict: Revision information as a dictionary

    Raises:
        ValueError: if ``per_page`` is lower than 1. As this function is a
            generator, the error is raised when iteration starts, not when
            the function is called.
    """
    # With a non-positive page size, max_revs never gets greater than
    # nb_revs, so the exit condition of the loop below could never be met
    # and the generator would spin forever.
    if per_page < 1:
        raise ValueError(
            "per_page must be a positive integer, got %r" % (per_page,)
        )

    rw_state = {}
    nb_revs = 0
    max_revs = per_page
    while True:
        # Get an iterator returning the commits log from rev_id.
        # At most max_revs visited revisions from rev_id in the commits graph
        # will be returned.
        revs_walker = get_revisions_walker(
            "bfs", storage, rev_id, max_revs=max_revs, state=rw_state
        )
        # Iterate on at most per_page revisions in the commits log.
        for rev in revs_walker:
            nb_revs += 1
            yield rev
        # If the total number of iterated revisions is less than the
        # maximum requested one, it means that we hit the initial revision
        # in the log.
        if nb_revs < max_revs:
            break
        # Backup iterator state to continue the revisions iteration
        # from where we left it.
        rw_state = revs_walker.export_state()
        # Increment the maximum of revisions to iterate from rev_id
        # to get next revisions in the log.
        max_revs += per_page