Source code for swh.lister.utils
# Copyright (C) 2018-2023 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from typing import Iterator, Optional, Tuple
import urllib.parse
def split_range(total_pages: int, nb_pages: int) -> Iterator[Tuple[int, int]]:
    """Split the page indexes up to `total_pages` into consecutive ranges of
    `nb_pages` elements.

    Each yielded ``(first, last)`` tuple has inclusive bounds. Every range but the
    last one spans exactly `nb_pages` indexes. The last range always ends at
    `total_pages` itself, so it can be shorter than the others or hold one more
    element.

    Args:
        total_pages: upper bound of the last range; nothing is yielded when it is
            zero or negative
        nb_pages: number of indexes per range, must be strictly positive

    Yields:
        ``(first, last)`` tuples, in increasing order

    Raises:
        ValueError: if `nb_pages` is not strictly positive

    >>> list(split_range(19, 10))
    [(0, 9), (10, 19)]
    >>> list(split_range(20, 3))
    [(0, 2), (3, 5), (6, 8), (9, 11), (12, 14), (15, 17), (18, 20)]
    >>> list(split_range(21, 3))
    [(0, 2), (3, 5), (6, 8), (9, 11), (12, 14), (15, 17), (18, 21)]
    >>> list(split_range(0, 10))
    []

    """
    if nb_pages <= 0:
        raise ValueError(f"nb_pages must be strictly positive, got {nb_pages}")
    if total_pages <= 0:
        # Nothing to split; without this guard there would be no last range to
        # close below.
        return

    starts = range(0, total_pages, nb_pages)
    for start in starts[:-1]:
        yield start, start + nb_pages - 1
    # The last range is closed on `total_pages` rather than on a multiple of
    # `nb_pages`, which is why its size may differ from the others.
    yield starts[-1], total_pages
def is_valid_origin_url(url: Optional[str]) -> bool:
    """Returns whether the given string is a valid origin URL.

    This excludes Git SSH URLs and pseudo-URLs (eg. ``ssh://git@example.org:foo``
    and ``git@example.org:foo``), as they are not supported by the Git loader
    and usually require authentication.

    All HTTP URLs are allowed:

    >>> is_valid_origin_url("http://example.org/repo.git")
    True
    >>> is_valid_origin_url("http://example.org/repo")
    True
    >>> is_valid_origin_url("https://example.org/repo")
    True
    >>> is_valid_origin_url("https://foo:bar@example.org/repo")
    True

    Scheme-less URLs are rejected:

    >>> is_valid_origin_url("example.org/repo")
    False
    >>> is_valid_origin_url("example.org:repo")
    False

    Git SSH URLs and pseudo-URLs are rejected:

    >>> is_valid_origin_url("git@example.org:repo")
    False
    >>> is_valid_origin_url("ssh://git@example.org:repo")
    False

    Empty values and strings that cannot be parsed as URLs are rejected:

    >>> is_valid_origin_url(None)
    False
    >>> is_valid_origin_url("http://[invalid")
    False

    """
    if not url:
        # Empty or None
        return False

    try:
        parsed = urllib.parse.urlparse(url)
    except ValueError:
        # urlparse refuses malformed network locations (eg. an unbalanced IPv6
        # bracket); such a string cannot be a valid origin URL.
        return False

    if not parsed.netloc:
        # Is parsed as a relative URL
        return False

    # Git SSH URLs are the only scheme explicitly rejected here.
    return parsed.scheme != "ssh"