# Source code for swh.core.github.pytest_plugin

# Copyright (C) 2020-2023  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import time
from typing import Dict, Iterator, List, Optional, Union

import pytest
import requests_mock

HTTP_GITHUB_API_URL = "https://api.github.com/repositories"



[docs]
def fake_time_sleep(duration: float, sleep_calls: Optional[List[float]] = None):
    """Stand-in for time.sleep which records the request instead of sleeping.

    Args:
        duration: number of seconds the caller asked to sleep for
        sleep_calls: when not None, the list ``duration`` gets appended to

    Raises:
        ValueError: if ``duration`` is negative; nothing is recorded in that case

    """
    if duration < 0:
        raise ValueError("Can't sleep for a negative amount of time!")
    if sleep_calls is None:
        # No recorder was supplied: validating the duration is all there is to do
        return
    sleep_calls.append(duration)




[docs]
def fake_time_time():
    """Stand-in for time.time() which always reports the timestamp 0"""
    frozen_timestamp = 0
    return frozen_timestamp




[docs]
@pytest.fixture
def monkeypatch_sleep_calls(monkeypatch) -> Iterator[List[float]]:
    """Swap `time.sleep` and `time.time` for :func:`fake_time_sleep` and
    :func:`fake_time_time`. Yields the list in which every duration passed to
    time.sleep() is accumulated, in call order."""
    recorded: List[float] = []

    def record_sleep(d):
        # Same one-argument shape as time.sleep; the duration lands in `recorded`
        return fake_time_sleep(d, recorded)

    monkeypatch.setattr(time, "sleep", record_sleep)
    monkeypatch.setattr(time, "time", fake_time_time)
    yield recorded




[docs]
@pytest.fixture
def num_before_ratelimit() -> int:
    """Default number of requests answered successfully before the ratelimit
    hits: zero"""
    return 0




[docs]
@pytest.fixture
def num_ratelimit() -> Optional[int]:
    """Default number of requests that get a rate-limited response; None stands
    for infinity"""
    return None




[docs]
@pytest.fixture
def ratelimit_reset() -> Optional[int]:
    """Default value for the X-Ratelimit-Reset header of ratelimited responses:
    None"""
    return None




[docs]
def github_ratelimit_callback(
    request: requests_mock.request._RequestObjectProxy,
    context: requests_mock.response._Context,
    remaining_requests: int,
    ratelimit_reset: Optional[int],
) -> Dict[str, str]:
    """Build the response to a request that hit the GitHub API rate limit.

    The status code of ``context`` is set to 403, its ``X-Ratelimit-Remaining``
    header to ``remaining_requests``, and its ``X-Ratelimit-Reset`` header to
    ``ratelimit_reset`` unless that is None. The returned dict is the body of the
    response.

    Raises:
        AssertionError: if the ``Accept`` header of the request is not the GitHub
          v3 media type

    """
    # Check the headers sent by the client
    sent_headers = request.headers
    assert sent_headers["Accept"] == "application/vnd.github.v3+json"
    assert sent_headers["User-Agent"] is not None

    context.status_code = 403

    response_headers = context.headers
    if ratelimit_reset is not None:
        response_headers["X-Ratelimit-Reset"] = str(ratelimit_reset)
    response_headers["X-Ratelimit-Remaining"] = str(remaining_requests)

    message = "API rate limit exceeded for <IP>."
    documentation_url = (
        "https://docs.github.com/rest/overview/"
        "resources-in-the-rest-api#rate-limiting"
    )
    return {"message": message, "documentation_url": documentation_url}




[docs]
def github_repo(i: int) -> Dict[str, Union[int, str]]:
    """Build the basic repository information for the repository with id ``i``"""
    base: Dict[str, Union[int, str]] = {
        "id": i,
        "html_url": f"https://github.com/origin/{i}",
        "fork": False,
    }

    if i == 4321:
        # This repository alone carries the extra field provided to the SWH
        # GitHub lister
        return {**base, "pushed_at": "2018-11-08T13:16:24Z"}

    return base




[docs]
def github_response_callback(
    request: requests_mock.request._RequestObjectProxy,
    context: requests_mock.response._Context,
    remaining_requests: int,
    page_size: int = 1000,
    origin_count: int = 10000,
) -> List[Dict[str, Union[str, int]]]:
    """Build one page of a regular (not rate-limited) GitHub API response.

    The page lists the repositories whose ids come right after the ``since``
    query parameter: at most ``page_size`` of them, and none with an id above
    ``origin_count``. As long as the next page would start below
    ``origin_count``, a ``Link`` header pointing to it is set on ``context``;
    the ``X-Ratelimit-Remaining`` header is always set to ``remaining_requests``.

    Raises:
        AssertionError: if the ``Accept`` header is not the GitHub v3 media type,
          if ``per_page`` is missing or different from ``page_size``, or if
          ``since`` is missing

    """
    # Check the headers sent by the client
    sent_headers = request.headers
    assert sent_headers["Accept"] == "application/vnd.github.v3+json"
    assert sent_headers["User-Agent"] is not None

    # Check the query string: per_page == page_size, since = last_repo_id
    query = request.qs
    assert "per_page" in query
    assert query["per_page"] == [str(page_size)]
    assert "since" in query

    since = int(query["since"][0])
    next_page = since + page_size

    response_headers = context.headers
    if next_page < origin_count:
        # There are still origins past this page: advertise where the next one
        # starts
        next_url = f"{HTTP_GITHUB_API_URL}?per_page={page_size}&since={next_page}"
        response_headers["Link"] = f"<{next_url}>; rel=next"
    response_headers["X-Ratelimit-Remaining"] = str(remaining_requests)

    # Ids on this page are since + 1 up to last_id, both included
    last_id = min(next_page, origin_count)
    return [github_repo(repo_id) for repo_id in range(since + 1, last_id + 1)]




[docs]
@pytest.fixture
def github_requests_ratelimited(
    num_before_ratelimit: int,
    num_ratelimit: Optional[int],
    ratelimit_reset: Optional[int],
) -> Iterator[requests_mock.Mocker]:
    """Mock GET requests to the GitHub API so that they start being answered
    with a rate-limiting status code after `num_before_ratelimit` requests.

    The behavior is driven by three fixtures (each can be overridden, for
    instance with a :func:`pytest.mark.parametrize` parameter):

    - num_before_ratelimit: how many requests, counted over the whole test, are
      answered normally before the ratelimit triggers
    - num_ratelimit: how many requests then get a rate-limited response; with
      None, the rate limit is never lifted
    - ratelimit_reset: the timestamp sent in X-Ratelimit-Reset on rate-limited
      responses; with None, the header is left out

    With the default values of these fixtures, every request gets a rate limit
    response.

    """
    served = 0

    def response_callback(request, context):
        nonlocal served
        served += 1

        if served <= num_before_ratelimit:
            # case 1: not yet rate-limited
            remaining = (num_before_ratelimit or 1000) - served
            return github_response_callback(request, context, remaining)

        ratelimit_is_over = (
            num_ratelimit is not None
            and served >= num_before_ratelimit + num_ratelimit + 1
        )
        if ratelimit_is_over:
            # case 3: no longer rate-limited
            remaining = (num_before_ratelimit + 1000) - served
            return github_response_callback(request, context, remaining)

        # case 2: being rate-limited
        remaining = max(0, (num_before_ratelimit or 1000) - served)
        return github_ratelimit_callback(request, context, remaining, ratelimit_reset)

    with requests_mock.Mocker() as mocker:
        mocker.get(HTTP_GITHUB_API_URL, json=response_callback)
        yield mocker




[docs]
@pytest.fixture
def github_credentials() -> List[Dict[str, str]]:
    """Return a static list of six GitHub credentials sorted by username: three
    holding a "token" and three legacy ones holding a "password" instead"""
    token_based = [
        {"username": f"swh{n:d}", "token": f"token-{n:d}"} for n in range(3)
    ]
    password_based = [
        {"username": f"swh-legacy{n:d}", "password": f"token-legacy-{n:d}"}
        for n in range(3)
    ]
    return sorted(
        [*token_based, *password_based],
        key=lambda credential: credential["username"],
    )




[docs]
@pytest.fixture
def all_tokens(github_credentials) -> List[str]:
    """Return the secret of each static credential, in the order of the
    credentials: its "token" when it has one, its "password" otherwise"""
    return [
        credential["token"] if "token" in credential else credential.get("password")
        for credential in github_credentials
    ]