# Source code for swh.web.browse.views.revision

# Copyright (C) 2017-2022  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information

import hashlib
import json
import textwrap
from typing import Any, Dict, List, Optional

from django.http import HttpRequest, HttpResponse, JsonResponse
from django.shortcuts import render
from django.utils.html import format_html
from django.utils.safestring import mark_safe

from swh.model.hashutil import hash_to_bytes
from swh.model.swhids import CoreSWHID, ObjectType
from swh.web.browse.browseurls import browse_route
from swh.web.browse.snapshot_context import get_snapshot_context
from swh.web.browse.utils import (
    content_display_max_size,
    format_log_entries,
    gen_link,
    gen_person_mail_link,
    gen_revision_url,
    get_directory_entries,
    get_readme_to_display,
    get_revision_log_url,
    prepare_content_for_display,
    request_content,
)
from swh.web.utils import (
    archive,
    format_utc_iso_date,
    gen_path_info,
    highlightjs,
    reverse,
    swh_object_icons,
)
from swh.web.utils.exc import NotFoundExc, http_status_code_message
from swh.web.utils.identifiers import get_swhids_info
from swh.web.utils.typing import RevisionMetadata, SnapshotContext, SWHObjectInfo


def _gen_content_url(
    revision: Dict[str, Any],
    query_string: str,
    path: str,
    snapshot_context: Optional[SnapshotContext],
) -> str:
    """
    Build the URL used to browse a content referenced by a revision.

    Args:
        revision: revision data; its ``id`` is read when a snapshot context
            is provided, its ``directory`` otherwise
        query_string: content identifier (e.g. ``sha1_git:<hash>``) used as
            URL argument of the ``browse-content`` view
        path: path of the content relative to the revision root directory
        snapshot_context: optional snapshot context whose ``query_params``
            are used as base query parameters of the generated URL

    Returns:
        The URL of the ``browse-origin-content`` view when a snapshot context
        is provided, the URL of the ``browse-content`` view otherwise.
    """
    if snapshot_context:
        # Work on a copy: the snapshot context is shared between all the
        # calls made for a revision, writing "path" and "revision" directly
        # into it would leak these keys to every later user of the context.
        query_params = dict(snapshot_context["query_params"])
        query_params["path"] = path
        query_params["revision"] = revision["id"]
        return reverse("browse-origin-content", query_params=query_params)

    content_path = "%s/%s" % (revision["directory"], path)
    return reverse(
        "browse-content",
        url_args={"query_string": query_string},
        query_params={"path": content_path},
    )


def _gen_diff_link(idx: int, diff_anchor: str, link_text: str) -> str:
    """
    Return the text describing a changed file, linked to its diff anchor
    when the diff is among the displayed ones.

    Args:
        idx: position of the change in the list of changes of the revision
        diff_anchor: target of the link
        link_text: text to display

    Returns:
        The result of ``gen_link`` when ``idx`` is below
        ``_max_displayed_file_diffs``, ``link_text`` unchanged otherwise.
    """
    diff_is_displayed = idx < _max_displayed_file_diffs
    return gen_link(diff_anchor, link_text) if diff_is_displayed else link_text


# Maximum number of file diffs exposed for a revision: changes whose index
# reaches this value get no diff link and are cut from the list of changes
# returned by the revision diff endpoint.
# TODO: put in conf
_max_displayed_file_diffs = 1000


def _gen_revision_changes_list(
    revision: Dict[str, Any],
    changes: List[Dict[str, Any]],
    snapshot_context: Optional[SnapshotContext],
) -> str:
    """
    Returns a HTML string describing the file changes
    introduced in a revision.
    As this string will be displayed in the browse revision view,
    links to adequate file diffs are also generated.

    Each entry of ``changes`` is updated in place with the following keys:
    ``path``, ``diff_url``, ``id`` and, for the ``modify``, ``insert``,
    ``delete`` and ``rename`` change types, ``content_url``.

    Args:
        revision (dict): data of the revision the changes belong to
        changes (list): list of file changes in the revision
        snapshot_context (dict): optional origin context used to reverse
            the content urls

    Returns:
        A string to insert in a revision HTML view, one line per change
        or "No changes" when the list of changes is empty.

    """
    changes_msg = []
    # First parent of the revision, needed to browse deleted files.
    # It is looked up lazily on the first deletion and reused afterwards
    # instead of being fetched again for every deleted file.
    parent: Optional[Dict[str, Any]] = None

    for i, change in enumerate(changes):
        from_query_string = ""
        to_query_string = ""
        diff_id = "diff-"
        if change["from"]:
            from_query_string = "sha1_git:" + change["from"]["target"]
            diff_id += change["from"]["target"] + "-" + change["from_path"]
        diff_id += "-"
        if change["to"]:
            to_query_string = "sha1_git:" + change["to"]["target"]
            diff_id += change["to"]["target"] + change["to_path"]

        change["path"] = change["to_path"] or change["from_path"]
        change["diff_url"] = reverse(
            "diff-contents",
            url_args={
                "from_query_string": from_query_string,
                "to_query_string": to_query_string,
            },
            query_params={"path": change["path"]},
        )

        # The readable identifier is hashed to get a fixed-size value
        # used as HTML anchor for the diff.
        diff_id = hashlib.sha1(diff_id.encode("utf-8")).hexdigest()
        change["id"] = diff_id
        diff_link = "#diff_" + diff_id

        change_type = change["type"]
        if change_type == "modify":
            change["content_url"] = _gen_content_url(
                revision, to_query_string, change["to_path"], snapshot_context
            )
            changes_msg.append(
                "modified:  %s" % _gen_diff_link(i, diff_link, change["to_path"])
            )
        elif change_type == "insert":
            change["content_url"] = _gen_content_url(
                revision, to_query_string, change["to_path"], snapshot_context
            )
            changes_msg.append(
                "new file:  %s" % _gen_diff_link(i, diff_link, change["to_path"])
            )
        elif change_type == "delete":
            # A deleted file only exists in the parent revision
            if parent is None:
                parent = archive.lookup_revision(revision["parents"][0])
            change["content_url"] = _gen_content_url(
                parent, from_query_string, change["from_path"], snapshot_context
            )
            changes_msg.append(
                "deleted:   %s" % _gen_diff_link(i, diff_link, change["from_path"])
            )
        elif change_type == "rename":
            change["content_url"] = _gen_content_url(
                revision, to_query_string, change["to_path"], snapshot_context
            )
            link_text = change["from_path"] + " &rarr; " + change["to_path"]
            changes_msg.append(
                "renamed:   %s" % _gen_diff_link(i, diff_link, link_text)
            )

    if not changes:
        changes_msg.append("No changes")
    return mark_safe("\n".join(changes_msg))


@browse_route(
    r"revision/(?P<sha1_git>[0-9a-f]+)/diff/",
    view_name="diff-revision",
    checksum_args=["sha1_git"],
)
def _revision_diff(request: HttpRequest, sha1_git: str) -> HttpResponse:
    """
    Browse internal endpoint computing the diff of a revision.

    The JSON response holds the total number of changes, the changes
    themselves (at most ``_max_displayed_file_diffs`` of them) and the
    HTML summary of all the changes.
    """
    params = request.GET
    revision = archive.lookup_revision(sha1_git)

    # "origin" is accepted as a fallback for "origin_url"
    origin_url = params.get("origin_url") or params.get("origin")
    timestamp = params.get("timestamp")
    raw_visit_id = params.get("visit_id")
    visit_id = None if raw_visit_id is None else int(raw_visit_id)

    snapshot_context = (
        get_snapshot_context(
            origin_url=origin_url,
            timestamp=timestamp,
            visit_id=visit_id,
            visit_type=params.get("visit_type"),
        )
        if origin_url
        else None
    )

    changes = archive.diff_revision(sha1_git)
    # Must run before the response is built: it decorates each change
    # with the URLs and identifier sent back to the client.
    changes_msg = _gen_revision_changes_list(revision, changes, snapshot_context)

    return JsonResponse(
        {
            "total_nb_changes": len(changes),
            "changes": changes[:_max_displayed_file_diffs],
            "changes_msg": changes_msg,
        }
    )


# Default number of revisions displayed per page by the revision log view
# when the "per_page" query parameter is not provided
NB_LOG_ENTRIES = 100


@browse_route(
    r"revision/(?P<sha1_git>[0-9a-f]+)/log/",
    view_name="browse-revision-log",
    checksum_args=["sha1_git"],
)
def revision_log_browse(request: HttpRequest, sha1_git: str) -> HttpResponse:
    """
    Django view that produces an HTML display of the history
    log for a revision identified by its id.

    The url that points to it is :http:get:`/browse/revision/(sha1_git)/log/`

    The identifiers of the revisions already walked, along with the state
    of the revisions walker, are stored in the session so that browsing
    the following pages resumes the walk instead of restarting it.
    """
    origin_url = request.GET.get("origin_url")
    snapshot_id = request.GET.get("snapshot")
    snapshot_context = None
    if origin_url or snapshot_id:
        visit_id = int(request.GET.get("visit_id", 0))
        snapshot_context = get_snapshot_context(
            snapshot_id=snapshot_id,
            origin_url=origin_url,
            timestamp=request.GET.get("timestamp"),
            visit_id=visit_id or None,
            branch_name=request.GET.get("branch"),
            release_name=request.GET.get("release"),
            revision_id=sha1_git,
            visit_type=request.GET.get("visit_type"),
        )

    per_page = int(request.GET.get("per_page", NB_LOG_ENTRIES))
    offset = int(request.GET.get("offset", 0))
    revs_ordering = request.GET.get("revs_ordering", "committer_date")

    session_key = "rev_%s_log_ordering_%s" % (sha1_git, revs_ordering)
    rev_log_session = request.session.get(session_key, None)
    rev_log = []
    revs_walker_state = None
    if rev_log_session:
        rev_log = rev_log_session["rev_log"]
        revs_walker_state = rev_log_session["revs_walker_state"]

    if len(rev_log) < offset + per_page:
        # One revision more than needed is requested in order to know
        # whether a following page exists.
        revs_walker = archive.get_revisions_walker(
            revs_ordering,
            sha1_git,
            max_revs=offset + per_page + 1,
            state=revs_walker_state,
        )
        rev_log += [rev["id"] for rev in revs_walker]
        revs_walker_state = revs_walker.export_state()

    revs = rev_log[offset : offset + per_page]
    revision_log = archive.lookup_revision_multiple(revs)

    request.session[session_key] = {
        "rev_log": rev_log,
        "revs_walker_state": revs_walker_state,
    }

    # Read again without default value: the ordering is only forwarded in
    # the generated URLs when it was explicitly requested.
    revs_ordering = request.GET.get("revs_ordering", "")

    def _log_page_url(page_offset: int) -> str:
        # URL of the log page starting at the given offset
        return reverse(
            "browse-revision-log",
            url_args={"sha1_git": sha1_git},
            query_params={
                "per_page": str(per_page),
                "offset": str(page_offset),
                "revs_ordering": revs_ordering or None,
                "origin_url": origin_url,
                "snapshot": snapshot_id,
            },
        )

    # "prev" targets the page located further in the walk
    prev_log_url = None
    if len(rev_log) > offset + per_page:
        prev_log_url = _log_page_url(offset + per_page)

    # "next" targets the page located before in the walk
    next_log_url = None
    if offset != 0:
        # Clamp to the first page: offset may be lower than per_page when
        # these parameters are edited by hand, a negative offset would
        # slice the log from its end.
        next_log_url = _log_page_url(max(offset - per_page, 0))

    revision_log_data = format_log_entries(revision_log, per_page, snapshot_context)

    swh_rev_id = str(
        CoreSWHID(object_type=ObjectType.REVISION, object_id=hash_to_bytes(sha1_git))
    )

    return render(
        request,
        "browse-revision-log.html",
        {
            "heading": "Revision history",
            "swh_object_id": swh_rev_id,
            "swh_object_name": "Revisions history",
            "swh_object_metadata": None,
            "revision_log": revision_log_data,
            "revs_ordering": revs_ordering,
            "next_log_url": next_log_url,
            "prev_log_url": prev_log_url,
            "breadcrumbs": None,
            "top_right_link": None,
            "snapshot_context": snapshot_context,
            "vault_cooking": None,
            "show_actions": True,
            "swhids_info": None,
        },
    )
@browse_route(
    r"revision/(?P<sha1_git>[0-9a-f]+)/",
    view_name="browse-revision",
    checksum_args=["sha1_git"],
)
def revision_browse(request: HttpRequest, sha1_git: str) -> HttpResponse:
    """
    Django view that produces an HTML display of a revision
    identified by its id.

    The url that points to it is :http:get:`/browse/revision/(sha1_git)/`.

    When a ``path`` query parameter is provided, the directory or content
    it targets in the revision root directory is displayed; if it cannot
    be found the page is rendered with a 404 status code.

    Raises:
        NotFoundExc: if the snapshot context cannot be retrieved for the
            requested origin
    """
    revision = archive.lookup_revision(sha1_git)
    origin_info = None
    snapshot_context = None
    origin_url = request.GET.get("origin_url")
    if not origin_url:
        origin_url = request.GET.get("origin")
    timestamp = request.GET.get("timestamp")
    visit_id = int(request.GET.get("visit_id", 0))
    snapshot_id = request.GET.get("snapshot_id")
    if not snapshot_id:
        snapshot_id = request.GET.get("snapshot")
    path = request.GET.get("path")
    dir_id = None
    dirs, files = [], []
    content_data = {}
    if origin_url:
        try:
            snapshot_context = get_snapshot_context(
                snapshot_id=snapshot_id,
                origin_url=origin_url,
                timestamp=timestamp,
                visit_id=visit_id or None,
                branch_name=request.GET.get("branch"),
                release_name=request.GET.get("release"),
                revision_id=sha1_git,
                path=path,
                visit_type=request.GET.get("visit_type"),
            )
        except NotFoundExc as e:
            # Only errors about the origin get the dedicated message below
            if not str(e).startswith("Origin"):
                raise
            raw_rev_url = reverse(
                "browse-revision", url_args={"sha1_git": sha1_git}, request=request
            )
            error_message = format_html(
                "The Software Heritage archive has a revision "
                "with the hash you provided but the origin "
                "mentioned in your request appears broken: {}. "
                "Please check the URL and try again.\n\n"
                "Nevertheless, you can still browse the revision "
                "without origin information: {}",
                origin_url,
                raw_rev_url,
            )
            raise NotFoundExc(error_message) from e
        origin_info = snapshot_context["origin_info"]
        snapshot_id = snapshot_context["snapshot_id"]
    elif snapshot_id:
        snapshot_context = get_snapshot_context(snapshot_id)

    error_info: Dict[str, Any] = {"status_code": 200, "description": None}

    if path:
        try:
            file_info = archive.lookup_directory_with_path(revision["directory"], path)
            if file_info["type"] == "dir":
                dir_id = file_info["target"]
            else:
                query_string = "sha1_git:" + file_info["target"]
                content_data = request_content(query_string)
        except NotFoundExc as e:
            # The page is still rendered, with a 404 status code
            error_info["status_code"] = 404
            error_info["description"] = f"NotFoundExc: {str(e)}"
    else:
        dir_id = revision["directory"]

    if dir_id:
        # From now on path is used as prefix of the directory entries paths
        path = "" if path is None else (path + "/")
        dirs, files = get_directory_entries(dir_id)

    revision_metadata = RevisionMetadata(
        object_type=ObjectType.REVISION,
        object_id=sha1_git,
        revision=sha1_git,
        author=revision["author"]["fullname"] if revision["author"] else "None",
        author_url=(
            gen_person_mail_link(revision["author"]) if revision["author"] else "None"
        ),
        committer=(
            revision["committer"]["fullname"] if revision["committer"] else "None"
        ),
        committer_url=(
            gen_person_mail_link(revision["committer"])
            if revision["committer"]
            else "None"
        ),
        committer_date=format_utc_iso_date(revision["committer_date"]),
        date=format_utc_iso_date(revision["date"]),
        directory=revision["directory"],
        merge=revision["merge"],
        metadata=json.dumps(
            revision["metadata"], sort_keys=True, indent=4, separators=(",", ": ")
        ),
        parents=revision["parents"],
        synthetic=revision["synthetic"],
        type=revision["type"],
        snapshot=snapshot_id,
        origin_url=origin_url,
    )

    message_lines = ["None"]
    if revision["message"]:
        message_lines = revision["message"].split("\n")

    parents = []
    for p in revision["parents"]:
        parent_url = gen_revision_url(p, snapshot_context)
        parents.append({"id": p, "url": parent_url})

    path_info = gen_path_info(path)

    # NOTE(review): when a snapshot context is set, query_params is the very
    # dict stored in it, so the updates below are also visible to the helpers
    # receiving snapshot_context afterwards; the order of the statements
    # matters, confirm before reorganizing.
    query_params = snapshot_context["query_params"] if snapshot_context else {}

    breadcrumbs = []
    breadcrumbs.append(
        {
            "name": revision["directory"][:7],
            "url": reverse(
                "browse-revision",
                url_args={"sha1_git": sha1_git},
                query_params=query_params,
            ),
        }
    )
    for pi in path_info:
        query_params["path"] = pi["path"]
        breadcrumbs.append(
            {
                "name": pi["name"],
                "url": reverse(
                    "browse-revision",
                    url_args={"sha1_git": sha1_git},
                    query_params=query_params,
                ),
            }
        )

    vault_cooking = {
        "directory_context": False,
        "directory_swhid": None,
        "revision_context": True,
        "revision_swhid": f"swh:1:rev:{sha1_git}",
    }

    swh_objects = [SWHObjectInfo(object_type=ObjectType.REVISION, object_id=sha1_git)]

    available_languages = None
    content = None
    content_size = None
    filename = None
    mimetype = None
    language = None
    readme_name = None
    readme_url = None
    readme_html = None
    readmes = {}

    extra_context = dict(revision_metadata)
    extra_context["path"] = f"/{path}" if path else None

    if content_data:
        # A content is displayed: the last breadcrumb is the file itself
        breadcrumbs[-1]["url"] = None
        content_size = content_data["length"]
        mimetype = content_data["mimetype"]
        if content_data["raw_data"]:
            content_display_data = prepare_content_for_display(
                content_data["raw_data"], content_data["mimetype"], path
            )
            content = content_display_data["content_data"]
            language = content_display_data["language"]
            mimetype = content_display_data["mimetype"]
        if mimetype and (
            mimetype.startswith("text/")
            or (
                mimetype.startswith("application/")
                and content_data.get("encoding", "") != "binary"
            )
        ):
            available_languages = highlightjs.get_supported_languages()
        if path:
            filename = path_info[-1]["name"]
            query_params["filename"] = filename
            filepath = "/".join(pi["name"] for pi in path_info[:-1])
            extra_context["path"] = f"/{filepath}/" if filepath else "/"
            extra_context["filename"] = filename
            if filename.endswith(".ipynb"):
                # disable language select dropdown when a notebook is rendered
                available_languages = None

        top_right_link = {
            "url": reverse(
                "browse-content-raw",
                url_args={"query_string": query_string},
                query_params={"filename": filename},
            ),
            "icon": swh_object_icons["content"],
            "text": "Raw File",
        }

        swh_objects.append(
            SWHObjectInfo(object_type=ObjectType.CONTENT, object_id=file_info["target"])
        )
    else:
        # A directory is displayed (or nothing when the path was not found)
        for d in dirs:
            if d["type"] == "rev":
                d["url"] = reverse(
                    "browse-revision", url_args={"sha1_git": d["target"]}
                )
            else:
                query_params["path"] = path + d["name"]
                d["url"] = reverse(
                    "browse-revision",
                    url_args={"sha1_git": sha1_git},
                    query_params=query_params,
                )
        for f in files:
            query_params["path"] = path + f["name"]
            f["url"] = reverse(
                "browse-revision",
                url_args={"sha1_git": sha1_git},
                query_params=query_params,
            )
            if f["name"].lower().startswith("readme"):
                readmes[f["name"]] = f.get("target")

        readme_name, readme_url, readme_html = get_readme_to_display(readmes)

        top_right_link = {
            "url": get_revision_log_url(sha1_git, snapshot_context),
            "icon": swh_object_icons["revisions history"],
            "text": "History",
        }

        vault_cooking["directory_context"] = True
        vault_cooking["directory_swhid"] = f"swh:1:dir:{dir_id}"

        swh_objects.append(
            SWHObjectInfo(object_type=ObjectType.DIRECTORY, object_id=dir_id)
        )

    # The diff of a revision does not depend on the browsed path
    query_params.pop("path", None)

    diff_revision_url = reverse(
        "diff-revision",
        url_args={"sha1_git": sha1_git},
        query_params=query_params,
    )

    if snapshot_id:
        swh_objects.append(
            SWHObjectInfo(object_type=ObjectType.SNAPSHOT, object_id=snapshot_id)
        )

    swhids_info = get_swhids_info(swh_objects, snapshot_context, extra_context)

    heading = "Revision - %s - %s" % (
        sha1_git[:7],
        textwrap.shorten(message_lines[0], width=70),
    )
    if snapshot_context:
        context_found = "snapshot: %s" % snapshot_context["snapshot_id"]
        if origin_info:
            context_found = "origin: %s" % origin_info["url"]
        heading += " - %s" % context_found

    return render(
        request,
        "browse-revision.html",
        {
            "heading": heading,
            "swh_object_id": swhids_info[0]["swhid"],
            "swh_object_name": "Revision",
            "swh_object_metadata": revision_metadata,
            "message_header": message_lines[0],
            "message_body": "\n".join(message_lines[1:]),
            "parents": parents,
            "snapshot_context": snapshot_context,
            "dirs": dirs,
            "files": files,
            "content": content,
            "content_size": content_size,
            "max_content_size": content_display_max_size,
            "filename": filename,
            "encoding": content_data.get("encoding"),
            "mimetype": mimetype,
            "language": language,
            "readme_name": readme_name,
            "readme_url": readme_url,
            "readme_html": readme_html,
            "breadcrumbs": breadcrumbs,
            "top_right_link": top_right_link,
            "vault_cooking": vault_cooking,
            "diff_revision_url": diff_revision_url,
            "show_actions": True,
            "swhids_info": swhids_info,
            "error_code": error_info["status_code"],
            "error_message": http_status_code_message.get(error_info["status_code"]),
            "error_description": error_info["description"],
            "available_languages": available_languages,
        },
        status=error_info["status_code"],
    )