Source code for swh.web.browse.views.directory

# Copyright (C) 2017-2022  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information

import os
from typing import Any, Dict, Optional

from django.http import HttpRequest, HttpResponse
from django.shortcuts import redirect, render
from django.utils.html import format_html

from swh.model.swhids import ObjectType
from swh.web.browse.browseurls import browse_route
from swh.web.browse.snapshot_context import get_snapshot_context
from swh.web.browse.utils import get_directory_entries, get_readme_to_display
from swh.web.utils import archive, gen_path_info, reverse, swh_object_icons
from swh.web.utils.exc import (
    NotFoundExc,
    http_status_code_message,
    sentry_capture_exception,
)
from swh.web.utils.identifiers import get_swhids_info
from swh.web.utils.typing import DirectoryMetadata, SWHObjectInfo


def _directory_browse(
    request: HttpRequest, sha1_git: str, path: Optional[str] = None
) -> HttpResponse:
    root_sha1_git = sha1_git
    dir_sha1_git: Optional[str] = sha1_git
    target_type = None
    error_info: Dict[str, Any] = {"status_code": 200, "description": None}
    if path:
        try:
            dir_info = archive.lookup_directory_with_path(sha1_git, path)
            dir_sha1_git = dir_info["target"]
            target_type = dir_info["type"]
        except NotFoundExc as e:
            error_info["status_code"] = 404
            error_info["description"] = f"NotFoundExc: {str(e)}"
            dir_sha1_git = None

    if target_type == "file":
        params = request.GET.dict()
        if "origin_url" not in params:
            # no origin context, prepend root directory id to path for
            # breadcrumbs navigation
            params["path"] = f"{sha1_git}/{path}"
        browse_content_url = reverse(
            "browse-content",
            url_args={"query_string": f"sha1_git:{dir_sha1_git}"},
            query_params=params,
        )
        return redirect(browse_content_url)

    dirs, files = [], []
    if dir_sha1_git is not None:
        dirs, files = get_directory_entries(dir_sha1_git)
    origin_url = request.GET.get("origin_url")
    if not origin_url:
        origin_url = request.GET.get("origin")
    snapshot_id = request.GET.get("snapshot")
    snapshot_context = None
    if origin_url is not None or snapshot_id is not None:
        try:
            snapshot_context = get_snapshot_context(
                snapshot_id=snapshot_id,
                origin_url=origin_url,
                branch_name=request.GET.get("branch"),
                release_name=request.GET.get("release"),
                revision_id=request.GET.get("revision"),
                path=path,
                visit_type=request.GET.get("visit_type"),
            )
        except NotFoundExc as e:
            if str(e).startswith("Origin") and origin_url is not None:
                raw_dir_url = reverse(
                    "browse-directory",
                    url_args={"sha1_git": dir_sha1_git},
                    request=request,
                )
                error_message = format_html(
                    "The Software Heritage archive has a directory "
                    "with the hash you provided but the origin "
                    "mentioned in your request appears broken: {}. "
                    "Please check the URL and try again.\n\n"
                    "Nevertheless, you can still browse the directory "
                    "without origin information: {}",
                    origin_url,
                    raw_dir_url,
                )
                raise NotFoundExc(error_message)
            else:
                raise e

    path_info = gen_path_info(path)

    query_params = snapshot_context["query_params"] if snapshot_context else {}

    breadcrumbs = []
    breadcrumbs.append(
        {
            "name": root_sha1_git[:7],
            "url": reverse(
                "browse-directory",
                url_args={"sha1_git": root_sha1_git},
                query_params={**query_params, "path": None},
            ),
        }
    )

    for pi in path_info:
        breadcrumbs.append(
            {
                "name": pi["name"],
                "url": reverse(
                    "browse-directory",
                    url_args={"sha1_git": root_sha1_git},
                    query_params={
                        **query_params,
                        "path": pi["path"],
                    },
                ),
            }
        )

    path = "" if path is None else (path + "/")

    for d in dirs:
        if d["type"] == "rev":
            d["url"] = reverse(
                "browse-revision",
                url_args={"sha1_git": d["target"]},
                query_params=query_params,
            )
        else:
            d["url"] = reverse(
                "browse-directory",
                url_args={"sha1_git": root_sha1_git},
                query_params={
                    **query_params,
                    "path": path + d["name"],
                },
            )

    sum_file_sizes = 0

    readmes = {}

    for f in files:
        query_string = "sha1_git:" + f["target"]
        f["url"] = reverse(
            "browse-content",
            url_args={"query_string": query_string},
            query_params={
                **query_params,
                "path": root_sha1_git + "/" + path + f["name"],
            },
        )
        if f["length"] is not None:
            sum_file_sizes += f["length"]
        if f["name"].lower().startswith("readme"):
            readmes[f["name"]] = f.get("target")

    readme_name, readme_url, readme_html = get_readme_to_display(readmes)

    dir_metadata = DirectoryMetadata(
        object_type=ObjectType.DIRECTORY,
        object_id=dir_sha1_git,
        directory=root_sha1_git,
        nb_files=len(files),
        nb_dirs=len(dirs),
        sum_file_sizes=sum_file_sizes,
        root_directory=root_sha1_git,
        path=f"/{path}" if path else None,
        revision=None,
        revision_found=None,
        release=None,
        snapshot=None,
    )

    vault_cooking = {
        "directory_context": True,
        "directory_swhid": f"swh:1:dir:{dir_sha1_git}",
        "revision_context": False,
        "revision_swhid": None,
    }

    swh_objects = [
        SWHObjectInfo(object_type=ObjectType.DIRECTORY, object_id=dir_sha1_git)
    ]

    if snapshot_context:
        if snapshot_context["revision_id"]:
            swh_objects.append(
                SWHObjectInfo(
                    object_type=ObjectType.REVISION,
                    object_id=snapshot_context["revision_id"],
                )
            )
        swh_objects.append(
            SWHObjectInfo(
                object_type=ObjectType.SNAPSHOT,
                object_id=snapshot_context["snapshot_id"],
            )
        )
        if snapshot_context["release_id"]:
            swh_objects.append(
                SWHObjectInfo(
                    object_type=ObjectType.RELEASE,
                    object_id=snapshot_context["release_id"],
                )
            )

    swhids_info = get_swhids_info(swh_objects, snapshot_context, dir_metadata)

    heading = "Directory - %s" % dir_sha1_git
    if breadcrumbs:
        dir_path = "/".join([bc["name"] for bc in breadcrumbs]) + "/"
        heading += " - %s" % dir_path

    top_right_link = None
    if (
        snapshot_context is not None
        and not snapshot_context["is_empty"]
        and snapshot_context["revision_id"] is not None
    ):
        history_url = reverse(
            "browse-revision-log",
            url_args={"sha1_git": snapshot_context["revision_id"]},
            query_params=query_params,
        )
        top_right_link = {
            "url": history_url,
            "icon": swh_object_icons["revisions history"],
            "text": "History",
        }

    return render(
        request,
        "browse-directory.html",
        {
            "heading": heading,
            "swh_object_id": swhids_info[0]["swhid"],
            "swh_object_name": "Directory",
            "swh_object_metadata": dir_metadata,
            "dirs": dirs,
            "files": files,
            "breadcrumbs": breadcrumbs,
            "top_right_link": top_right_link,
            "readme_name": readme_name,
            "readme_url": readme_url,
            "readme_html": readme_html,
            "snapshot_context": snapshot_context,
            "vault_cooking": vault_cooking,
            "show_actions": True,
            "swhids_info": swhids_info,
            "error_code": error_info["status_code"],
            "error_message": http_status_code_message.get(error_info["status_code"]),
            "error_description": error_info["description"],
        },
        status=error_info["status_code"],
    )


[docs] @browse_route( r"directory/(?P<sha1_git>[0-9a-f]+)/", view_name="browse-directory", checksum_args=["sha1_git"], ) def directory_browse(request: HttpRequest, sha1_git: str) -> HttpResponse: """Django view for browsing the content of a directory identified by its sha1_git value. The url that points to it is :http:get:`/browse/directory/(sha1_git)/` """ return _directory_browse(request, sha1_git, request.GET.get("path"))
[docs] @browse_route( r"directory/(?P<sha1_git>[0-9a-f]+)/(?P<path>.+)/", view_name="browse-directory-legacy", checksum_args=["sha1_git"], ) def directory_browse_legacy( request: HttpRequest, sha1_git: str, path: str ) -> HttpResponse: """Django view for browsing the content of a directory identified by its sha1_git value. The url that points to it is :http:get:`/browse/directory/(sha1_git)/(path)/` """ return _directory_browse(request, sha1_git, path)
@browse_route( r"directory/resolve/content-path/(?P<sha1_git>[0-9a-f]+)/", view_name="browse-directory-resolve-content-path", checksum_args=["sha1_git"], ) def _directory_resolve_content_path( request: HttpRequest, sha1_git: str ) -> HttpResponse: """ Internal endpoint redirecting to data url for a specific file path relative to a root directory. """ try: path = os.path.normpath(request.GET.get("path", "")) if not path.startswith("../"): dir_info = archive.lookup_directory_with_path(sha1_git, path) if dir_info["type"] == "file": sha1 = dir_info["checksums"]["sha1"] data_url = reverse( "browse-content-raw", url_args={"query_string": sha1} ) return redirect(data_url) except Exception as exc: sentry_capture_exception(exc) return HttpResponse(status=404)