# Source code for swh.web.browse.views.iframe

# Copyright (C) 2021-2025  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information

from typing import Any, Dict, List, Optional, Tuple
from urllib.parse import quote

from django.shortcuts import render
from django.urls import path as url
from django.views.decorators.clickjacking import xframe_options_exempt

from swh.model.hashutil import hash_to_bytes
from swh.model.swhids import CoreSWHID, ObjectType, QualifiedSWHID
from swh.web.browse.browseurls import browse_route
from swh.web.browse.snapshot_context import get_snapshot_context
from swh.web.browse.utils import (
    content_display_max_size,
    get_directory_entries,
    prepare_content_for_display,
    request_content,
)
from swh.web.utils import archive, gen_path_info, reverse
from swh.web.utils.exc import BadInputExc, NotFoundExc, http_status_code_message
from swh.web.utils.identifiers import get_qualified_swhid, get_swhids_info
from swh.web.utils.typing import SnapshotContext, SWHObjectInfo


def _get_content_rendering_data(cnt_swhid: QualifiedSWHID, path: str) -> Dict[str, Any]:
    """Build the template context used to render a content object.

    The content is requested through its ``sha1_git`` identifier, taken from
    the object id of ``cnt_swhid``. When the response carries raw data, it is
    handed to ``prepare_content_for_display`` which provides the displayable
    content, its language and its mimetype; otherwise those three values are
    ``None``.

    Args:
        cnt_swhid: SWHID of the content to render
        path: path of the content, its last component is used as filename

    Returns:
        A dict with the keys ``content``, ``content_size``,
        ``max_content_size``, ``filename``, ``encoding``, ``mimetype`` and
        ``language``.
    """
    fetched = request_content(f"sha1_git:{cnt_swhid.object_id.hex()}")

    raw_data = fetched.get("raw_data")
    if raw_data is None:
        # nothing to display, only metadata will be reported
        content = language = mimetype = None
    else:
        display = prepare_content_for_display(raw_data, fetched["mimetype"], path)
        content, language, mimetype = (
            display["content_data"],
            display["language"],
            display["mimetype"],
        )

    return {
        "content": content,
        "content_size": fetched.get("length"),
        "max_content_size": content_display_max_size,
        # last path component (the whole path when it holds no slash)
        "filename": path.rpartition("/")[2],
        "encoding": fetched.get("encoding"),
        "mimetype": mimetype,
        "language": language,
    }


def _get_directory_rendering_data(
    dir_swhid: QualifiedSWHID,
    focus_swhid: QualifiedSWHID,
    path: str,
) -> Dict[str, Any]:
    """Build the template context used to render a directory listing.

    Entries of the directory identified by ``dir_swhid`` are fetched and each
    of them gets a ``url`` key pointing to the ``browse-swhid-iframe`` view
    for the entry, except sub-directory entries of type ``"rev"`` whose
    ``url`` is set to ``None``.

    Entry SWHIDs inherit the ``origin``, ``visit`` and ``anchor`` qualifiers
    of ``dir_swhid``; their ``path`` qualifier is the directory path extended
    with the entry name. A file whose object id is the one of ``focus_swhid``
    also inherits its ``lines`` qualifier.

    Args:
        dir_swhid: SWHID of the directory to list
        focus_swhid: SWHID forwarded as ``focus_swhid`` query parameter in
            every generated URL
        path: path of the directory, an empty value means the root ("/")

    Returns:
        A dict with the keys ``dirs`` and ``files`` holding the entries
        updated with their URLs.
    """
    dirs, files = get_directory_entries(dir_swhid.object_id.hex())

    # Entry names are appended to the directory path, so it must end with a
    # slash: a path qualifier such as "/src" would otherwise produce
    # "/srcname" instead of "/src/name".
    base_path = path or "/"
    if not base_path.endswith("/"):
        base_path += "/"

    focus = str(focus_swhid)

    for d in dirs:
        if d["type"] == "rev":
            # no iframe URL for revision entries
            # (presumably submodules, to be confirmed)
            d["url"] = None
            continue
        # use a dedicated name: rebinding dir_swhid here would make the
        # following iterations read qualifiers from a sub-directory SWHID
        subdir_swhid = QualifiedSWHID(
            object_type=ObjectType.DIRECTORY,
            object_id=hash_to_bytes(d["target"]),
            origin=dir_swhid.origin,
            visit=dir_swhid.visit,
            anchor=dir_swhid.anchor,
            path=base_path + d["name"] + "/",
        )
        d["url"] = reverse(
            "browse-swhid-iframe",
            url_args={"swhid": str(subdir_swhid)},
            query_params={"focus_swhid": focus},
        )

    for f in files:
        object_id = hash_to_bytes(f["target"])
        cnt_swhid = QualifiedSWHID(
            object_type=ObjectType.CONTENT,
            object_id=object_id,
            origin=dir_swhid.origin,
            visit=dir_swhid.visit,
            anchor=dir_swhid.anchor,
            path=base_path + f["name"],
            # keep highlighted lines only for the focused content
            lines=(focus_swhid.lines if object_id == focus_swhid.object_id else None),
        )
        f["url"] = reverse(
            "browse-swhid-iframe",
            url_args={"swhid": str(cnt_swhid)},
            query_params={"focus_swhid": focus},
        )

    return {"dirs": dirs, "files": files}


def _get_breacrumbs_data(
    swhid: QualifiedSWHID,
    focus_swhid: QualifiedSWHID,
    path: str,
    snapshot_context: Optional[SnapshotContext] = None,
) -> Tuple[List[Dict[str, Any]], Optional[str]]:
    """Compute the breadcrumbs leading to the object identified by ``swhid``.

    The root directory is taken, in that order, from the snapshot context,
    from a directory anchor of ``swhid`` or from ``focus_swhid`` when it
    targets a directory. When a root directory is found, one breadcrumb is
    produced for it and one for each intermediate directory of ``path``
    (each of them being looked up in the archive). When ``swhid`` targets a
    content with a non-empty filename, a last breadcrumb without URL is added
    for the file.

    Args:
        swhid: SWHID of the displayed object
        focus_swhid: SWHID forwarded as ``focus_swhid`` query parameter in
            the breadcrumb URLs
        path: path qualifier of the displayed object
        snapshot_context: optional snapshot context of the displayed object

    Returns:
        A tuple whose first member is the list of breadcrumbs (dicts with the
        keys ``name``, ``object_id``, ``path`` and ``url``) and second member
        is the hexadecimal id of the root directory, or ``None`` when it
        could not be determined.
    """
    # drop one leading and one trailing slash from the path qualifier
    if path and path.startswith("/"):
        path = path[1:]
    if path and path.endswith("/"):
        path = path[:-1]

    filename = None
    if swhid.object_type == ObjectType.CONTENT:
        filename = path.rpartition("/")[2]
        # only keep the part of the path preceding the filename
        path = path[: -len(filename)]

    path_info = [] if path == "/" else gen_path_info(path)

    anchor = swhid.anchor
    if snapshot_context and snapshot_context["root_directory"]:
        root_dir = snapshot_context["root_directory"]
    elif anchor and anchor.object_type == ObjectType.DIRECTORY:
        root_dir = anchor.object_id.hex()
    elif focus_swhid.object_type == ObjectType.DIRECTORY:
        root_dir = focus_swhid.object_id.hex()
    else:
        root_dir = None

    crumbs: List[Dict[str, Any]] = []

    if root_dir:
        # qualifiers shared by every directory SWHID built below
        qualifiers = {
            "object_type": ObjectType.DIRECTORY,
            "origin": swhid.origin,
            "visit": swhid.visit,
            "anchor": anchor,
        }

        root_swhid = QualifiedSWHID(object_id=hash_to_bytes(root_dir), **qualifiers)
        # no focus parameter when the root directory is the focused object
        root_focus = str(focus_swhid) if focus_swhid != root_swhid else None
        root_url = reverse(
            "browse-swhid-iframe",
            url_args={"swhid": str(root_swhid)},
            query_params={"focus_swhid": root_focus},
        )
        crumbs.append(
            {
                "name": root_dir[:7],
                "object_id": root_swhid.object_id.hex(),
                "path": "/",
                "url": root_url,
            }
        )

        for segment in path_info:
            entry = archive.lookup_directory_with_path(root_dir, segment["path"])
            segment_swhid = QualifiedSWHID(
                object_id=hash_to_bytes(entry["target"]),
                path="/" + segment["path"] + "/",
                **qualifiers,
            )
            segment_path = ""
            if segment_swhid.path:
                segment_path = segment_swhid.path.decode("utf-8")
            segment_url = reverse(
                "browse-swhid-iframe",
                url_args={"swhid": str(segment_swhid)},
                query_params={"focus_swhid": str(focus_swhid)},
            )
            crumbs.append(
                {
                    "name": segment["name"],
                    "object_id": segment_swhid.object_id.hex(),
                    "path": segment_path,
                    "url": segment_url,
                }
            )

    if filename:
        # the displayed file itself closes the trail and is not a link
        crumbs.append(
            {
                "name": filename,
                "object_id": swhid.object_id.hex(),
                "path": path,
                "url": "",
            }
        )

    return crumbs, root_dir



[docs]
@browse_route(
    "embed/<swhid:swhid>/",
    view_name="browse-swhid-iframe",
)
@xframe_options_exempt
def swhid_iframe(request, swhid: str):
    """Django view that can be embedded in an iframe to display objects archived
    by Software Heritage (currently contents and directories) in a minimalist
    Web UI.

    The optional ``focus_swhid`` query parameter defaults to ``swhid``; when
    it cannot be parsed, ``swhid`` is used instead.

    Errors are not propagated: the ``browse-iframe.html`` template is always
    rendered, with the HTTP status code 400 for a bad input (including an
    invalid ``swhid`` or an unsupported object type), 404 for a missing
    object, 500 for any other exception and 200 otherwise.

    Args:
        request: the HTTP request
        swhid: SWHID of the object to display, possibly with qualifiers

    Returns:
        The rendered ``browse-iframe.html`` template.
    """
    focus_swhid = request.GET.get("focus_swhid", swhid)
    parsed_swhid = None
    view_data: Dict[str, Any] = {}
    breadcrumbs: List[Dict[str, Any]] = []
    swh_objects: List[SWHObjectInfo] = []
    snapshot_context = None
    swhids_info_extra_context: Dict[str, Any] = {}
    archive_link = None

    try:
        parsed_swhid = get_qualified_swhid(swhid)

        # The focus SWHID is resolved inside the guarded section, after the
        # main SWHID: falling back to an invalid main SWHID outside of it
        # would raise an unhandled exception instead of rendering the error.
        try:
            parsed_focus_swhid = get_qualified_swhid(focus_swhid)
        except BadInputExc:
            focus_swhid = swhid
            parsed_focus_swhid = parsed_swhid

        path = parsed_swhid.path.decode("utf-8") if parsed_swhid.path else ""

        revision_id = None
        if (
            parsed_swhid.anchor
            and parsed_swhid.anchor.object_type == ObjectType.REVISION
        ):
            revision_id = parsed_swhid.anchor.object_id.hex()
        if parsed_swhid.origin or parsed_swhid.visit:
            snapshot_context = get_snapshot_context(
                origin_url=parsed_swhid.origin,
                snapshot_id=(
                    parsed_swhid.visit.object_id.hex() if parsed_swhid.visit else None
                ),
                revision_id=revision_id,
            )

        error_info: Dict[str, Any] = {"status_code": 200, "description": ""}

        if parsed_swhid.object_type == ObjectType.CONTENT:
            view_data = _get_content_rendering_data(parsed_swhid, path)
            swh_objects.append(
                SWHObjectInfo(
                    object_type=ObjectType.CONTENT,
                    object_id=parsed_swhid.object_id.hex(),
                )
            )

        elif parsed_swhid.object_type == ObjectType.DIRECTORY:
            view_data = _get_directory_rendering_data(
                parsed_swhid, parsed_focus_swhid, path
            )
            swh_objects.append(
                SWHObjectInfo(
                    object_type=ObjectType.DIRECTORY,
                    object_id=parsed_swhid.object_id.hex(),
                )
            )

        else:
            error_info = {
                "status_code": 400,
                "description": (
                    f"Objects of type {parsed_swhid.object_type} are not supported"
                ),
            }

        swhids_info_extra_context["path"] = path
        # view_data is only filled for supported object types
        if view_data:
            breadcrumbs, root_dir = _get_breacrumbs_data(
                parsed_swhid, parsed_focus_swhid, path, snapshot_context
            )

            if parsed_swhid.object_type == ObjectType.CONTENT and len(breadcrumbs) > 1:
                # the breadcrumb preceding the file is its parent directory
                swh_objects.append(
                    SWHObjectInfo(
                        object_type=ObjectType.DIRECTORY,
                        object_id=breadcrumbs[-2]["object_id"],
                    )
                )
                swhids_info_extra_context["path"] = breadcrumbs[-2]["path"]
                swhids_info_extra_context["filename"] = breadcrumbs[-1]["name"]

            if snapshot_context:
                swh_objects.append(
                    SWHObjectInfo(
                        object_type=ObjectType.REVISION,
                        object_id=snapshot_context["revision_id"] or "",
                    )
                )
                swh_objects.append(
                    SWHObjectInfo(
                        object_type=ObjectType.SNAPSHOT,
                        object_id=snapshot_context["snapshot_id"] or "",
                    )
                )

            archive_link = reverse(
                "browse-swhid", url_args={"swhid": quote(swhid, safe=":;=/")}
            )
            if (
                parsed_swhid.origin is None
                and parsed_swhid.visit is None
                and parsed_swhid.anchor is None
                and root_dir is not None
            ):
                # qualifier values cannot be used to get root directory from them,
                # we need to add it as anchor in the SWHID argument of the archive link
                root_dir_swhid = CoreSWHID(
                    object_type=ObjectType.DIRECTORY, object_id=hash_to_bytes(root_dir)
                )
                archive_swhid = str(
                    QualifiedSWHID(
                        object_type=parsed_swhid.object_type,
                        object_id=parsed_swhid.object_id,
                        path=parsed_swhid.path,
                        anchor=root_dir_swhid,
                    )
                )
                archive_link = reverse(
                    "browse-swhid",
                    url_args={"swhid": quote(archive_swhid, safe=":;=/")},
                )

    except BadInputExc as e:
        error_info = {"status_code": 400, "description": f"BadInputExc: {str(e)}"}
    except NotFoundExc as e:
        error_info = {"status_code": 404, "description": f"NotFoundExc: {str(e)}"}
    except Exception as e:
        # last resort handler: the error is reported inside the embedded page
        error_info = {"status_code": 500, "description": str(e)}

    return render(
        request,
        "browse-iframe.html",
        {
            **view_data,
            "iframe_mode": True,
            "object_type": parsed_swhid.object_type.value if parsed_swhid else None,
            "lines": parsed_swhid.lines if parsed_swhid else None,
            "breadcrumbs": breadcrumbs,
            "swhid": swhid,
            "focus_swhid": focus_swhid,
            "archive_link": archive_link,
            "error_code": error_info["status_code"],
            "error_message": http_status_code_message.get(error_info["status_code"]),
            "error_description": error_info["description"],
            "snapshot_context": None,
            "swhids_info": get_swhids_info(
                swh_objects, snapshot_context, swhids_info_extra_context
            ),
        },
        status=error_info["status_code"],
    )



# URL patterns of this module: a single route mapping "embed/<swhid>/" to the
# swhid_iframe view under the name "browse-swhid-iframe".
urlpatterns = [url("embed/<swhid:swhid>/", swhid_iframe, name="browse-swhid-iframe")]