Source code for swh.web.api.views.identifiers
# Copyright (C) 2018-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from typing import Dict, Set
from rest_framework.request import Request
from swh.model.hashutil import hash_to_bytes, hash_to_hex
from swh.model.swhids import ObjectType
from swh.web.api.apidoc import api_doc, format_docstring
from swh.web.api.apiurls import api_route
from swh.web.utils import archive
from swh.web.utils.exc import LargePayloadExc
from swh.web.utils.identifiers import group_swhids, parse_core_swhid, resolve_swhid
[docs]
@api_route(r"/resolve/(?P<swhid>.+)/", "api-1-resolve-swhid")
@api_doc("/resolve/", category="Archive")
@format_docstring()
def api_resolve_swhid(request: Request, swhid: str):
"""
.. http:get:: /api/1/resolve/(swhid)/
Resolve a SoftWare Hash IDentifier (SWHID)
Try to resolve a provided `SoftWare Hash IDentifier
<https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html>`_
into an url for browsing the pointed archive object.
If the provided identifier is valid, the existence of the object in
the archive will also be checked.
:param string swhid: a SoftWare Hash IDentifier
:>json string browse_url: the url for browsing the pointed object
:>json object metadata: object holding optional parts of the SWHID
:>json string namespace: the SWHID namespace
:>json string object_id: the hash identifier of the pointed object
:>json string object_type: the type of the pointed object
:>json number scheme_version: the scheme version of the SWHID
{common_headers}
:statuscode 200: no error
:statuscode 400: an invalid SWHID has been provided
:statuscode 404: the pointed object does not exist in the archive
**Example:**
.. parsed-literal::
:swh_web_api:`resolve/swh:1:rev:96db9023b881d7cd9f379b0c154650d6c108e9a3;origin=https://github.com/openssl/openssl/`
"""
# try to resolve the provided swhid
swhid_resolved = resolve_swhid(swhid)
# id is well-formed, now check that the pointed
# object is present in the archive, NotFoundExc
# will be raised otherwise
swhid_parsed = swhid_resolved["swhid_parsed"]
object_type = swhid_parsed.object_type
object_id = hash_to_hex(swhid_parsed.object_id)
archive.lookup_object(swhid_parsed.object_type, object_id)
# id is well-formed and the pointed object exists
return {
"namespace": swhid_parsed.namespace,
"scheme_version": swhid_parsed.scheme_version,
"object_type": object_type.name.lower(),
"object_id": object_id,
"metadata": swhid_parsed.qualifiers(),
"browse_url": request.build_absolute_uri(swhid_resolved["browse_url"]),
}
[docs]
@api_route(r"/known/", "api-1-known", methods=["POST"])
@api_doc("/known/", category="Archive")
@format_docstring()
def api_swhid_known(request: Request):
"""
.. http:post:: /api/1/known/
Check if a list of objects are present in the Software Heritage
archive.
The objects to check existence must be provided using
`SoftWare Hash IDentifiers
<https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html>`_.
:<jsonarr string -: input array of SWHIDs, its length cannot exceed 1000.
:>json object <swhid>: an object whose keys are input SWHIDs and values
objects with the following keys:
* **known (bool)**: whether the object was found
{common_headers}
:statuscode 200: no error
:statuscode 400: an invalid SWHID was provided
:statuscode 413: the input array of SWHIDs is too large
"""
limit = 1000
if len(request.data) > limit:
raise LargePayloadExc(
"The maximum number of SWHIDs this endpoint can receive is %s" % limit
)
swhids = [parse_core_swhid(swhid) for swhid in request.data]
response = {str(swhid): {"known": False} for swhid in swhids}
# group swhids by their type
swhids_by_type = group_swhids(swhids)
# search for hashes not present in the storage
missing_hashes: Dict[ObjectType, Set[bytes]] = {
k: set(map(hash_to_bytes, archive.lookup_missing_hashes({k: v})))
for k, v in swhids_by_type.items()
}
for swhid in swhids:
if swhid.object_id not in missing_hashes[swhid.object_type]:
response[str(swhid)]["known"] = True
return response