Source code for swh.web.utils.query
# Copyright (C) 2015-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
import re
from swh.model.hashutil import ALGORITHMS, hash_to_bytes
from swh.web.utils.exc import BadInputExc
SHA256_RE = re.compile(r"^[0-9a-f]{64}$", re.IGNORECASE)
SHA1_RE = re.compile(r"^[0-9a-f]{40}$", re.IGNORECASE)
[docs]
def parse_hash(q):
"""Detect the hash type of a user submitted query string.
Args:
query string with the following format: "[HASH_TYPE:]HEX_CHECKSUM",
where HASH_TYPE is optional, defaults to "sha1", and can be one of
swh.model.hashutil.ALGORITHMS
Returns:
A pair (hash_algorithm, byte hash value)
Raises:
ValueError if the given query string does not correspond to a valid
hash value
"""
def guess_algo(q):
if SHA1_RE.match(q):
return "sha1"
elif SHA256_RE.match(q):
return "sha256"
else:
raise BadInputExc("Invalid checksum query string %s" % q)
def check_algo(algo, hex):
if (algo in {"sha1", "sha1_git"} and not SHA1_RE.match(hex)) or (
algo == "sha256" and not SHA256_RE.match(hex)
):
raise BadInputExc("Invalid hash %s for algorithm %s" % (hex, algo))
parts = q.split(":")
if len(parts) > 2:
raise BadInputExc("Invalid checksum query string %s" % q)
elif len(parts) == 1:
parts = (guess_algo(q), q)
elif len(parts) == 2:
check_algo(parts[0], parts[1])
algo = parts[0]
if algo not in ALGORITHMS:
raise BadInputExc("Unknown hash algorithm %s" % algo)
return (algo, hash_to_bytes(parts[1]))
[docs]
def parse_hash_with_algorithms_or_throws(q, accepted_algo, error_msg):
"""Parse a query but only accepts accepted_algo.
Otherwise, raise the exception with message error_msg.
Args:
- q: query string with the following format: "[HASH_TYPE:]HEX_CHECKSUM"
where HASH_TYPE is optional, defaults to "sha1", and can be one of
swh.model.hashutil.ALGORITHMS.
- accepted_algo: array of strings representing the names of accepted
algorithms.
- error_msg: error message to raise as BadInputExc if the algo of
the query does not match.
Returns:
A pair (hash_algorithm, byte hash value)
Raises:
BadInputExc when the inputs is invalid or does not
validate the accepted algorithms.
"""
algo, hash = parse_hash(q)
if algo not in accepted_algo:
raise BadInputExc(error_msg)
return (algo, hash)