Source code for swh.indexer.metadata_dictionary.python
# Copyright (C) 2018-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import email.parser
import email.policy
from rdflib import BNode, Literal, URIRef
from swh.indexer.codemeta import CROSSWALK_TABLE
from swh.indexer.namespaces import RDF, SCHEMA
from .base import DictMapping, SingleFileIntrinsicMapping
from .utils import add_list
_normalize_pkginfo_key = str.lower
[docs]
class LinebreakPreservingEmailPolicy(email.policy.EmailPolicy):
[docs]
class PythonPkginfoMapping(DictMapping, SingleFileIntrinsicMapping):
"""Dedicated class for Python's PKG-INFO mapping and translation.
https://www.python.org/dev/peps/pep-0314/"""
name = "pkg-info"
filename = b"PKG-INFO"
mapping = {
_normalize_pkginfo_key(k): v
for (k, v) in CROSSWALK_TABLE["Python PKG-INFO"].items()
}
string_fields = [
"name",
"version",
"description",
"summary",
"author",
"author-email",
]
_parser = email.parser.BytesHeaderParser(policy=LinebreakPreservingEmailPolicy())
[docs]
def translate(self, content):
msg = self._parser.parsebytes(content)
d = {}
for key, value in msg.items():
key = _normalize_pkginfo_key(key)
if value != "UNKNOWN":
d.setdefault(key, []).append(value)
return self._translate_dict(d)
[docs]
def normalize_home_page(self, urls):
return [URIRef(url) for url in urls]
[docs]
def normalize_keywords(self, keywords):
return [Literal(keyword) for s in keywords for keyword in s.split(" ")]
[docs]
def normalize_license(self, licenses):
return [URIRef("https://spdx.org/licenses/" + license) for license in licenses]