swh.indexer.metadata_dictionary.npm module#

class swh.indexer.metadata_dictionary.npm.NpmMapping(log_suffix='')[source]#

Bases: JsonMapping, SingleFileIntrinsicMapping

Dedicated class for NPM (package.json) mapping and translation.

name = 'npm'#
mapping = {'author': rdflib.term.URIRef('http://schema.org/author'), 'author.email': rdflib.term.URIRef('http://schema.org/email'), 'author.name': rdflib.term.URIRef('http://schema.org/name'), 'bugs': rdflib.term.URIRef('https://codemeta.github.io/terms/issueTracker'), 'contributors': rdflib.term.URIRef('http://schema.org/contributor'), 'cpu': rdflib.term.URIRef('http://schema.org/processorRequirements'), 'description': rdflib.term.URIRef('http://schema.org/description'), 'engines': rdflib.term.URIRef('http://schema.org/runtimePlatform'), 'homepage': rdflib.term.URIRef('http://schema.org/url'), 'keywords': rdflib.term.URIRef('http://schema.org/keywords'), 'license': rdflib.term.URIRef('http://schema.org/license'), 'name': rdflib.term.URIRef('http://schema.org/name'), 'os': rdflib.term.URIRef('http://schema.org/operatingSystem'), 'repository': rdflib.term.URIRef('http://schema.org/codeRepository'), 'version': rdflib.term.URIRef('http://schema.org/version')}#
filename: bytes | Pattern[bytes] = b'package.json'#
string_fields: List[str] = ['name', 'version', 'description', 'email']#

List of fields that are simple strings, and don’t need any normalization.

uri_fields: List[str] = ['homepage']#

List of fields that are simple URIs, and don’t need any normalization.

normalize_repository(d)[source]#

https://docs.npmjs.com/files/package.json#repository

>>> NpmMapping().normalize_repository({
...     'type': 'git',
...     'url': 'https://example.org/foo.git'
... })
rdflib.term.URIRef('git+https://example.org/foo.git')
>>> NpmMapping().normalize_repository(
...     'gitlab:foo/bar')
rdflib.term.URIRef('git+https://gitlab.com/foo/bar.git')
>>> NpmMapping().normalize_repository(
...     'foo/bar')
rdflib.term.URIRef('git+https://github.com/foo/bar.git')
normalize_bugs(d)[source]#

https://docs.npmjs.com/files/package.json#bugs

>>> NpmMapping().normalize_bugs({
...     'url': 'https://example.org/bugs/',
...     'email': 'bugs@example.org'
... })
rdflib.term.URIRef('https://example.org/bugs/')
>>> NpmMapping().normalize_bugs(
...     'https://example.org/bugs/')
rdflib.term.URIRef('https://example.org/bugs/')
translate_author(graph: Graph, root, d)[source]#

https://docs.npmjs.com/files/package.json#people-fields-author-contributors

>>> from pprint import pprint
>>> root = URIRef("http://example.org/test-software")
>>> graph = Graph()
>>> NpmMapping().translate_author(graph, root, {
...     'name': 'John Doe',
...     'email': 'john.doe@example.org',
...     'url': 'https://example.org/~john.doe',
... })
>>> prettyprint_graph(graph, root)
{
    "@id": ...,
    "http://schema.org/author": {
        "@list": [
            {
                "@type": "http://schema.org/Person",
                "http://schema.org/email": "john.doe@example.org",
                "http://schema.org/name": "John Doe",
                "http://schema.org/url": {
                    "@id": "https://example.org/~john.doe"
                }
            }
        ]
    }
}
>>> graph = Graph()
>>> NpmMapping().translate_author(graph, root,
...     'John Doe <john.doe@example.org> (https://example.org/~john.doe)'
... )
>>> prettyprint_graph(graph, root)
{
    "@id": ...,
    "http://schema.org/author": {
        "@list": [
            {
                "@type": "http://schema.org/Person",
                "http://schema.org/email": "john.doe@example.org",
                "http://schema.org/name": "John Doe",
                "http://schema.org/url": {
                    "@id": "https://example.org/~john.doe"
                }
            }
        ]
    }
}
>>> graph = Graph()
>>> NpmMapping().translate_author(graph, root, {
...     'name': 'John Doe',
...     'email': 'john.doe@example.org',
...     'url': 'https:\\\\example.invalid/~john.doe',
... })
>>> prettyprint_graph(graph, root)
{
    "@id": ...,
    "http://schema.org/author": {
        "@list": [
            {
                "@type": "http://schema.org/Person",
                "http://schema.org/email": "john.doe@example.org",
                "http://schema.org/name": "John Doe"
            }
        ]
    }
}
normalize_description(description)[source]#

Try to re-decode the description as UTF-16. Descriptions that were written as UTF-16 but decoded with another encoding are a somewhat common mistake, and they cause issues in the database because of the null bytes they leave in the JSON.

>>> NpmMapping().normalize_description("foo bar")
rdflib.term.Literal('foo bar')
>>> NpmMapping().normalize_description(
...     "\ufffd\ufffd#\x00 \x00f\x00o\x00o\x00 \x00b\x00a\x00r\x00\r\x00 \x00"
... )
rdflib.term.Literal('foo bar')
>>> NpmMapping().normalize_description(
...     "\ufffd\ufffd\x00#\x00 \x00f\x00o\x00o\x00 \x00b\x00a\x00r\x00\r\x00 "
... )
rdflib.term.Literal('foo bar')
>>> NpmMapping().normalize_description(
...     # invalid UTF-16 and meaningless UTF-8:
...     "\ufffd\ufffd\x00#\x00\x00\x00 \x00\x00\x00\x00f\x00\x00\x00\x00"
... ) is None
True
>>> NpmMapping().normalize_description(
...     # ditto (but looks like little-endian at first)
...     "\ufffd\ufffd#\x00\x00\x00 \x00\x00\x00\x00f\x00\x00\x00\x00\x00"
... ) is None
True
>>> NpmMapping().normalize_description(None) is None
True
normalize_license(s)[source]#

https://docs.npmjs.com/files/package.json#license

>>> NpmMapping().normalize_license('MIT')
rdflib.term.URIRef('https://spdx.org/licenses/MIT')
normalize_keywords(lst)[source]#

https://docs.npmjs.com/files/package.json#keywords

>>> NpmMapping().normalize_keywords(['foo', 'bar'])
[rdflib.term.Literal('foo'), rdflib.term.Literal('bar')]