swh.indexer.metadata_dictionary.npm module#
- class swh.indexer.metadata_dictionary.npm.NpmMapping(log_suffix='')[source]#
Bases: JsonMapping, SingleFileIntrinsicMapping
Dedicated class for NPM (package.json) mapping and translation.
- name = 'npm'#
- mapping = {'author': rdflib.term.URIRef('http://schema.org/author'), 'author.email': rdflib.term.URIRef('http://schema.org/email'), 'author.name': rdflib.term.URIRef('http://schema.org/name'), 'bugs': rdflib.term.URIRef('https://codemeta.github.io/terms/issueTracker'), 'contributors': rdflib.term.URIRef('http://schema.org/contributor'), 'cpu': rdflib.term.URIRef('http://schema.org/processorRequirements'), 'description': rdflib.term.URIRef('http://schema.org/description'), 'engines': rdflib.term.URIRef('http://schema.org/runtimePlatform'), 'homepage': rdflib.term.URIRef('http://schema.org/url'), 'keywords': rdflib.term.URIRef('http://schema.org/keywords'), 'license': rdflib.term.URIRef('http://schema.org/license'), 'name': rdflib.term.URIRef('http://schema.org/name'), 'os': rdflib.term.URIRef('http://schema.org/operatingSystem'), 'repository': rdflib.term.URIRef('http://schema.org/codeRepository'), 'version': rdflib.term.URIRef('http://schema.org/version')}#
- string_fields: List[str] = ['name', 'version', 'description', 'email']#
List of fields that are simple strings, and don’t need any normalization.
- uri_fields: List[str] = ['homepage']#
List of fields that are simple URIs, and don’t need any normalization.
- normalize_repository(d)[source]#
https://docs.npmjs.com/files/package.json#repository
>>> NpmMapping().normalize_repository({ ... 'type': 'git', ... 'url': 'https://example.org/foo.git' ... }) rdflib.term.URIRef('git+https://example.org/foo.git') >>> NpmMapping().normalize_repository( ... 'gitlab:foo/bar') rdflib.term.URIRef('git+https://gitlab.com/foo/bar.git') >>> NpmMapping().normalize_repository( ... 'foo/bar') rdflib.term.URIRef('git+https://github.com/foo/bar.git')
- normalize_bugs(d)[source]#
https://docs.npmjs.com/files/package.json#bugs
>>> NpmMapping().normalize_bugs({ ... 'url': 'https://example.org/bugs/', ... 'email': 'bugs@example.org' ... }) rdflib.term.URIRef('https://example.org/bugs/') >>> NpmMapping().normalize_bugs( ... 'https://example.org/bugs/') rdflib.term.URIRef('https://example.org/bugs/')
- translate_author(graph: Graph, root, d)[source]#
https://docs.npmjs.com/files/package.json#people-fields-author-contributors
>>> from pprint import pprint >>> root = URIRef("http://example.org/test-software") >>> graph = Graph() >>> NpmMapping().translate_author(graph, root, { ... 'name': 'John Doe', ... 'email': 'john.doe@example.org', ... 'url': 'https://example.org/~john.doe', ... }) >>> prettyprint_graph(graph, root) { "@id": ..., "http://schema.org/author": { "@list": [ { "@type": "http://schema.org/Person", "http://schema.org/email": "john.doe@example.org", "http://schema.org/name": "John Doe", "http://schema.org/url": { "@id": "https://example.org/~john.doe" } } ] } } >>> graph = Graph() >>> NpmMapping().translate_author(graph, root, ... 'John Doe <john.doe@example.org> (https://example.org/~john.doe)' ... ) >>> prettyprint_graph(graph, root) { "@id": ..., "http://schema.org/author": { "@list": [ { "@type": "http://schema.org/Person", "http://schema.org/email": "john.doe@example.org", "http://schema.org/name": "John Doe", "http://schema.org/url": { "@id": "https://example.org/~john.doe" } } ] } } >>> graph = Graph() >>> NpmMapping().translate_author(graph, root, { ... 'name': 'John Doe', ... 'email': 'john.doe@example.org', ... 'url': 'https:\\\\example.invalid/~john.doe', ... }) >>> prettyprint_graph(graph, root) { "@id": ..., "http://schema.org/author": { "@list": [ { "@type": "http://schema.org/Person", "http://schema.org/email": "john.doe@example.org", "http://schema.org/name": "John Doe" } ] } }
- normalize_description(description)[source]#
Try to re-decode description as UTF-16, as this is a somewhat common mistake that causes issues in the database because of null bytes in JSON.
>>> NpmMapping().normalize_description("foo bar") rdflib.term.Literal('foo bar') >>> NpmMapping().normalize_description( ... "\ufffd\ufffd#\x00 \x00f\x00o\x00o\x00 \x00b\x00a\x00r\x00\r\x00 \x00" ... ) rdflib.term.Literal('foo bar') >>> NpmMapping().normalize_description( ... "\ufffd\ufffd\x00#\x00 \x00f\x00o\x00o\x00 \x00b\x00a\x00r\x00\r\x00 " ... ) rdflib.term.Literal('foo bar') >>> NpmMapping().normalize_description( ... # invalid UTF-16 and meaningless UTF-8: ... "\ufffd\ufffd\x00#\x00\x00\x00 \x00\x00\x00\x00f\x00\x00\x00\x00" ... ) is None True >>> NpmMapping().normalize_description( ... # ditto (but looks like little-endian at first) ... "\ufffd\ufffd#\x00\x00\x00 \x00\x00\x00\x00f\x00\x00\x00\x00\x00" ... ) is None True >>> NpmMapping().normalize_description(None) is None True
- normalize_license(s)[source]#
https://docs.npmjs.com/files/package.json#license
>>> NpmMapping().normalize_license('MIT') rdflib.term.URIRef('https://spdx.org/licenses/MIT')
- normalize_keywords(lst)[source]#
https://docs.npmjs.com/files/package.json#keywords
>>> NpmMapping().normalize_keywords(['foo', 'bar']) [rdflib.term.Literal('foo'), rdflib.term.Literal('bar')]