swh.indexer.metadata_dictionary.maven module

class swh.indexer.metadata_dictionary.maven.MavenMapping(log_suffix='')[source]

Bases: swh.indexer.metadata_dictionary.base.DictMapping, swh.indexer.metadata_dictionary.base.SingleFileMapping

Dedicated class for Maven (pom.xml) mapping and translation.

name = 'maven'
filename = b'pom.xml'
mapping = {'ciManagement': 'https://codemeta.github.io/terms/contIntegration', 'description': 'http://schema.org/description', 'groupId': 'http://schema.org/identifier', 'issuesManagement': 'https://codemeta.github.io/terms/issueTracker', 'license': 'http://schema.org/license', 'name': 'http://schema.org/name', 'repositories': 'http://schema.org/codeRepository', 'version': 'http://schema.org/version'}
string_fields = ['name', 'version', 'description', 'email']
translate(content)[source]
parse_repositories(d)[source]

https://maven.apache.org/pom.html#Repositories

>>> import xmltodict
>>> from pprint import pprint
>>> d = xmltodict.parse('''
... <repositories>
...   <repository>
...     <id>codehausSnapshots</id>
...     <name>Codehaus Snapshots</name>
...     <url>http://snapshots.maven.codehaus.org/maven2</url>
...     <layout>default</layout>
...   </repository>
... </repositories>
... ''')
>>> MavenMapping().parse_repositories(d)
parse_repository(d, repo)[source]
normalize_groupId(id_)[source]

https://maven.apache.org/pom.html#Maven_Coordinates

>>> MavenMapping().normalize_groupId('org.example')
{'@id': 'org.example'}
parse_licenses(d)[source]

https://maven.apache.org/pom.html#Licenses

>>> import xmltodict
>>> import json
>>> d = xmltodict.parse('''
... <licenses>
...   <license>
...     <name>Apache License, Version 2.0</name>
...     <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
...   </license>
... </licenses>
... ''')
>>> print(json.dumps(d, indent=4))
{
    "licenses": {
        "license": {
            "name": "Apache License, Version 2.0",
            "url": "https://www.apache.org/licenses/LICENSE-2.0.txt"
        }
    }
}
>>> MavenMapping().parse_licenses(d)
[{'@id': 'https://www.apache.org/licenses/LICENSE-2.0.txt'}]

or, if there is more than one license:

>>> import xmltodict
>>> from pprint import pprint
>>> d = xmltodict.parse('''
... <licenses>
...   <license>
...     <name>Apache License, Version 2.0</name>
...     <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
...   </license>
...   <license>
...     <name>MIT License</name>
...     <url>https://opensource.org/licenses/MIT</url>
...   </license>
... </licenses>
... ''')
>>> pprint(MavenMapping().parse_licenses(d))
[{'@id': 'https://www.apache.org/licenses/LICENSE-2.0.txt'},
 {'@id': 'https://opensource.org/licenses/MIT'}]