swh.indexer.metadata_dictionary.base module#
- swh.indexer.metadata_dictionary.base.TMP_ROOT_URI_PREFIX = 'https://www.softwareheritage.org/schema/2022/indexer/tmp-node/'#
Prefix used to generate temporary URIs for root nodes being translated.
- swh.indexer.metadata_dictionary.base.produce_terms(*uris: str) Callable[[TTranslateCallable], TTranslateCallable] [source]#
Returns a decorator that marks the decorated function as adding the given terms to the
translated_metadata
dict
- class swh.indexer.metadata_dictionary.base.BaseMapping(log_suffix='')[source]#
Bases:
object
Base class for
BaseExtrinsicMapping
andBaseIntrinsicMapping
, not to be inherited directly.- property name#
A name of this mapping, used as an identifier in the indexer storage.
- translate(raw_content: bytes) Dict | None [source]#
Translates content by parsing content from a bytestring containing mapping-specific data and translating with the appropriate mapping to JSON-LD using the Codemeta and ForgeFed vocabularies.
- Parameters:
raw_content – raw content to translate
- Returns:
translated metadata in JSON friendly form needed for the content if parseable,
None
otherwise.
- class swh.indexer.metadata_dictionary.base.BaseExtrinsicMapping(log_suffix='')[source]#
Bases:
BaseMapping
Base class for extrinsic_metadata mappings to inherit from
To implement a new mapping:
inherit this class
override translate function
- class swh.indexer.metadata_dictionary.base.BaseIntrinsicMapping(log_suffix='')[source]#
Bases:
BaseMapping
Base class for intrinsic-metadata mappings to inherit from
To implement a new mapping:
inherit this class
override translate function
- class swh.indexer.metadata_dictionary.base.SingleFileIntrinsicMapping(log_suffix='')[source]#
Bases:
BaseIntrinsicMapping
Base class for all intrinsic metadata mappings that use a single file as input.
- class swh.indexer.metadata_dictionary.base.DictMapping(log_suffix='')[source]#
Bases:
BaseMapping
Base class for mappings that take as input a file that is mostly a key-value store (eg. a shallow JSON dict).
- string_fields: List[str] = []#
List of fields that are simple strings, and don’t need any normalization.
- date_fields: List[str] = []#
//schema.org/Date
- Type:
List of fields that are strings that should be typed as http
- property mapping#
A translation dict to map dict keys into a canonical name.
- class swh.indexer.metadata_dictionary.base.JsonMapping(log_suffix='')[source]#
Bases:
DictMapping
Base class for all mappings that use JSON data as input.
- translate(raw_content: bytes) Dict | None [source]#
Translates content by parsing content from a bytestring containing mapping-specific data and translating with the appropriate mapping to JSON-LD using the Codemeta and ForgeFed vocabularies.
- Parameters:
raw_content – raw content to translate
- Returns:
translated metadata in JSON friendly form needed for the content if parseable,
None
otherwise.
- class swh.indexer.metadata_dictionary.base.XmlMapping(log_suffix='')[source]#
Bases:
DictMapping
Base class for all mappings that use XML data as input.
- translate(raw_content: bytes) Dict | None [source]#
Translates content by parsing content from a bytestring containing mapping-specific data and translating with the appropriate mapping to JSON-LD using the Codemeta and ForgeFed vocabularies.
- Parameters:
raw_content – raw content to translate
- Returns:
translated metadata in JSON friendly form needed for the content if parseable,
None
otherwise.
- class swh.indexer.metadata_dictionary.base.SafeLoader(stream)[source]#
Bases:
SafeLoader
Initialize the scanner.
- yaml_implicit_resolvers = {'': [('tag:yaml.org,2002:null', re.compile('^(?: ~\n |null|Null|NULL\n | )$', re.VERBOSE))], '!': [('tag:yaml.org,2002:yaml', re.compile('^(?:!|&|\\*)$'))], '&': [('tag:yaml.org,2002:yaml', re.compile('^(?:!|&|\\*)$'))], '*': [('tag:yaml.org,2002:yaml', re.compile('^(?:!|&|\\*)$'))], '+': [('tag:yaml.org,2002:float', re.compile('^(?:[-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+][0-9]+)?\n |\\.[0-9][0-9_]*(?:[eE][-+][0-9]+)?\n |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*\n , re.VERBOSE)), ('tag:yaml.org,2002:int', re.compile('^(?:[-+]?0b[0-1_]+\n |[-+]?0[0-7_]+\n |[-+]?(?:0|[1-9][0-9_]*)\n |[-+]?0x[0-9a-fA-F_]+\n |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9]), re.VERBOSE))], '-': [('tag:yaml.org,2002:float', re.compile('^(?:[-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+][0-9]+)?\n |\\.[0-9][0-9_]*(?:[eE][-+][0-9]+)?\n |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*\n , re.VERBOSE)), ('tag:yaml.org,2002:int', re.compile('^(?:[-+]?0b[0-1_]+\n |[-+]?0[0-7_]+\n |[-+]?(?:0|[1-9][0-9_]*)\n |[-+]?0x[0-9a-fA-F_]+\n |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9]), re.VERBOSE))], '.': [('tag:yaml.org,2002:float', re.compile('^(?:[-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+][0-9]+)?\n |\\.[0-9][0-9_]*(?:[eE][-+][0-9]+)?\n |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*\n , re.VERBOSE))], '0': [('tag:yaml.org,2002:float', re.compile('^(?:[-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+][0-9]+)?\n |\\.[0-9][0-9_]*(?:[eE][-+][0-9]+)?\n |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*\n , re.VERBOSE)), ('tag:yaml.org,2002:int', re.compile('^(?:[-+]?0b[0-1_]+\n |[-+]?0[0-7_]+\n |[-+]?(?:0|[1-9][0-9_]*)\n |[-+]?0x[0-9a-fA-F_]+\n |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9]), re.VERBOSE))], '1': [('tag:yaml.org,2002:float', re.compile('^(?:[-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+][0-9]+)?\n |\\.[0-9][0-9_]*(?:[eE][-+][0-9]+)?\n |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*\n , re.VERBOSE)), ('tag:yaml.org,2002:int', re.compile('^(?:[-+]?0b[0-1_]+\n |[-+]?0[0-7_]+\n |[-+]?(?:0|[1-9][0-9_]*)\n |[-+]?0x[0-9a-fA-F_]+\n |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9]), re.VERBOSE))], '2': [('tag:yaml.org,2002:float', re.compile('^(?:[-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+][0-9]+)?\n |\\.[0-9][0-9_]*(?:[eE][-+][0-9]+)?\n |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*\n , re.VERBOSE)), ('tag:yaml.org,2002:int', re.compile('^(?:[-+]?0b[0-1_]+\n |[-+]?0[0-7_]+\n |[-+]?(?:0|[1-9][0-9_]*)\n |[-+]?0x[0-9a-fA-F_]+\n |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9]), re.VERBOSE))], '3': [('tag:yaml.org,2002:float', re.compile('^(?:[-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+][0-9]+)?\n |\\.[0-9][0-9_]*(?:[eE][-+][0-9]+)?\n |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*\n , re.VERBOSE)), ('tag:yaml.org,2002:int', re.compile('^(?:[-+]?0b[0-1_]+\n |[-+]?0[0-7_]+\n |[-+]?(?:0|[1-9][0-9_]*)\n |[-+]?0x[0-9a-fA-F_]+\n |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9]), re.VERBOSE))], '4': [('tag:yaml.org,2002:float', re.compile('^(?:[-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+][0-9]+)?\n |\\.[0-9][0-9_]*(?:[eE][-+][0-9]+)?\n |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*\n , re.VERBOSE)), ('tag:yaml.org,2002:int', re.compile('^(?:[-+]?0b[0-1_]+\n |[-+]?0[0-7_]+\n |[-+]?(?:0|[1-9][0-9_]*)\n |[-+]?0x[0-9a-fA-F_]+\n |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9]), re.VERBOSE))], '5': [('tag:yaml.org,2002:float', re.compile('^(?:[-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+][0-9]+)?\n |\\.[0-9][0-9_]*(?:[eE][-+][0-9]+)?\n |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*\n , re.VERBOSE)), ('tag:yaml.org,2002:int', re.compile('^(?:[-+]?0b[0-1_]+\n |[-+]?0[0-7_]+\n |[-+]?(?:0|[1-9][0-9_]*)\n |[-+]?0x[0-9a-fA-F_]+\n |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9]), re.VERBOSE))], '6': [('tag:yaml.org,2002:float', re.compile('^(?:[-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+][0-9]+)?\n |\\.[0-9][0-9_]*(?:[eE][-+][0-9]+)?\n |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*\n , re.VERBOSE)), ('tag:yaml.org,2002:int', re.compile('^(?:[-+]?0b[0-1_]+\n |[-+]?0[0-7_]+\n |[-+]?(?:0|[1-9][0-9_]*)\n |[-+]?0x[0-9a-fA-F_]+\n |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9]), re.VERBOSE))], '7': [('tag:yaml.org,2002:float', re.compile('^(?:[-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+][0-9]+)?\n |\\.[0-9][0-9_]*(?:[eE][-+][0-9]+)?\n |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*\n , re.VERBOSE)), ('tag:yaml.org,2002:int', re.compile('^(?:[-+]?0b[0-1_]+\n |[-+]?0[0-7_]+\n |[-+]?(?:0|[1-9][0-9_]*)\n |[-+]?0x[0-9a-fA-F_]+\n |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9]), re.VERBOSE))], '8': [('tag:yaml.org,2002:float', re.compile('^(?:[-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+][0-9]+)?\n |\\.[0-9][0-9_]*(?:[eE][-+][0-9]+)?\n |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*\n , re.VERBOSE)), ('tag:yaml.org,2002:int', re.compile('^(?:[-+]?0b[0-1_]+\n |[-+]?0[0-7_]+\n |[-+]?(?:0|[1-9][0-9_]*)\n |[-+]?0x[0-9a-fA-F_]+\n |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9]), re.VERBOSE))], '9': [('tag:yaml.org,2002:float', re.compile('^(?:[-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+][0-9]+)?\n |\\.[0-9][0-9_]*(?:[eE][-+][0-9]+)?\n |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]*\n , re.VERBOSE)), ('tag:yaml.org,2002:int', re.compile('^(?:[-+]?0b[0-1_]+\n |[-+]?0[0-7_]+\n |[-+]?(?:0|[1-9][0-9_]*)\n |[-+]?0x[0-9a-fA-F_]+\n |[-+]?[1-9][0-9_]*(?::[0-5]?[0-9]), re.VERBOSE))], '<': [('tag:yaml.org,2002:merge', re.compile('^(?:<<)$'))], '=': [('tag:yaml.org,2002:value', re.compile('^(?:=)$'))], 'F': [('tag:yaml.org,2002:bool', re.compile('^(?:yes|Yes|YES|no|No|NO\n |true|True|TRUE|false|False|FALSE\n |on|On|ON|off|Off|OFF)$', re.VERBOSE))], 'N': [('tag:yaml.org,2002:bool', re.compile('^(?:yes|Yes|YES|no|No|NO\n |true|True|TRUE|false|False|FALSE\n |on|On|ON|off|Off|OFF)$', re.VERBOSE)), ('tag:yaml.org,2002:null', re.compile('^(?: ~\n |null|Null|NULL\n | )$', re.VERBOSE))], 'O': [('tag:yaml.org,2002:bool', re.compile('^(?:yes|Yes|YES|no|No|NO\n |true|True|TRUE|false|False|FALSE\n |on|On|ON|off|Off|OFF)$', re.VERBOSE))], 'T': [('tag:yaml.org,2002:bool', re.compile('^(?:yes|Yes|YES|no|No|NO\n |true|True|TRUE|false|False|FALSE\n |on|On|ON|off|Off|OFF)$', re.VERBOSE))], 'Y': [('tag:yaml.org,2002:bool', re.compile('^(?:yes|Yes|YES|no|No|NO\n |true|True|TRUE|false|False|FALSE\n |on|On|ON|off|Off|OFF)$', re.VERBOSE))], 'f': [('tag:yaml.org,2002:bool', re.compile('^(?:yes|Yes|YES|no|No|NO\n |true|True|TRUE|false|False|FALSE\n |on|On|ON|off|Off|OFF)$', re.VERBOSE))], 'n': [('tag:yaml.org,2002:bool', re.compile('^(?:yes|Yes|YES|no|No|NO\n |true|True|TRUE|false|False|FALSE\n |on|On|ON|off|Off|OFF)$', re.VERBOSE)), ('tag:yaml.org,2002:null', re.compile('^(?: ~\n |null|Null|NULL\n | )$', re.VERBOSE))], 'o': [('tag:yaml.org,2002:bool', re.compile('^(?:yes|Yes|YES|no|No|NO\n |true|True|TRUE|false|False|FALSE\n |on|On|ON|off|Off|OFF)$', re.VERBOSE))], 't': [('tag:yaml.org,2002:bool', re.compile('^(?:yes|Yes|YES|no|No|NO\n |true|True|TRUE|false|False|FALSE\n |on|On|ON|off|Off|OFF)$', re.VERBOSE))], 'y': [('tag:yaml.org,2002:bool', re.compile('^(?:yes|Yes|YES|no|No|NO\n |true|True|TRUE|false|False|FALSE\n |on|On|ON|off|Off|OFF)$', re.VERBOSE))], '~': [('tag:yaml.org,2002:null', re.compile('^(?: ~\n |null|Null|NULL\n | )$', re.VERBOSE))]}#
- class swh.indexer.metadata_dictionary.base.YamlMapping(log_suffix='')[source]#
Bases:
DictMapping
Base class for all mappings that use Yaml data as input.
- translate(raw_content: bytes) Dict[str, str] | None [source]#
Translates content by parsing content from a bytestring containing mapping-specific data and translating with the appropriate mapping to JSON-LD using the Codemeta and ForgeFed vocabularies.
- Parameters:
raw_content – raw content to translate
- Returns:
translated metadata in JSON friendly form needed for the content if parseable,
None
otherwise.