Source code for swh.indexer.metadata_dictionary.ruby

# Copyright (C) 2018-2022  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import ast
import itertools
import re

from rdflib import RDF, BNode, Graph, Literal, URIRef

from swh.indexer.codemeta import CROSSWALK_TABLE
from swh.indexer.namespaces import SCHEMA

from .base import DictMapping, SingleFileIntrinsicMapping
from .utils import add_map

SPDX = URIRef("https://spdx.org/licenses/")


[docs] def name_to_person(graph: Graph, name): if not isinstance(name, str): return None author = BNode() graph.add((author, RDF.type, SCHEMA.Person)) graph.add((author, SCHEMA.name, Literal(name))) return author
[docs] class GemspecMapping(DictMapping, SingleFileIntrinsicMapping): name = "gemspec" filename = re.compile(rb".*\.gemspec") mapping = CROSSWALK_TABLE["Ruby Gem"] string_fields = ["name", "version", "description", "summary", "email"] uri_fields = ["homepage"] _re_spec_new = re.compile(r".*Gem::Specification.new +(do|\{) +\|.*\|.*") _re_spec_entry = re.compile(r"\s*\w+\.(?P<key>\w+)\s*=\s*(?P<expr>.*)")
[docs] def translate(self, raw_content): try: raw_content = raw_content.decode() except UnicodeDecodeError: self.log.warning("Error unidecoding from %s", self.log_suffix) return # Skip lines before 'Gem::Specification.new' lines = itertools.dropwhile( lambda x: not self._re_spec_new.match(x), raw_content.split("\n") ) try: next(lines) # Consume 'Gem::Specification.new' except StopIteration: self.log.warning("Could not find Gem::Specification in %s", self.log_suffix) return content_dict = {} for line in lines: match = self._re_spec_entry.match(line) if match: value = self.eval_ruby_expression(match.group("expr")) if value: content_dict[match.group("key")] = value return self._translate_dict(content_dict)
[docs] def eval_ruby_expression(self, expr): """Very simple evaluator of Ruby expressions. >>> GemspecMapping().eval_ruby_expression('"Foo bar"') 'Foo bar' >>> GemspecMapping().eval_ruby_expression("'Foo bar'") 'Foo bar' >>> GemspecMapping().eval_ruby_expression("['Foo', 'bar']") ['Foo', 'bar'] >>> GemspecMapping().eval_ruby_expression("'Foo bar'.freeze") 'Foo bar' >>> GemspecMapping().eval_ruby_expression( \ "['Foo'.freeze, 'bar'.freeze]") ['Foo', 'bar'] """ def evaluator(node): if isinstance(node, ast.Str): return node.s elif isinstance(node, ast.List): res = [] for element in node.elts: val = evaluator(element) if not val: return res.append(val) return res expr = expr.replace(".freeze", "") try: # We're parsing Ruby expressions here, but Python's # ast.parse works for very simple Ruby expressions # (mainly strings delimited with " or ', and lists # of such strings). tree = ast.parse(expr, mode="eval") except (SyntaxError, ValueError): return if isinstance(tree, ast.Expression): return evaluator(tree.body)
[docs] def normalize_license(self, s): if isinstance(s, str): return SPDX + s
[docs] def normalize_licenses(self, licenses): if isinstance(licenses, list): return [SPDX + license for license in licenses if isinstance(license, str)]
[docs] def translate_author(self, graph: Graph, root, author): if isinstance(author, str): add_map(graph, root, SCHEMA.author, name_to_person, [author])
[docs] def translate_authors(self, graph: Graph, root, authors): if isinstance(authors, list): add_map(graph, root, SCHEMA.author, name_to_person, authors)