Source code for swh.indexer.storage.model
# Copyright (C) 2020-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""Classes used internally by the in-memory idx-storage, and will be
used for the interface of the idx-storage in the near future."""
from __future__ import annotations
import json
from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar
import attr
from typing_extensions import Final
from swh.model.model import Sha1Git, dictify
# Type variable standing for "the class the method is called on"; it lets
# ``anonymize`` and ``from_dict`` be annotated as returning that same type.
TSelf = TypeVar("TSelf")
[docs]
@attr.s
class BaseRow:
    """Common base of the row classes defined in this module.

    A row holds an ``id`` plus exactly one of ``indexer_configuration_id``
    or ``tool``; both are keyword-only and default to ``None``.
    """

    # Names of the attributes that `unique_key` reads in addition to the
    # tool description. Subclasses may override this tuple.
    UNIQUE_KEY_FIELDS: Tuple = ("id",)

    id = attr.ib(type=Any)
    indexer_configuration_id = attr.ib(type=Optional[int], default=None, kw_only=True)
    tool = attr.ib(type=Optional[Dict], default=None, kw_only=True)

    def __attrs_post_init__(self):
        """Check that exactly one of the two tool references is set.

        Raises:
            TypeError: if ``indexer_configuration_id`` and ``tool`` are both
                ``None``, or both not ``None``.
        """
        has_config_id = self.indexer_configuration_id is not None
        has_tool = self.tool is not None
        if not (has_config_id or has_tool):
            raise TypeError("Either indexer_configuration_id or tool must be not None.")
        if has_config_id and has_tool:
            raise TypeError(
                "indexer_configuration_id and tool are mutually exclusive; "
                "only one may be not None."
            )

    def anonymize(self: TSelf) -> Optional[TSelf]:
        """Always return ``None``.

        Needed to implement swh.journal.writer.ValueProtocol.
        """
        return None

    def to_dict(self) -> Dict[str, Any]:
        """Wrapper of `attr.asdict` that can be overridden by subclasses
        that have special handling of some of the fields.

        The ``indexer_configuration_id`` and ``tool`` entries are left out of
        the result when their value is ``None``.
        """
        row = dictify(attr.asdict(self, recurse=False))
        for optional_key in ("indexer_configuration_id", "tool"):
            if row[optional_key] is None:
                del row[optional_key]
        return row

    @classmethod
    def from_dict(cls: Type[TSelf], d) -> TSelf:
        """Build an instance by passing the items of ``d`` as keyword
        arguments to the class constructor."""
        return cls(**d)

    def unique_key(self) -> Dict:
        """Return a dictionary combining the ``UNIQUE_KEY_FIELDS`` attributes
        with the tool's name, version and JSON-serialized configuration.

        Raises:
            ValueError: if ``tool`` is falsy (``None`` or empty).
        """
        tool = self.tool
        if not tool:
            raise ValueError(
                f"Cannot compute unique_key of {self.__class__.__name__} with no tool "
                f"dictionary (indexer_configuration_id was given instead)"
            )

        # The tool entries are computed first, then layered on top of the
        # field entries, so tool keys win on a name clash.
        tool_part = dict(
            tool_name=tool["name"],
            tool_version=tool["version"],
            # sort_keys makes the serialization independent of key order.
            tool_configuration=json.dumps(tool["configuration"], sort_keys=True),
        )
        key = {name: getattr(self, name) for name in self.UNIQUE_KEY_FIELDS}
        key.update(tool_part)
        return key
[docs]
@attr.s
class ContentMimetypeRow(BaseRow):
    """Row of type ``content_mimetype``: a ``Sha1Git`` id together with a
    ``mimetype`` string and an ``encoding`` string."""

    object_type: Final = "content_mimetype"

    # Declaration order defines the positional order of the constructor
    # arguments; keep it as is.
    id = attr.ib(type=Sha1Git)
    mimetype = attr.ib(type=str)
    encoding = attr.ib(type=str)
[docs]
@attr.s
class ContentLicenseRow(BaseRow):
    """Row of type ``content_fossology_license``: a ``Sha1Git`` id together
    with a ``license`` string."""

    object_type: Final = "content_fossology_license"

    # The license is part of the unique key, in addition to the id.
    UNIQUE_KEY_FIELDS = ("id", "license")

    # Declaration order defines the positional order of the constructor
    # arguments; keep it as is.
    id = attr.ib(type=Sha1Git)
    license = attr.ib(type=str)
[docs]
@attr.s
class ContentMetadataRow(BaseRow):
    """Row of type ``content_metadata``: a ``Sha1Git`` id together with a
    ``metadata`` dictionary keyed by strings."""

    object_type: Final = "content_metadata"

    # Declaration order defines the positional order of the constructor
    # arguments; keep it as is.
    id = attr.ib(type=Sha1Git)
    metadata = attr.ib(type=Dict[str, Any])