swh.storage.cassandra.model module
Classes representing tables in the Cassandra database.
They are very close to classes found in swh.model.model, but most of
them are subtly different:
* Large objects are split into other classes (e.g. RevisionRow has no
  ‘parents’ field, because parents are stored in a different table,
  represented by RevisionParentRow).
* They have a “cols” class method, which returns the list of column names
  of the table.
* They only use types that map directly to Cassandra’s schema (i.e. no enums).
Therefore, this model doesn’t reuse swh.model.model, except for types
that can be mapped to Cassandra user-defined types (UDTs), namely Person
and TimestampWithTimezone.
-
swh.storage.cassandra.model.MAGIC_NULL_PK = b'<null>'
NULLs (or all-empty blobs) are not allowed in primary keys; instead we use a
special value that can’t possibly be a valid hash.
-
swh.storage.cassandra.model.content_index_table_name(algo: str, skipped_content: bool) → str[source]
Given an algorithm name, returns the name of one of the ‘content_by_*’
or ‘skipped_content_by_*’ tables that serve as indexes for the ‘content’
and ‘skipped_content’ tables, based on this algorithm’s hashes.
For now it is a simple substitution, but future versions may append a version
number to it, if needed for schema updates.
-
class swh.storage.cassandra.model.BaseRow[source]
Bases: object
-
TABLE: ClassVar[str]
-
PARTITION_KEY: ClassVar[Tuple[str, ...]]
-
CLUSTERING_KEY: ClassVar[Tuple[str, ...]] = ()
-
classmethod from_dict(d: Dict[str, Any]) → T[source]
-
classmethod cols() → List[str][source]
-
to_dict() → Dict[str, Any][source]
-
class swh.storage.cassandra.model.ContentRow(sha1: bytes, sha1_git: bytes, sha256: bytes, blake2s256: bytes, length: int, ctime: datetime.datetime, status: str)[source]
Bases: BaseRow
-
TABLE: ClassVar[str] = 'content'
-
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('sha256',)
-
CLUSTERING_KEY: ClassVar[Tuple[str, ...]] = ('sha1', 'sha1_git', 'blake2s256')
-
sha1: bytes
-
sha1_git: bytes
-
sha256: bytes
-
blake2s256: bytes
-
length: int
-
ctime: datetime
-
status: str
-
class swh.storage.cassandra.model.SkippedContentRow(sha1: Union[bytes, NoneType], sha1_git: Union[bytes, NoneType], sha256: Union[bytes, NoneType], blake2s256: Union[bytes, NoneType], length: Union[int, NoneType], ctime: Union[datetime.datetime, NoneType], status: str, reason: str, origin: str)[source]
Bases: BaseRow
-
TABLE: ClassVar[str] = 'skipped_content'
-
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('sha1', 'sha1_git', 'sha256', 'blake2s256')
-
sha1: Optional[bytes]
-
sha1_git: Optional[bytes]
-
sha256: Optional[bytes]
-
blake2s256: Optional[bytes]
-
length: Optional[int]
-
ctime: Optional[datetime]
-
status: str
-
reason: str
-
origin: str
-
classmethod from_dict(d: Dict[str, Any]) → SkippedContentRow[source]
-
class swh.storage.cassandra.model.DirectoryRow(id: bytes, raw_manifest: Union[bytes, NoneType])[source]
Bases: BaseRow
-
TABLE: ClassVar[str] = 'directory'
-
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('id',)
-
id: bytes
-
raw_manifest: Optional[bytes]
-
class swh.storage.cassandra.model.DirectoryEntryRow(directory_id: bytes, name: bytes, target: bytes, perms: int, type: str)[source]
Bases: BaseRow
-
TABLE: ClassVar[str] = 'directory_entry'
-
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('directory_id',)
-
CLUSTERING_KEY: ClassVar[Tuple[str, ...]] = ('name',)
-
directory_id: bytes
-
name: bytes
-
target: bytes
-
perms: int
-
type: str
-
class swh.storage.cassandra.model.RevisionRow(id: bytes, date: Union[swh.model.model.TimestampWithTimezone, NoneType], committer_date: Union[swh.model.model.TimestampWithTimezone, NoneType], type: str, directory: bytes, message: bytes, author: swh.model.model.Person, committer: swh.model.model.Person, synthetic: bool, metadata: str, extra_headers: dict, raw_manifest: Union[bytes, NoneType])[source]
Bases: BaseRow
-
TABLE: ClassVar[str] = 'revision'
-
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('id',)
-
id: bytes
-
date: Optional[TimestampWithTimezone]
-
committer_date: Optional[TimestampWithTimezone]
-
type: str
-
directory: bytes
-
message: bytes
-
author: Person
-
committer: Person
-
synthetic: bool
-
metadata: str
-
extra_headers: dict
-
raw_manifest: Optional[bytes]
-
class swh.storage.cassandra.model.RevisionParentRow(id: bytes, parent_rank: int, parent_id: bytes)[source]
Bases: BaseRow
-
TABLE: ClassVar[str] = 'revision_parent'
-
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('id',)
-
CLUSTERING_KEY: ClassVar[Tuple[str, ...]] = ('parent_rank',)
-
id: bytes
-
parent_rank: int
-
parent_id: bytes
-
class swh.storage.cassandra.model.ReleaseRow(id: bytes, target_type: str, target: bytes, date: swh.model.model.TimestampWithTimezone, name: bytes, message: bytes, author: swh.model.model.Person, synthetic: bool, raw_manifest: Union[bytes, NoneType])[source]
Bases: BaseRow
-
TABLE: ClassVar[str] = 'release'
-
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('id',)
-
id: bytes
-
target_type: str
-
target: bytes
-
date: TimestampWithTimezone
-
name: bytes
-
message: bytes
-
author: Person
-
synthetic: bool
-
raw_manifest: Optional[bytes]
-
class swh.storage.cassandra.model.SnapshotRow(id: bytes)[source]
Bases: BaseRow
-
TABLE: ClassVar[str] = 'snapshot'
-
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('id',)
-
id: bytes
-
class swh.storage.cassandra.model.SnapshotBranchRow(snapshot_id: bytes, name: bytes, target_type: Union[str, NoneType], target: Union[bytes, NoneType])[source]
Bases: BaseRow
-
TABLE: ClassVar[str] = 'snapshot_branch'
-
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('snapshot_id',)
-
CLUSTERING_KEY: ClassVar[Tuple[str, ...]] = ('name',)
-
snapshot_id: bytes
-
name: bytes
-
target_type: Optional[str]
-
target: Optional[bytes]
-
class swh.storage.cassandra.model.OriginVisitRow(origin: str, visit: int, date: datetime.datetime, type: str)[source]
Bases: BaseRow
-
TABLE: ClassVar[str] = 'origin_visit'
-
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('origin',)
-
CLUSTERING_KEY: ClassVar[Tuple[str, ...]] = ('visit',)
-
origin: str
-
visit: int
-
date: datetime
-
type: str
-
class swh.storage.cassandra.model.OriginVisitStatusRow(origin: str, visit: int, date: datetime.datetime, type: str, status: str, metadata: str, snapshot: bytes)[source]
Bases: BaseRow
-
TABLE: ClassVar[str] = 'origin_visit_status'
-
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('origin',)
-
CLUSTERING_KEY: ClassVar[Tuple[str, ...]] = ('visit', 'date')
-
origin: str
-
visit: int
-
date: datetime
-
type: str
-
status: str
-
metadata: str
-
snapshot: bytes
-
classmethod from_dict(d: Dict[str, Any]) → T[source]
-
class swh.storage.cassandra.model.OriginRow(sha1: bytes, url: str, next_visit_id: int)[source]
Bases: BaseRow
-
TABLE: ClassVar[str] = 'origin'
-
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('sha1',)
-
sha1: bytes
-
url: str
-
next_visit_id: int
-
class swh.storage.cassandra.model.MetadataAuthorityRow(url: str, type: str)[source]
Bases: BaseRow
-
TABLE: ClassVar[str] = 'metadata_authority'
-
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('url',)
-
CLUSTERING_KEY: ClassVar[Tuple[str, ...]] = ('type',)
-
url: str
-
type: str
-
class swh.storage.cassandra.model.MetadataFetcherRow(name: str, version: str)[source]
Bases: BaseRow
-
TABLE: ClassVar[str] = 'metadata_fetcher'
-
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('name',)
-
CLUSTERING_KEY: ClassVar[Tuple[str, ...]] = ('version',)
-
name: str
-
version: str
-
class swh.storage.cassandra.model.RawExtrinsicMetadataRow(id: bytes, type: str, target: str, authority_type: str, authority_url: str, discovery_date: datetime.datetime, fetcher_name: str, fetcher_version: str, format: str, metadata: bytes, origin: Union[str, NoneType], visit: Union[int, NoneType], snapshot: Union[str, NoneType], release: Union[str, NoneType], revision: Union[str, NoneType], path: Union[bytes, NoneType], directory: Union[str, NoneType])[source]
Bases: BaseRow
-
TABLE: ClassVar[str] = 'raw_extrinsic_metadata'
-
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('target',)
-
CLUSTERING_KEY: ClassVar[Tuple[str, ...]] = ('authority_type', 'authority_url', 'discovery_date', 'id')
-
id: bytes
-
type: str
-
target: str
-
authority_type: str
-
authority_url: str
-
discovery_date: datetime
-
fetcher_name: str
-
fetcher_version: str
-
format: str
-
metadata: bytes
-
origin: Optional[str]
-
visit: Optional[int]
-
snapshot: Optional[str]
-
release: Optional[str]
-
revision: Optional[str]
-
path: Optional[bytes]
-
directory: Optional[str]
-
class swh.storage.cassandra.model.RawExtrinsicMetadataByIdRow(id: bytes, target: str, authority_type: str, authority_url: str)[source]
Bases: BaseRow
-
TABLE: ClassVar[str] = 'raw_extrinsic_metadata_by_id'
-
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('id',)
-
CLUSTERING_KEY: ClassVar[Tuple[str, ...]] = ()
-
id: bytes
-
target: str
-
authority_type: str
-
authority_url: str
-
class swh.storage.cassandra.model.ObjectCountRow(partition_key: int, object_type: str, count: int)[source]
Bases: BaseRow
-
TABLE: ClassVar[str] = 'object_count'
-
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('partition_key',)
-
CLUSTERING_KEY: ClassVar[Tuple[str, ...]] = ('object_type',)
-
partition_key: int
-
object_type: str
-
count: int
-
class swh.storage.cassandra.model.ExtIDRow(extid_type: str, extid: bytes, extid_version: int, target_type: str, target: bytes)[source]
Bases: BaseRow
-
TABLE: ClassVar[str] = 'extid'
-
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('target', 'target_type', 'extid_version', 'extid', 'extid_type')
-
extid_type: str
-
extid: bytes
-
extid_version: int
-
target_type: str
-
target: bytes
-
class swh.storage.cassandra.model.ExtIDByTargetRow(target_type: str, target: bytes, target_token: int)[source]
Bases: BaseRow
-
TABLE: ClassVar[str] = 'extid_by_target'
-
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('target_type', 'target')
-
CLUSTERING_KEY: ClassVar[Tuple[str, ...]] = ('target_token',)
-
target_type: str
-
target: bytes
-
target_token: int