swh.storage.cassandra.model module

Classes representing tables in the Cassandra database.

They are very close to classes found in swh.model.model, but most of them are subtly different:

  • Large objects are split into other classes (e.g. RevisionRow has no ‘parents’ field, because parents are stored in a different table, represented by RevisionParentRow)

  • They have a “cols” class method, which returns the list of column names of the table

  • They only use types that map directly to Cassandra’s schema (i.e. no enums)

Therefore, this model doesn’t reuse swh.model.model, except for types that can be mapped to UDTs (Person and TimestampWithTimezone).

swh.storage.cassandra.model.MAGIC_NULL_PK = b'<null>'

NULLs (or all-empty blobs) are not allowed in primary keys; instead we use a special value that can’t possibly be a valid hash.

class swh.storage.cassandra.model.BaseRow[source]

Bases: object

TABLE: ClassVar[str]
PARTITION_KEY: ClassVar[Tuple[str, ...]]
CLUSTERING_KEY: ClassVar[Tuple[str, ...]] = ()
classmethod from_dict(d: Dict[str, Any]) → T[source]
classmethod cols() → List[str][source]
to_dict() → Dict[str, Any][source]
class swh.storage.cassandra.model.ContentRow(sha1: bytes, sha1_git: bytes, sha256: bytes, blake2s256: bytes, length: int, ctime: datetime.datetime, status: str)[source]

Bases: swh.storage.cassandra.model.BaseRow

TABLE: ClassVar[str] = 'content'
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('sha1', 'sha1_git', 'sha256', 'blake2s256')
sha1: bytes
sha1_git: bytes
sha256: bytes
blake2s256: bytes
length: int
ctime: datetime.datetime
status: str
class swh.storage.cassandra.model.SkippedContentRow(sha1: Union[bytes, NoneType], sha1_git: Union[bytes, NoneType], sha256: Union[bytes, NoneType], blake2s256: Union[bytes, NoneType], length: Union[int, NoneType], ctime: Union[datetime.datetime, NoneType], status: str, reason: str, origin: str)[source]

Bases: swh.storage.cassandra.model.BaseRow

TABLE: ClassVar[str] = 'skipped_content'
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('sha1', 'sha1_git', 'sha256', 'blake2s256')
sha1: Optional[bytes]
sha1_git: Optional[bytes]
sha256: Optional[bytes]
blake2s256: Optional[bytes]
length: Optional[int]
ctime: Optional[datetime.datetime]
status: str
reason: str
origin: str
classmethod from_dict(d: Dict[str, Any]) → swh.storage.cassandra.model.SkippedContentRow[source]
class swh.storage.cassandra.model.DirectoryRow(id: bytes)[source]

Bases: swh.storage.cassandra.model.BaseRow

TABLE: ClassVar[str] = 'directory'
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('id',)
id: bytes
class swh.storage.cassandra.model.DirectoryEntryRow(directory_id: bytes, name: bytes, target: bytes, perms: int, type: str)[source]

Bases: swh.storage.cassandra.model.BaseRow

TABLE: ClassVar[str] = 'directory_entry'
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('directory_id',)
CLUSTERING_KEY: ClassVar[Tuple[str, ...]] = ('name',)
directory_id: bytes
name: bytes
target: bytes
perms: int
type: str
class swh.storage.cassandra.model.RevisionRow(id: bytes, date: Union[swh.model.model.TimestampWithTimezone, NoneType], committer_date: Union[swh.model.model.TimestampWithTimezone, NoneType], type: str, directory: bytes, message: bytes, author: swh.model.model.Person, committer: swh.model.model.Person, synthetic: bool, metadata: str, extra_headers: dict)[source]

Bases: swh.storage.cassandra.model.BaseRow

TABLE: ClassVar[str] = 'revision'
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('id',)
id: bytes
date: Optional[swh.model.model.TimestampWithTimezone]
committer_date: Optional[swh.model.model.TimestampWithTimezone]
type: str
directory: bytes
message: bytes
author: swh.model.model.Person
committer: swh.model.model.Person
synthetic: bool
metadata: str
extra_headers: dict
class swh.storage.cassandra.model.RevisionParentRow(id: bytes, parent_rank: int, parent_id: bytes)[source]

Bases: swh.storage.cassandra.model.BaseRow

TABLE: ClassVar[str] = 'revision_parent'
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('id',)
CLUSTERING_KEY: ClassVar[Tuple[str, ...]] = ('parent_rank',)
id: bytes
parent_rank: int
parent_id: bytes
class swh.storage.cassandra.model.ReleaseRow(id: bytes, target_type: str, target: bytes, date: swh.model.model.TimestampWithTimezone, name: bytes, message: bytes, author: swh.model.model.Person, synthetic: bool)[source]

Bases: swh.storage.cassandra.model.BaseRow

TABLE: ClassVar[str] = 'release'
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('id',)
id: bytes
target_type: str
target: bytes
date: swh.model.model.TimestampWithTimezone
name: bytes
message: bytes
author: swh.model.model.Person
synthetic: bool
class swh.storage.cassandra.model.SnapshotRow(id: bytes)[source]

Bases: swh.storage.cassandra.model.BaseRow

TABLE: ClassVar[str] = 'snapshot'
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('id',)
id: bytes
class swh.storage.cassandra.model.SnapshotBranchRow(snapshot_id: bytes, name: bytes, target_type: Union[str, NoneType], target: Union[bytes, NoneType])[source]

Bases: swh.storage.cassandra.model.BaseRow

TABLE: ClassVar[str] = 'snapshot_branch'
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('snapshot_id',)
CLUSTERING_KEY: ClassVar[Tuple[str, ...]] = ('name',)
snapshot_id: bytes
name: bytes
target_type: Optional[str]
target: Optional[bytes]
class swh.storage.cassandra.model.OriginVisitRow(origin: str, visit: int, date: datetime.datetime, type: str)[source]

Bases: swh.storage.cassandra.model.BaseRow

TABLE: ClassVar[str] = 'origin_visit'
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('origin',)
CLUSTERING_KEY: ClassVar[Tuple[str, ...]] = ('visit',)
origin: str
visit: int
date: datetime.datetime
type: str
class swh.storage.cassandra.model.OriginVisitStatusRow(origin: str, visit: int, date: datetime.datetime, status: str, metadata: str, snapshot: bytes)[source]

Bases: swh.storage.cassandra.model.BaseRow

TABLE: ClassVar[str] = 'origin_visit_status'
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('origin',)
CLUSTERING_KEY: ClassVar[Tuple[str, ...]] = ('visit', 'date')
origin: str
visit: int
date: datetime.datetime
status: str
metadata: str
snapshot: bytes
class swh.storage.cassandra.model.OriginRow(sha1: bytes, url: str, next_visit_id: int)[source]

Bases: swh.storage.cassandra.model.BaseRow

TABLE: ClassVar[str] = 'origin'
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('sha1',)
sha1: bytes
url: str
next_visit_id: int
class swh.storage.cassandra.model.MetadataAuthorityRow(url: str, type: str, metadata: str)[source]

Bases: swh.storage.cassandra.model.BaseRow

TABLE: ClassVar[str] = 'metadata_authority'
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('url',)
CLUSTERING_KEY: ClassVar[Tuple[str, ...]] = ('type',)
url: str
type: str
metadata: str
class swh.storage.cassandra.model.MetadataFetcherRow(name: str, version: str, metadata: str)[source]

Bases: swh.storage.cassandra.model.BaseRow

TABLE: ClassVar[str] = 'metadata_fetcher'
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('name',)
CLUSTERING_KEY: ClassVar[Tuple[str, ...]] = ('version',)
name: str
version: str
metadata: str
class swh.storage.cassandra.model.RawExtrinsicMetadataRow(type: str, id: str, authority_type: str, authority_url: str, discovery_date: datetime.datetime, fetcher_name: str, fetcher_version: str, format: str, metadata: bytes, origin: Union[str, NoneType], visit: Union[int, NoneType], snapshot: Union[str, NoneType], release: Union[str, NoneType], revision: Union[str, NoneType], path: Union[bytes, NoneType], directory: Union[str, NoneType])[source]

Bases: swh.storage.cassandra.model.BaseRow

TABLE: ClassVar[str] = 'raw_extrinsic_metadata'
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('id',)
CLUSTERING_KEY: ClassVar[Tuple[str, ...]] = ('authority_type', 'authority_url', 'discovery_date', 'fetcher_name', 'fetcher_version')
type: str
id: str
authority_type: str
authority_url: str
discovery_date: datetime.datetime
fetcher_name: str
fetcher_version: str
format: str
metadata: bytes
origin: Optional[str]
visit: Optional[int]
snapshot: Optional[str]
release: Optional[str]
revision: Optional[str]
path: Optional[bytes]
directory: Optional[str]
class swh.storage.cassandra.model.ObjectCountRow(partition_key: int, object_type: str, count: int)[source]

Bases: swh.storage.cassandra.model.BaseRow

TABLE: ClassVar[str] = 'object_count'
PARTITION_KEY: ClassVar[Tuple[str, ...]] = ('partition_key',)
CLUSTERING_KEY: ClassVar[Tuple[str, ...]] = ('object_type',)
partition_key: int
object_type: str
count: int