swh.storage.cassandra package#
Submodules:
- swh.storage.cassandra.common module
- swh.storage.cassandra.converters module
- swh.storage.cassandra.cql module
PARTITION_KEY_RESTRICTION_MAX_SIZE
get_execution_profiles()
create_keyspace()
CqlRunner
CqlRunner.MAX_RETRIES
CqlRunner.content_add_prepare()
CqlRunner.content_get_from_pk()
CqlRunner.content_missing_from_all_hashes()
CqlRunner.content_get_from_tokens()
CqlRunner.content_get_random()
CqlRunner.content_get_token_range()
CqlRunner.content_index_add_one()
CqlRunner.content_get_tokens_from_single_algo()
CqlRunner.skipped_content_add_prepare()
CqlRunner.skipped_content_get_from_pk()
CqlRunner.skipped_content_get_from_token()
CqlRunner.skipped_content_index_add_one()
CqlRunner.skipped_content_get_tokens_from_single_hash()
CqlRunner.directory_missing()
CqlRunner.directory_add_one()
CqlRunner.directory_get_random()
CqlRunner.directory_get()
CqlRunner.directory_get_token_range()
CqlRunner.directory_entry_add_one()
CqlRunner.directory_entry_add_concurrent()
CqlRunner.directory_entry_add_batch()
CqlRunner.directory_entry_get()
CqlRunner.directory_entry_get_from_name()
CqlRunner.revision_missing()
CqlRunner.revision_add_one()
CqlRunner.revision_get_ids()
CqlRunner.revision_get()
CqlRunner.revision_get_random()
CqlRunner.revision_get_token_range()
CqlRunner.revision_parent_add_one()
CqlRunner.revision_parent_get()
CqlRunner.release_missing()
CqlRunner.release_add_one()
CqlRunner.release_get()
CqlRunner.release_get_random()
CqlRunner.release_get_token_range()
CqlRunner.snapshot_missing()
CqlRunner.snapshot_add_one()
CqlRunner.snapshot_get_random()
CqlRunner.snapshot_get_token_range()
CqlRunner.snapshot_branch_add_one()
CqlRunner.snapshot_count_branches_from_name()
CqlRunner.snapshot_count_branches_before_name()
CqlRunner.snapshot_count_branches()
CqlRunner.snapshot_branch_get_from_name()
CqlRunner.snapshot_branch_get_range()
CqlRunner.snapshot_branch_get()
CqlRunner.origin_add_one()
CqlRunner.origin_get_by_sha1()
CqlRunner.origin_get_by_url()
CqlRunner.origin_list()
CqlRunner.origin_iter_all()
CqlRunner.origin_bump_next_visit_id()
CqlRunner.origin_generate_unique_visit_id()
CqlRunner.origin_visit_get()
CqlRunner.origin_visit_add_one()
CqlRunner.origin_visit_get_one()
CqlRunner.origin_visit_iter_all()
CqlRunner.origin_visit_iter()
CqlRunner.origin_visit_status_get_range()
CqlRunner.origin_visit_status_get_all_range()
CqlRunner.origin_visit_status_add_one()
CqlRunner.origin_visit_status_get_latest()
CqlRunner.origin_visit_status_get()
CqlRunner.origin_snapshot_get_all()
CqlRunner.raw_extrinsic_metadata_by_id_add()
CqlRunner.raw_extrinsic_metadata_get_by_ids()
CqlRunner.raw_extrinsic_metadata_add()
CqlRunner.raw_extrinsic_metadata_get_after_date()
CqlRunner.raw_extrinsic_metadata_get_after_date_and_id()
CqlRunner.raw_extrinsic_metadata_get()
CqlRunner.raw_extrinsic_metadata_get_authorities()
CqlRunner.metadata_authority_add()
CqlRunner.metadata_authority_get()
CqlRunner.metadata_fetcher_add()
CqlRunner.metadata_fetcher_get()
CqlRunner.extid_add_prepare()
CqlRunner.extid_get_from_pk()
CqlRunner.extid_get_from_token()
CqlRunner.extid_get_from_token_and_extid_version()
CqlRunner.extid_get_from_extid()
CqlRunner.extid_get_from_extid_and_version()
CqlRunner.extid_get_from_target()
CqlRunner.extid_index_add_one()
CqlRunner.stat_counters()
CqlRunner.check_read()
- swh.storage.cassandra.model module
MAGIC_NULL_PK
content_index_table_name()
BaseRow
ContentRow
SkippedContentRow
SkippedContentRow.TABLE
SkippedContentRow.PARTITION_KEY
SkippedContentRow.sha1
SkippedContentRow.sha1_git
SkippedContentRow.sha256
SkippedContentRow.blake2s256
SkippedContentRow.length
SkippedContentRow.ctime
SkippedContentRow.status
SkippedContentRow.reason
SkippedContentRow.origin
SkippedContentRow.from_dict()
DirectoryRow
DirectoryEntryRow
RevisionRow
RevisionParentRow
ReleaseRow
SnapshotRow
SnapshotBranchRow
OriginVisitRow
OriginVisitStatusRow
OriginVisitStatusRow.TABLE
OriginVisitStatusRow.PARTITION_KEY
OriginVisitStatusRow.CLUSTERING_KEY
OriginVisitStatusRow.origin
OriginVisitStatusRow.visit
OriginVisitStatusRow.date
OriginVisitStatusRow.type
OriginVisitStatusRow.status
OriginVisitStatusRow.metadata
OriginVisitStatusRow.snapshot
OriginVisitStatusRow.from_dict()
OriginRow
MetadataAuthorityRow
MetadataFetcherRow
RawExtrinsicMetadataRow
RawExtrinsicMetadataRow.TABLE
RawExtrinsicMetadataRow.PARTITION_KEY
RawExtrinsicMetadataRow.CLUSTERING_KEY
RawExtrinsicMetadataRow.id
RawExtrinsicMetadataRow.type
RawExtrinsicMetadataRow.target
RawExtrinsicMetadataRow.authority_type
RawExtrinsicMetadataRow.authority_url
RawExtrinsicMetadataRow.discovery_date
RawExtrinsicMetadataRow.fetcher_name
RawExtrinsicMetadataRow.fetcher_version
RawExtrinsicMetadataRow.format
RawExtrinsicMetadataRow.metadata
RawExtrinsicMetadataRow.origin
RawExtrinsicMetadataRow.visit
RawExtrinsicMetadataRow.snapshot
RawExtrinsicMetadataRow.release
RawExtrinsicMetadataRow.revision
RawExtrinsicMetadataRow.path
RawExtrinsicMetadataRow.directory
RawExtrinsicMetadataByIdRow
ObjectCountRow
ExtIDRow
ExtIDByTargetRow
- swh.storage.cassandra.schema module
- swh.storage.cassandra.storage module
CassandraStorage
CassandraStorage.hosts
CassandraStorage.keyspace
CassandraStorage.port
CassandraStorage.check_config()
CassandraStorage.content_add()
CassandraStorage.content_update()
CassandraStorage.content_add_metadata()
CassandraStorage.content_get_data()
CassandraStorage.content_get_partition()
CassandraStorage.content_get()
CassandraStorage.content_find()
CassandraStorage.content_missing()
CassandraStorage.content_missing_per_sha1()
CassandraStorage.content_missing_per_sha1_git()
CassandraStorage.content_get_random()
CassandraStorage.skipped_content_add()
CassandraStorage.skipped_content_missing()
CassandraStorage.directory_add()
CassandraStorage.directory_missing()
CassandraStorage.directory_entry_get_by_path()
CassandraStorage.directory_ls()
CassandraStorage.directory_get_entries()
CassandraStorage.directory_get_raw_manifest()
CassandraStorage.directory_get_random()
CassandraStorage.directory_get_id_partition()
CassandraStorage.revision_add()
CassandraStorage.revision_missing()
CassandraStorage.revision_get()
CassandraStorage.revision_get_partition()
CassandraStorage.revision_log()
CassandraStorage.revision_shortlog()
CassandraStorage.revision_get_random()
CassandraStorage.release_add()
CassandraStorage.release_missing()
CassandraStorage.release_get()
CassandraStorage.release_get_partition()
CassandraStorage.release_get_random()
CassandraStorage.snapshot_add()
CassandraStorage.snapshot_missing()
CassandraStorage.snapshot_get()
CassandraStorage.snapshot_get_id_partition()
CassandraStorage.snapshot_count_branches()
CassandraStorage.snapshot_get_branches()
CassandraStorage.snapshot_get_random()
CassandraStorage.origin_get()
CassandraStorage.origin_get_one()
CassandraStorage.origin_get_by_sha1()
CassandraStorage.origin_list()
CassandraStorage.origin_search()
CassandraStorage.origin_count()
CassandraStorage.origin_snapshot_get_all()
CassandraStorage.origin_add()
CassandraStorage.origin_visit_add()
CassandraStorage.origin_visit_status_add()
CassandraStorage.origin_visit_get()
CassandraStorage.origin_visit_get_with_statuses()
CassandraStorage.origin_visit_status_get()
CassandraStorage.origin_visit_find_by_date()
CassandraStorage.origin_visit_get_by()
CassandraStorage.origin_visit_get_latest()
CassandraStorage.origin_visit_status_get_latest()
CassandraStorage.origin_visit_status_get_random()
CassandraStorage.object_find_by_sha1_git()
CassandraStorage.stat_counters()
CassandraStorage.refresh_stat_counters()
CassandraStorage.raw_extrinsic_metadata_add()
CassandraStorage.raw_extrinsic_metadata_get()
CassandraStorage.raw_extrinsic_metadata_get_by_ids()
CassandraStorage.raw_extrinsic_metadata_get_authorities()
CassandraStorage.metadata_fetcher_add()
CassandraStorage.metadata_fetcher_get()
CassandraStorage.metadata_authority_add()
CassandraStorage.metadata_authority_get()
CassandraStorage.extid_add()
CassandraStorage.extid_get_from_extid()
CassandraStorage.extid_get_from_target()
CassandraStorage.clear_buffers()
CassandraStorage.flush()
Module contents:
- swh.storage.cassandra.create_keyspace(hosts: List[str], keyspace: str, port: int = 9042, *, durable_writes=True)[source]#
- class swh.storage.cassandra.CassandraStorage(hosts, keyspace, objstorage, port=9042, journal_writer=None, allow_overwrite=False, consistency_level='ONE', directory_entries_insert_algo='one-by-one')[source]#
Bases: object
A backend of swh-storage backed by Cassandra
- Parameters:
hosts – Seed Cassandra nodes, to start connecting to the cluster
keyspace – Name of the Cassandra database to use
objstorage – Passed as argument to ObjStorage
port – Cassandra port
journal_writer – Passed as argument to JournalWriter
allow_overwrite – Controls whether *_add functions check if an object already exists in the database before sending it in an INSERT. False is the default, as it is more efficient when there is a moderately high probability the object is already known, but True can be useful to overwrite existing objects (e.g. when applying a schema update), or when the database is known to be mostly empty. Note that a False value does not guarantee there won’t be any overwrite.
consistency_level – The default read/write consistency to use
directory_entries_insert_algo – Must be one of:
* one-by-one: naive, one INSERT per directory entry, serialized
* concurrent: one INSERT per directory entry, concurrent
* batch: using UNLOGGED BATCH to insert many entries in a few statements
- content_get_partition(partition_id: int, nb_partitions: int, page_token: Optional[str] = None, limit: int = 1000) PagedResult[Content, str] [source]#
- directory_entry_get_by_path(directory: bytes, paths: List[bytes]) Optional[Dict[str, Any]] [source]#
- directory_get_entries(directory_id: bytes, page_token: Optional[bytes] = None, limit: int = 1000) Optional[PagedResult[DirectoryEntry, str]] [source]#
- directory_get_id_partition(partition_id: int, nb_partitions: int, page_token: Optional[str] = None, limit: int = 1000) PagedResult[bytes, str] [source]#
- revision_get(revision_ids: List[bytes], ignore_displayname: bool = False) List[Optional[Revision]] [source]#
- revision_get_partition(partition_id: int, nb_partitions: int, page_token: Optional[str] = None, limit: int = 1000) PagedResult[Revision, str] [source]#
- revision_log(revisions: List[bytes], ignore_displayname: bool = False, limit: Optional[int] = None) Iterable[Optional[Dict[str, Any]]] [source]#
- revision_shortlog(revisions: List[bytes], limit: Optional[int] = None) Iterable[Optional[Tuple[bytes, Tuple[bytes, ...]]]] [source]#
- release_get(releases: List[bytes], ignore_displayname: bool = False) List[Optional[Release]] [source]#
- release_get_partition(partition_id: int, nb_partitions: int, page_token: Optional[str] = None, limit: int = 1000) PagedResult[Release, str] [source]#
- snapshot_get_id_partition(partition_id: int, nb_partitions: int, page_token: Optional[str] = None, limit: int = 1000) PagedResult[bytes, str] [source]#
- snapshot_count_branches(snapshot_id: bytes, branch_name_exclude_prefix: Optional[bytes] = None) Optional[Dict[Optional[str], int]] [source]#
- snapshot_get_branches(snapshot_id: bytes, branches_from: bytes = b'', branches_count: int = 1000, target_types: Optional[List[str]] = None, branch_name_include_substring: Optional[bytes] = None, branch_name_exclude_prefix: Optional[bytes] = None) Optional[PartialBranches] [source]#
- origin_get_one(origin_url: str) Optional[Origin] [source]#
Given an origin URL, return the origin if it exists, None otherwise.
- origin_search(url_pattern: str, page_token: Optional[str] = None, limit: int = 50, regexp: bool = False, with_visit: bool = False, visit_types: Optional[List[str]] = None) PagedResult[Origin, str] [source]#
- origin_visit_add(visits: List[OriginVisit]) Iterable[OriginVisit] [source]#
- origin_visit_get(origin: str, page_token: Optional[str] = None, order: ListOrder = ListOrder.ASC, limit: int = 10) PagedResult[OriginVisit, str] [source]#
- origin_visit_get_with_statuses(origin: str, allowed_statuses: Optional[List[str]] = None, require_snapshot: bool = False, page_token: Optional[str] = None, order: ListOrder = ListOrder.ASC, limit: int = 10) PagedResult[OriginVisitWithStatuses, str] [source]#
- origin_visit_status_get(origin: str, visit: int, page_token: Optional[str] = None, order: ListOrder = ListOrder.ASC, limit: int = 10) PagedResult[OriginVisitStatus, str] [source]#
- origin_visit_get_latest(origin: str, type: Optional[str] = None, allowed_statuses: Optional[List[str]] = None, require_snapshot: bool = False) Optional[OriginVisit] [source]#
- origin_visit_status_get_latest(origin_url: str, visit: int, allowed_statuses: Optional[List[str]] = None, require_snapshot: bool = False) Optional[OriginVisitStatus] [source]#
- origin_visit_status_get_random(type: str) Optional[OriginVisitStatus] [source]#
- raw_extrinsic_metadata_get(target: ExtendedSWHID, authority: MetadataAuthority, after: Optional[datetime] = None, page_token: Optional[bytes] = None, limit: int = 1000) PagedResult[RawExtrinsicMetadata, str] [source]#
- raw_extrinsic_metadata_get_authorities(target: ExtendedSWHID) List[MetadataAuthority] [source]#
- metadata_authority_get(type: MetadataAuthorityType, url: str) Optional[MetadataAuthority] [source]#
- extid_get_from_extid(id_type: str, ids: List[bytes], version: Optional[int] = None) List[ExtID] [source]#