swh.dataset package# Subpackages# swh.dataset.exporters package Submodules swh.dataset.exporters.edges module swh.dataset.exporters.orc module Module contents swh.dataset.test namespace Submodules swh.dataset.test.test_edges module swh.dataset.test.test_journal_processor module swh.dataset.test.test_orc module swh.dataset.test.test_utils module Submodules# swh.dataset.athena module create_database() drop_table() create_table() repair_table() query() create_tables() human_size() run_query_get_results() generate_subdataset() swh.dataset.cli module get_masked_swhids() run_export_graph() swh.dataset.exporter module Exporter Exporter.process_object() Exporter.get_unique_file_id() ExporterDispatch ExporterDispatch.process_object() swh.dataset.journalprocessor module JournalClientOffsetRanges JournalClientOffsetRanges.subscribe() JournalClientOffsetRanges.unsubscribe() JournalClientOffsetRanges.process() JournalClientOffsetRanges.handle_offset() JournalClientOffsetRanges.deserialize_message() JournalClientOffsetRanges.handle_messages() ParallelJournalProcessor ParallelJournalProcessor.get_offsets() ParallelJournalProcessor.run() ParallelJournalProcessor.progress_worker() ParallelJournalProcessor.export_worker() JournalProcessorWorker JournalProcessorWorker.get_node_set_for_object() JournalProcessorWorker.run() JournalProcessorWorker.process_messages() JournalProcessorWorker.process_message() swh.dataset.luigi module Luigi tasks File layout Running all on staging ObjectType ObjectType.origin ObjectType.origin_visit ObjectType.origin_visit_status ObjectType.snapshot ObjectType.release ObjectType.revision ObjectType.directory ObjectType.content ObjectType.skipped_content Format Format.edges Format.orc merge_lists() PathParameter PathParameter.parse() S3PathParameter S3PathParameter.normalize() FractionalFloatParameter FractionalFloatParameter.parse() stamps_paths() ExportGraph ExportGraph.config_file ExportGraph.local_export_path ExportGraph.export_id ExportGraph.formats ExportGraph.processes ExportGraph.margin ExportGraph.object_types ExportGraph.output() ExportGraph.complete() ExportGraph.run() UploadExportToS3 UploadExportToS3.local_export_path UploadExportToS3.formats UploadExportToS3.object_types UploadExportToS3.s3_export_path UploadExportToS3.requires() UploadExportToS3.output() UploadExportToS3.complete() UploadExportToS3.run() DownloadExportFromS3 DownloadExportFromS3.local_export_path DownloadExportFromS3.formats DownloadExportFromS3.object_types DownloadExportFromS3.s3_export_path DownloadExportFromS3.parallelism DownloadExportFromS3.requires() DownloadExportFromS3.output() DownloadExportFromS3.complete() DownloadExportFromS3.run() LocalExport LocalExport.local_export_path LocalExport.formats LocalExport.object_types LocalExport.export_task_type LocalExport.requires() LocalExport.output() LocalExport.complete() AthenaDatabaseTarget AthenaDatabaseTarget.exists() CreateAthena CreateAthena.object_types CreateAthena.s3_export_path CreateAthena.s3_athena_output_location CreateAthena.athena_db_name CreateAthena.requires() CreateAthena.output() CreateAthena.run() RunExportAll RunExportAll.formats RunExportAll.object_types RunExportAll.s3_export_path RunExportAll.s3_athena_output_location RunExportAll.athena_db_name RunExportAll.requires() swh.dataset.relational module BLOOM_FILTER_COLUMNS swh.dataset.utils module ZSTFile ZSTFile.read() ZSTFile.write() SQLiteSet SQLiteSet.add() LevelDBSet LevelDBSet.add() remove_pull_requests() Module contents#