# Source code for swh.web.save_code_now.management.commands.dump_savecodenow_data
# Copyright (C) The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
import csv
import hashlib
from django.core.management.base import BaseCommand
[docs]
class Command(BaseCommand):
    """Django management command dumping Save Code Now requests to CSV."""

    help = "Dump Save Code Now requests data to CSV"

    def add_arguments(self, parser):
        """Register the optional ``--output-file`` argument on ``parser``."""
        parser.add_argument(
            "--output-file",
            type=str,
            default="",
            help="optional path to dump CSV file, dump to stdout by default",
        )

    def handle(self, *args, **options):
        """Dump Save Code Now requests data to CSV.

        Dump to stdout or to file the following CSV data about Save Code Now
        requests:

        * date of request
        * requested visit type
        * URL of origin to save
        * status of the request
        * status of visit by SWH
        * whether the request was triggered by a webhook
        * user id (anonymized) that created the request

        One row is written per user id attached to a request; a request
        without any user id produces a single row with an empty ``user_id``.

        Args:
            options: parsed command options; ``options["output_file"]`` is the
                destination path, an empty value means "write to stdout".
        """
        # Imported lazily, as in the original code, so the model is only
        # loaded when the command actually runs.
        from swh.web.save_code_now.models import SaveOriginRequest

        scn_requests = SaveOriginRequest.objects
        output_path = options["output_file"]

        if not output_path:
            self._dump_requests(self.stdout, scn_requests.iterator())
            return

        # ``newline=""`` is what the csv module expects for files it writes
        # to; the ``with`` block guarantees the file is closed even when the
        # query or a write raises.
        with open(output_path, "w", newline="") as output:
            self._dump_requests(output, scn_requests.iterator())

    def _dump_requests(self, output, scn_requests):
        """Write the CSV header and one row per (request, user id) pair.

        Args:
            output: object with a ``write`` method receiving the CSV text.
            scn_requests: iterable of Save Code Now request objects.
        """
        fieldnames = [
            "request_date",
            "visit_type",
            "origin_url",
            "request_status",
            "visit_status",
            "from_webhook",
            "user_id",
        ]
        writer = csv.DictWriter(output, fieldnames=fieldnames)
        writer.writeheader()

        for scn_request in scn_requests:
            for user_id in self._anonymize_user_ids(scn_request.user_ids):
                writer.writerow(
                    {
                        "request_date": scn_request.request_date,
                        "visit_type": scn_request.visit_type,
                        "origin_url": scn_request.origin_url,
                        "request_status": scn_request.status,
                        "visit_status": scn_request.visit_status,
                        # rendered as lowercase "true" / "false"
                        "from_webhook": str(scn_request.from_webhook).lower(),
                        "user_id": user_id,
                    }
                )

    @staticmethod
    def _anonymize_user_ids(user_ids):
        """Return opaque identifiers for a comma-separated string of user ids.

        Each id is replaced by the first 7 hexadecimal characters of its SHA1
        digest. A falsy ``user_ids`` value yields a single empty string so
        that the request still gets one CSV row.
        """
        if not user_ids:
            return [""]
        return [
            hashlib.sha1(user_id.encode()).hexdigest()[:7]
            for user_id in user_ids.split(",")
        ]