# Source code for swh.storage.proxies.counter

# Copyright (C) 2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information


from typing import Callable

from swh.counters import get_counters
from swh.counters.interface import CountersInterface
from swh.storage import get_storage
from swh.storage.interface import StorageInterface

# Object type names known to this module, in their original order.
# NOTE(review): nothing in the visible part of this module references this
# list -- presumably it is consumed elsewhere; confirm before changing it.
OBJECT_TYPES = [
    "content", "directory", "snapshot",
    "origin_visit_status", "origin_visit", "origin",
]


class CountingProxyStorage:
    """Counting Storage Proxy.

    This is in charge of adding objects directly to swh-counters, without
    going through Kafka/swh-journal.
    This is meant as a simple way to setup counters for experiments; production
    should use swh-journal to reduce load/latency of the storage server.

    Additionally, unlike the journal-based counting, it does not count persons
    or the number of origins per netloc.

    Sample configuration use case for counting storage:

    .. code-block:: yaml

        storage:
          cls: counter
          counters:
            cls: remote
            url: http://counters.internal.staging.swh.network:5011/
          storage:
            cls: remote
            url: http://storage.internal.staging.swh.network:5002/

    """

    def __init__(self, counters, storage):
        """Instantiate the counters and storage backends.

        Args:
            counters: keyword arguments forwarded to ``get_counters``
            storage: keyword arguments forwarded to ``get_storage``
        """
        self.counters: CountersInterface = get_counters(**counters)
        self.storage: StorageInterface = get_storage(**storage)

    def __getattr__(self, key):
        """Resolve attributes not found on the proxy itself.

        Names ending in ``_add`` are looked up on the proxied storage and
        returned wrapped by :meth:`_adder`, using the name without its
        ``_add`` suffix as the collection. Any other name is returned from
        the proxied storage unchanged.

        Raises:
            AttributeError: if ``storage`` itself is requested, or if the
                proxied storage does not have the attribute.
        """
        if key == "storage":
            # __getattr__ only runs when regular lookup failed, so reaching
            # this point means self.storage is not set. Raise instead of
            # recursing forever through the self.storage accesses below.
            raise AttributeError(key)
        if key.endswith("_add"):
            # Strip the 4-character "_add" suffix to get the collection name.
            return self._adder(key[0:-4], getattr(self.storage, key))
        return getattr(self.storage, key)

    def _adder(self, collection: str, backend_function: Callable):
        """Wrap ``backend_function`` so that added objects are counted first.

        Args:
            collection: collection name passed to ``self.counters.add``
            backend_function: callable that receives the objects afterwards

        Returns:
            A function taking one iterable of objects. It sends the
            ``unique_key()`` of each object to the counters, then returns the
            result of ``backend_function`` called with a list of the same
            objects.
        """

        def f(objs):
            # The objects are traversed twice (key extraction, then the
            # backend call). Materialize them once so that a generator or
            # other single-pass iterable is not already exhausted when the
            # backend receives it.
            objs = list(objs)
            self.counters.add(collection, [obj.unique_key() for obj in objs])
            return backend_function(objs)

        return f