Source code for swh.perfecthash
# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from typing import NewType
from cffi import FFI
from swh.perfecthash._hash_cffi import lib
Key = NewType("Key", bytes)
HashObject = NewType("HashObject", bytes)
[docs]class Shard:
"""Low level management for files indexed with a perfect hash table.
This class allows creating a Read Shard by adding key/object pairs
and looking up the content of an object when given the key.
"""
def __init__(self, path: str):
"""Initialize with an existing Read Shard.
Args:
path: path to an existing Read Shard file or device
"""
self.ffi = FFI()
self.shard = lib.shard_init(path.encode("utf-8"))
def __del__(self):
lib.shard_destroy(self.shard)
[docs] @staticmethod
def key_len():
return lib.shard_key_len
[docs] def create(self, objects_count: int) -> "Shard":
"""Wipe out the content of the Read Shard. It must be followed by
**object_count** calls to the **write** method otherwise the content
of the Read Shard will be inconsistent. When all objects are inserted,
the Read Shard must be made persistent by calling the **save** method.
Args:
objects_count: number of objects in the Read Shard.
Returns:
self.
"""
assert lib.shard_create(self.shard, objects_count) != -1
return self
[docs] def load(self) -> "Shard":
"""Open the Read Shard file in read-only mode.
Returns:
self.
"""
assert lib.shard_load(self.shard) != -1
return self
[docs] def save(self) -> int:
"""Create the perfect hash table the **lookup** method
relies on to find the content of the objects.
It must be called after **create** and **write** otherwise the
content of the Read Shard will be inconsistent.
Returns:
0 on success, -1 on error.
"""
return lib.shard_save(self.shard)
[docs] def lookup(self, key: Key) -> HashObject:
"""Fetch the object matching the key in the Read Shard.
Fetching an object is O(1): one lookup in the index to obtain
the offset of the object in the Read Shard and one read to get
the payload.
Args:
key: the key associated with the object to retrieve.
Returns:
the object as bytes.
"""
object_size_pointer = self.ffi.new("uint64_t*")
lib.shard_lookup_object_size(self.shard, key, object_size_pointer)
object_size = object_size_pointer[0]
object_pointer = self.ffi.new("char[]", object_size)
lib.shard_lookup_object(self.shard, object_pointer, object_size)
return self.ffi.buffer(object_pointer, object_size)
[docs] def write(self, key: Key, object: HashObject) -> int:
"""Add the key/object pair to the Read Shard.
The **create** method must have been called prior to calling
the **write** method.
Args:
key: the unique key associated with the object.
object: the object
Returns:
0 on success, -1 on error.
"""
if len(key) != Shard.key_len():
raise ValueError(f"key length is {len(key)} instead of {Shard.key_len()}")
return lib.shard_object_write(self.shard, key, object, len(object))