646 lines
18 KiB
Cython

# distutils: language = c++
"""placeholder module for compilation"""
from __future__ import annotations
import datetime
from pathlib import Path
import dopt_basics.datetime
import sqlalchemy as sql
from dopt_pollublock_blockchain import db
from dopt_pollublock_blockchain import types
from dopt_pollublock_blockchain.block cimport Block
from libcpp.unordered_map cimport unordered_map
from libcpp.vector cimport vector
from libcpp.string cimport string
from libc.stdint cimport uint64_t
from libc.stdlib cimport malloc, free
from libc.string cimport memcpy
from cython.operator import postincrement, dereference
cimport dopt_pollublock_blockchain.openssl_evp as ossl
# Module-level type aliases for the C++ containers used by Blockchain.
ctypedef unsigned long ULong
ctypedef unordered_map[uint64_t, Block*] BcHashmap
# Byte offset of the nonce inside a serialized block: it is preceded by
# index (8 bytes, big-endian) and timestamp (8 bytes, big-endian).
cdef const size_t NONCE_OFFSET = <size_t>16
cdef timestamp_to_datetime(uint64_t ts):
    """Convert integer UNIX seconds to a timezone-aware UTC datetime."""
    tz = dopt_basics.datetime.TIMEZONE_UTC
    return datetime.datetime.fromtimestamp(float(ts), tz)
cdef uint64_t current_timestamp_integer():
    """Return the current wall-clock time as whole UNIX seconds."""
    # Microseconds are cut so the stored timestamp round-trips exactly.
    now = dopt_basics.datetime.current_time_tz(cut_microseconds=True)
    return <uint64_t>int(now.timestamp())
cdef int serialize_uint32(unsigned char* out, unsigned int v) except -1 nogil:
    # Write v into out[0..3] in big-endian (network) byte order.
    cdef int i
    for i in range(4):
        out[i] = (v >> (24 - 8 * i)) & 0xFF
    return 0
cdef int serialize_uint64(unsigned char* out, unsigned long long v) except -1 nogil:
    # Write v into out[0..7] in big-endian (network) byte order.
    cdef int i
    for i in range(8):
        out[i] = (v >> (56 - 8 * i)) & 0xFF
    return 0
cdef inline bint has_leading_zero_bits(const unsigned char *digest, int num_bits) nogil:
    # True iff the first num_bits bits of digest are all zero
    # (the proof-of-work difficulty test).
    cdef int byte_idx
    cdef int whole_bytes = num_bits >> 3
    cdef int partial_bits = num_bits & 7
    for byte_idx in range(whole_bytes):
        if digest[byte_idx]:
            return False
    if partial_bits and (digest[whole_bytes] >> (8 - partial_bits)):
        return False
    return True
cdef inline bint has_leading_zero_bytes(const unsigned char *digest, int num_bytes) nogil:
    # True iff the first num_bytes bytes of digest are all zero.
    # FIX: removed dead locals full_bytes/rem_bits that were copy-paste
    # residue from has_leading_zero_bits and never read.
    cdef int i
    for i in range(num_bytes):
        if digest[i] != 0:
            return False
    return True
cdef class PyBlock:
    """Python-facing wrapper around a C++ ``Block``.

    Created either from Python (allocates and owns a fresh Block) or via
    ``from_ptr`` (wraps an existing pointer; deletion only happens when
    ``owner=True``).
    """
    cdef:
        Block *BlockC   # wrapped C++ block (NULL until wired up on the from_ptr path)
        bint ptr_owner  # True when __dealloc__ must delete BlockC

    def __cinit__(self,
                  index,
                  nonce,
                  data,
                  previous_hash,
                  from_ptr=False,
                  ):
        # from_ptr=True skips allocation; from_ptr() assigns BlockC afterwards.
        self.ptr_owner = False
        if not from_ptr:
            self.BlockC = new Block(
                index,
                current_timestamp_integer(),
                nonce,
                data.encode("UTF-8"),
                previous_hash.encode("UTF-8"),
                "".encode("UTF-8"),
            )
            if self.BlockC is NULL:
                raise MemoryError()
            self.ptr_owner = True

    def __init__(self, *args, **kwargs):
        # All initialization happens in __cinit__.
        pass

    def __dealloc__(self):
        if self.BlockC is not NULL and self.ptr_owner is True:
            del self.BlockC
            self.BlockC = NULL

    @staticmethod
    cdef PyBlock from_ptr(Block *block, bint owner=False):
        # Wrap an existing Block* without copying it; ownership transfers
        # only when owner=True.
        cdef PyBlock py_block = PyBlock.__new__(
            PyBlock,
            block.index,
            block.nonce,
            block.data,
            block.prev_hash,
            True,
        )
        py_block.BlockC = block
        py_block.ptr_owner = owner
        return py_block

    def __repr__(self):
        return (
            f"PyBlock(\n\tIndex:\t\t{self.index}\n\tTimestamp:\t{self.timestamp}\n\tNonce:\t\t{self.nonce}\n\t"
            f"Prev Hash:\t{self.prev_hash}\n\tHash:\t\t{self.hash}\n\tData:\t\t{self.data}\n)"
        )

    def __str__(self):
        return self.__repr__()

    def as_dataclass(self):
        """Return the block contents as a ``types.PyBlockData``."""
        return types.PyBlockData(
            Index=self.index,
            Timestamp=self.timestamp,
            Nonce=self.nonce,
            PrevHash=self.prev_hash,
            Hash=self.hash,
            Data=self.data,
        )

    def serialize_dict(self):
        """Return the block contents as a plain dict (matches the DB row layout)."""
        contents = {}
        contents["index"] = self.BlockC.index
        contents["timestamp"] = self.BlockC.timestamp
        contents["nonce"] = self.BlockC.nonce
        contents["previous_hash"] = self.prev_hash
        contents["hash"] = self.hash
        contents["data"] = self.data
        return contents

    # Python public API
    @property
    def index(self):
        return self.BlockC.index

    @property
    def timestamp(self):
        # Stored as integer UNIX seconds; exposed as an aware UTC datetime.
        return timestamp_to_datetime(self.BlockC.timestamp)

    @property
    def data(self):
        return self.BlockC.data.decode("UTF-8")

    @property
    def prev_hash(self):
        return self.BlockC.prev_hash.decode("UTF-8")

    @property
    def nonce(self):
        return self.BlockC.nonce

    @property
    def hash(self):
        return self.BlockC.hash.decode("UTF-8")

    def bytes_serialize(self):
        """Return the canonical byte serialization of the wrapped block.

        Raises MemoryError when the C-level serialization buffer cannot
        be allocated.
        """
        cdef:
            unsigned char *serialize_res
            size_t serialize_size
        serialize_res = bytes_serialize_c(self.BlockC, &serialize_size)
        # FIX: the original sliced serialize_res without a NULL check and
        # would have freed an invalid pointer on allocation failure.
        if serialize_res is NULL:
            raise MemoryError()
        try:
            return serialize_res[:serialize_size]
        finally:
            free(serialize_res)

    def perform_hash(self):
        """Hash the serialized block, store the hex digest and return it."""
        cdef:
            unsigned char *digest
            size_t digest_size
        digest = perform_hash_c(self.BlockC, &digest_size)
        if digest is NULL:
            raise MemoryError()
        try:
            self.BlockC.hash = bytes(digest[:digest_size]).hex().encode("UTF-8")
        finally:
            free(digest)
        return self.hash
cdef unsigned char* bytes_serialize_c(Block *block, size_t *size) nogil:
    # Serialize a block into a malloc'd buffer laid out as:
    #   index (8, big-endian) | timestamp (8) | nonce (8) | data | prev_hash
    # Writes the total length to ``size``. Caller owns and frees the buffer.
    # Returns NULL on allocation failure.
    # FIX: removed the unused local ``total_len``.
    cdef:
        unsigned char* buf
        size_t pos = 0
    size[0] = (
        <size_t>(8 * 3) +
        block.data.size() +
        block.prev_hash.size()
    )
    buf = <unsigned char*>malloc(size[0] * sizeof(unsigned char))
    if buf is NULL:
        return NULL
    serialize_uint64(buf + pos, block.index)
    pos += 8
    serialize_uint64(buf + pos, block.timestamp)
    pos += 8
    # NONCE_OFFSET (16) points at this field — mine_block rewrites it in place.
    serialize_uint64(buf + pos, block.nonce)
    pos += 8
    memcpy(
        buf + pos,
        block.data.c_str(),
        block.data.size(),
    )
    pos += block.data.size()
    memcpy(
        buf + pos,
        block.prev_hash.c_str(),
        block.prev_hash.size(),
    )
    pos += block.prev_hash.size()
    return buf
cdef unsigned char* SHA256_digest(const void *data, size_t data_size, size_t *digest_size) nogil:
    # One-shot SHA-256 over ``data`` via the OpenSSL EVP API.
    # Returns a malloc'd digest buffer (caller frees) and writes its length
    # to ``digest_size``; returns NULL on any failure.
    cdef ossl.EVP_MD_CTX *ctx = ossl.EVP_MD_CTX_new()
    if ctx is NULL:
        return NULL
    cdef const ossl.EVP_MD *algo = ossl.EVP_sha256()
    if algo is NULL:
        # FIX: ctx was leaked on this path in the original.
        ossl.EVP_MD_CTX_free(ctx)
        return NULL
    cdef:
        unsigned char* digest
        size_t dig_buff_len
        unsigned int digest_len
    dig_buff_len = <size_t>ossl.EVP_MD_size(algo)
    digest_size[0] = dig_buff_len
    digest = <unsigned char*>malloc(dig_buff_len * sizeof(unsigned char))
    # FIX: the original passed an unchecked malloc result to EVP_DigestFinal_ex.
    if digest is NULL:
        ossl.EVP_MD_CTX_free(ctx)
        return NULL
    ossl.EVP_DigestInit_ex(ctx, algo, NULL)
    ossl.EVP_DigestUpdate(ctx, data, data_size)
    ossl.EVP_DigestFinal_ex(ctx, digest, &digest_len)
    ossl.EVP_MD_CTX_free(ctx)
    return digest
cdef unsigned char* perform_hash_c(Block *block, size_t *digest_size) nogil:
    # Serialize the block and SHA-256 the resulting bytes.
    # Returns a malloc'd digest (caller frees) or NULL on failure.
    cdef:
        unsigned char *payload
        size_t payload_len
        unsigned char *digest
    payload = bytes_serialize_c(block, &payload_len)
    if payload is NULL:
        return NULL
    digest = SHA256_digest(payload, payload_len, digest_size)
    free(payload)
    # SHA256_digest already yields NULL on failure, so it is returned as-is.
    return digest
cdef int mine_block(Block *block, unsigned int difficulty, uint64_t *nonce_solution, unsigned int max_nonce=0xFFFFFFFF) nogil:
    # Proof-of-work search: rewrite the nonce field in the serialized block
    # in place and hash until the digest has ``difficulty`` leading zero bits.
    # Writes the winning nonce to ``nonce_solution``. Returns 0 on success,
    # 1 when no nonce was found or on allocation failure.
    #
    # FIXES vs original:
    #  * dropped the ``with nogil:`` block — the function is already declared
    #    nogil, so re-releasing the GIL here is invalid;
    #  * the loop counter was a signed ``int`` compared against an unsigned
    #    0xFFFFFFFF bound and stored into uint64_t — now uint64_t throughout;
    #  * on success ``break`` skipped ``free(digest)``, leaking the winning
    #    digest; NULL checks added for serial_buf and digest.
    cdef:
        unsigned char *serial_buf
        size_t serialize_size
        unsigned char *digest
        size_t digest_size
        bint nonce_found = False
        uint64_t nonce
    serial_buf = bytes_serialize_c(block, &serialize_size)
    if serial_buf is NULL:
        return 1
    for nonce in range(<uint64_t>max_nonce):
        serialize_uint64(serial_buf + NONCE_OFFSET, nonce)
        digest = SHA256_digest(serial_buf, serialize_size, &digest_size)
        if digest is NULL:
            free(serial_buf)
            return 1
        if has_leading_zero_bits(digest, difficulty):
            nonce_found = True
            nonce_solution[0] = nonce
            free(digest)
            break
        free(digest)
    free(serial_buf)
    if not nonce_found:
        return 1
    return 0
cdef class Blockchain:
    """Proof-of-work blockchain backed by C++ containers and a SQLite store.

    Blocks are held twice: in a vector (insertion order, used for iteration)
    and in an unordered_map keyed by block index (used for lookup). The map
    is the owner — ``__dealloc__`` deletes every Block through it.
    """
    cdef:
        unsigned int _difficulty  # required leading zero bits for mining
        uint64_t _index           # index of the newest block
        BcHashmap *_chain_map     # index -> Block* (owns the blocks)
        vector[Block*] *_chain    # blocks in insertion order (borrowed)
        bint _genesis_done        # True once a genesis block exists
        bint _loaded              # True after load() ran
        readonly object db_path   # resolved path of the SQLite database
        readonly object _engine   # SQLAlchemy engine bound to db_path

    def __cinit__(self, *args, **kwargs):
        self._difficulty = 26
        self._index = <uint64_t>0
        self._genesis_done = False
        self._loaded = False
        self._chain_map = new unordered_map[uint64_t, Block*]()
        self._chain = new vector[Block*]()
        if self._chain_map is NULL:
            raise MemoryError("Could not allocate hashmap")
        if self._chain is NULL:
            raise MemoryError("Could not allocate vector")

    def __init__(self, db_path):
        self.db_path = Path(db_path).resolve()
        if not self.db_path.parent.exists():
            raise FileNotFoundError(
                "The parent directory of the provided database path does not exist"
            )
        self._engine = sql.create_engine(f"sqlite:///{str(self.db_path)}")
        db.metadata_blockchain.create_all(self._engine)

    def __dealloc__(self):
        # Ownership is typically not transferred out of this class, so every
        # Block is deleted here via the hashmap.
        # FIX: the original called self._chain_map.begin() BEFORE the NULL
        # guard, dereferencing a NULL map if __cinit__ failed partway.
        cdef BcHashmap.iterator it
        if self._chain_map is not NULL:
            it = self._chain_map.begin()
            while it != self._chain_map.end():
                del dereference(it).second
                postincrement(it)
            del self._chain_map
            self._chain_map = NULL
        if self._chain is not NULL:
            del self._chain
            self._chain = NULL

    cdef Block* get_block_c(self, uint64_t idx) nogil:
        # NULL for out-of-range indices, otherwise hashmap lookup.
        # NOTE(review): unordered_map operator[] default-inserts on a missing
        # key; indices are contiguous 0.._index here, so a guarded lookup
        # cannot miss — confirm this invariant holds for loaded chains.
        if idx > self._index:
            return NULL
        return self._chain_map[0][idx]

    cdef void add_block_from_loading(self, Block *block) nogil:
        # Fast-path insertion used by load(): blocks were already mined and
        # hashed when they were saved, so no work is redone.
        self._chain[0].push_back(block)
        self._chain_map[0][block.index] = block
        self._index = block.index
        if not self._genesis_done:
            self._genesis_done = True

    cdef int add_block(self, Block *block) nogil:
        # Mine, hash and append a freshly constructed block.
        # Returns 0 on success, 1 on mining or hashing failure (the caller
        # keeps ownership of ``block`` in that case).
        cdef:
            uint64_t mined_nonce
            size_t digest_size
            unsigned char *sha256_digest
        if mine_block(block, self._difficulty, &mined_nonce) != 0:
            return 1
        block.nonce = mined_nonce
        sha256_digest = perform_hash_c(block, &digest_size)
        # FIX: the original sliced sha256_digest without a NULL check.
        if sha256_digest is NULL:
            return 1
        with gil:
            block.hash = bytes(sha256_digest[:digest_size]).hex().encode("UTF-8")
        free(sha256_digest)
        self._chain[0].push_back(block)
        self._chain_map[0][block.index] = block
        if self._genesis_done:
            self._index += 1
        return 0

    cdef string hash_data(self, data):
        # SHA-256 a unicode string; returns the lowercase hex digest as a
        # C++ string. Raises RuntimeError on hashing failure.
        cdef:
            string data_str
            unsigned char *data_digest
            size_t digest_size
        data_str = data.encode("UTF-8")
        data_digest = SHA256_digest(data_str.c_str(), data_str.size(), &digest_size)
        if data_digest is NULL:
            raise RuntimeError("Failed to hash data")
        data_str = bytes(data_digest[:digest_size]).hex().encode("UTF-8")
        free(data_digest)
        return data_str

    cdef load_from_batch(self, batch):
        # Rebuild Blocks from DB rows. Row layout (see save()):
        # (index, timestamp, nonce, previous_hash, hash, data) — note the
        # Block constructor takes data before prev_hash/hash.
        cdef Block *block
        for entry in batch:
            block = new Block(
                entry[0],
                entry[1],
                entry[2],
                entry[5].encode("UTF-8"),
                entry[3].encode("UTF-8"),
                entry[4].encode("UTF-8"),
            )
            self.add_block_from_loading(block)

    # // Python public API
    def __len__(self):
        # Length is newest index + 1; zero before any genesis block exists.
        if self.genesis_done:
            return self._index + 1
        else:
            return 0

    @property
    def difficulty(self):
        return self._difficulty

    @difficulty.setter
    def difficulty(self, value):
        if not isinstance(value, int):
            raise TypeError("Difficulty must be integer value")
        if value <= 0:
            raise ValueError("Difficulty must be greater than 0")
        self._difficulty = value

    @property
    def genesis_done(self):
        return self._genesis_done

    @property
    def index(self):
        return self._index

    def _print_key_value_pair(self):
        """Debug helper: print every (index, block) pair in the hashmap."""
        cdef BcHashmap.iterator it = self._chain_map.begin()
        cdef Block *block
        while it != self._chain_map.end():
            print(dereference(it).first)
            block = dereference(it).second
            print(PyBlock.from_ptr(block))
            postincrement(it)

    def print_blocks(self, max_num):
        """Print up to ``max_num`` blocks in chain order."""
        cdef:
            Block *block
            # FIX: typo ``max_nummber`` corrected.
            int max_number = max_num
            int idx, num = 0
        if max_num <= 0:
            raise ValueError("Maximum number must be greater than 0")
        for idx in range(self._chain[0].size()):
            block = self._chain[0][idx]
            print(PyBlock.from_ptr(block))
            num += 1
            if num == max_number:
                break

    def get_block(self, idx):
        """Return a non-owning PyBlock wrapper for the block at ``idx``."""
        if idx < 0 or idx > self._index:
            raise IndexError("Index value is out of bounds")
        cdef Block *block = self.get_block_c(idx)
        if block is NULL:
            raise IndexError("Provided index not found")
        return PyBlock.from_ptr(block, owner=False)

    def create_genesis_block(self):
        """Mine and append block 0. May only be called once per chain."""
        if self._genesis_done:
            raise RuntimeError(
                ("Blockchain already has a genesis block. "
                 "Either it was created or loaded.")
            )
        genesis_prev_hash = ("0" * 64).encode("UTF-8")
        cdef string data_str = self.hash_data("Genesis Block")
        cdef Block *block = new Block(
            self._index,
            current_timestamp_integer(),
            0,
            data_str,
            genesis_prev_hash,
            "".encode("UTF-8"),
        )
        cdef int res = self.add_block(block)
        if res != 0:
            # FIX: the freshly allocated block leaked on mining failure.
            del block
            raise RuntimeError("Could not mine block. No nonce found")
        self._genesis_done = True

    def new_block(self, data):
        """Hash ``data``, mine a block on top of the chain, return its index."""
        # FIX: removed unused locals data_digest/digest_size (hashing is
        # delegated to hash_data).
        cdef:
            Block *prev_block
            string prev_hash
            uint64_t new_idx
            string data_str
        if not self._genesis_done:
            raise RuntimeError("Create a genesis block first.")
        if not isinstance(data, str):
            raise TypeError("Data must be a string")
        data_str = self.hash_data(data)
        prev_block = self.get_block_c(self._index)
        prev_hash = prev_block.hash
        new_idx = self._index + 1
        cdef Block *block = new Block(
            new_idx,
            current_timestamp_integer(),
            0,
            data_str,
            prev_hash,
            "".encode("UTF-8"),
        )
        cdef int res = self.add_block(block)
        if res != 0:
            # FIX: the freshly allocated block leaked on mining failure.
            del block
            raise RuntimeError("Could not mine block. No nonce found")
        return self._index

    def validate(self):
        """Re-hash every block and verify the prev_hash chain links.

        Returns True when the chain is consistent, False (with a printed
        reason) otherwise.
        """
        cdef:
            Block *block
            Block *prev_block = NULL
            int idx = 0
            unsigned char *digest
            size_t digest_size
        for idx in range(self._chain[0].size()):
            block = self._chain[0][idx]
            py_bytes = bytes.fromhex(block.hash.decode("UTF-8"))
            digest = perform_hash_c(block, &digest_size)
            # FIX: the original sliced a possibly-NULL digest.
            if digest is NULL:
                raise MemoryError()
            py_bytes_rehashed = bytes(digest[:digest_size])
            free(digest)
            if py_bytes != py_bytes_rehashed:
                # FIX: message typo ("Hashes to not match").
                print(f"Index {idx}: Hashes do not match. Abort.")
                return False
            if prev_block is not NULL:
                if prev_block.hash != block.prev_hash:
                    print(
                        (f"Index {idx}: Hash mismatch. Hash of previous block does not "
                         "match the saved one in the current block. Abort.")
                    )
                    return False
            prev_block = block
        return True

    def get_saving_entries(self, min_idx):
        """Collect dict rows for all blocks with index greater than ``min_idx``.

        ``min_idx`` may be None, in which case every block is included.
        """
        entries = []
        cdef:
            Block *block
            int idx = 0
            int _min_idx
        if min_idx is None:
            _min_idx = -1
        else:
            _min_idx = min_idx
        for idx in range(self._chain[0].size()):
            if idx <= _min_idx:
                continue
            block = self._chain[0][idx]
            contents = {}
            contents["index"] = block.index
            contents["timestamp"] = block.timestamp
            contents["nonce"] = block.nonce
            contents["previous_hash"] = block.prev_hash.decode("UTF-8")
            contents["hash"] = block.hash.decode("UTF-8")
            contents["data"] = block.data.decode("UTF-8")
            entries.append(contents)
        return entries

    def save(self):
        """Persist blocks that are not yet in the database.

        Reads the maximum saved index first so only newer blocks are
        inserted; a no-op when there is nothing new.
        """
        stmt = sql.select(sql.func.max(db.blocks.c.index))
        with self._engine.connect() as conn:
            result = conn.execute(stmt)
            max_value = result.scalar()
        entries = self.get_saving_entries(max_value)
        if not entries:
            return
        with self._engine.begin() as con:
            con.execute(sql.insert(db.blocks), entries)

    def close_db_connections(self):
        """Dispose the SQLAlchemy engine's connection pool."""
        self._engine.dispose()

    def load(self, batch_size):
        """Load all persisted blocks in index order, ``batch_size`` rows at a time."""
        if self._loaded:
            raise RuntimeError("Blockchain was already loaded")
        with self._engine.connect() as con:
            res = con.execute(sql.select(db.blocks).order_by(db.blocks.c.index.asc()))
            for batch in res.partitions(batch_size):
                self.load_from_batch(batch)
        self._loaded = True