Skip to content

Commit 39ebb5c

Browse files
committed
refactor: Optimize data hashing and improve empty input handling in DataHasherV0
1 parent bdb85f0 commit 39ebb5c

File tree

2 files changed

+18
-16
lines changed

2 files changed

+18
-16
lines changed

iscc_core/code_data.py

+13 -16
Original file line number | Diff line number | Diff line change
@@ -84,26 +84,19 @@ def __init__(self, data=None):
8484
self.push(data)
8585

8686
def push(self, data):
87-
# type: (ic.Data) -> None
88-
"""Push data to the Data-Hash generator."""
8987
if self.tail:
9088
data = self.tail + data
91-
self.tail = None
92-
9389
chunks = ic.alg_cdc_chunks(
9490
data, utf32=False, avg_chunk_size=ic.core_opts.data_avg_chunk_size
9591
)
96-
97-
# Process chunks one at a time but keep track of last one
98-
last_chunk = None
92+
prev_chunk = None
9993
for chunk in chunks:
100-
if last_chunk is not None:
101-
self.chunk_sizes.append(len(last_chunk))
102-
self.chunk_features.append(xxhash.xxh32_intdigest(last_chunk))
103-
last_chunk = chunk
104-
105-
# Buffer the last chunk as it may be incomplete
106-
self.tail = last_chunk if last_chunk is not None else None
94+
if prev_chunk is not None: # Process only if we’ve seen a prior chunk
95+
self.chunk_sizes.append(len(prev_chunk))
96+
self.chunk_features.append(xxhash.xxh32_intdigest(prev_chunk))
97+
prev_chunk = chunk
98+
# Handle the case where no chunks were yielded (empty input)
99+
self.tail = prev_chunk if prev_chunk is not None else b""
107100

108101
def digest(self):
109102
# type: () -> bytes
@@ -131,8 +124,12 @@ def code(self, bits=ic.core_opts.data_bits):
131124

132125
def _finalize(self):
133126
if self.tail is not None:
134-
self.chunk_features.append(xxhash.xxh32_intdigest(self.tail))
135-
self.chunk_sizes.append(len(self.tail))
127+
if self.tail: # Append non-empty tail
128+
self.chunk_features.append(xxhash.xxh32_intdigest(self.tail))
129+
self.chunk_sizes.append(len(self.tail))
130+
elif not self.chunk_features: # Empty input case: ensure at least one feature
131+
self.chunk_features.append(xxhash.xxh32_intdigest(b""))
132+
self.chunk_sizes.append(0)
136133
self.tail = None
137134

138135

iscc_core/iscc_id.py

+5
Original file line number | Diff line number | Diff line change
@@ -173,3 +173,8 @@ def alg_simhash_from_iscc_id(iscc_id, wallet):
173173
iscc_id_xor_digest = iscc_tuple[4][:8]
174174
iscc_id_digest = bytes(a ^ b for (a, b) in zip(iscc_id_xor_digest, wallet_hash_digest))
175175
return iscc_id_digest.hex()
176+
177+
178+
####################################################################################################
179+
# ISCC-IDv1 - New Timestamp/Server-ID based ISCC-ID #
180+
####################################################################################################

0 commit comments

Comments
 (0)