@@ -84,26 +84,19 @@ def __init__(self, data=None):
84
84
self .push (data )
85
85
86
86
def push(self, data):
    # type: (ic.Data) -> None
    """Push data to the Data-Hash generator.

    The data is split into content-defined chunks. For every chunk except
    the last one, the chunk length is appended to ``self.chunk_sizes`` and
    its 32-bit xxHash integer digest to ``self.chunk_features``. The last
    chunk is buffered in ``self.tail`` because it may be incomplete; it is
    prepended to the data of the next ``push`` call.

    :param data: Bytes to feed into the generator.
    """
    # Re-attach the chunk buffered by the previous call so that chunk
    # boundaries are computed over the continuous stream.
    if self.tail:
        data = self.tail + data

    chunks = ic.alg_cdc_chunks(
        data, utf32=False, avg_chunk_size=ic.core_opts.data_avg_chunk_size
    )

    # Walk the chunks one step behind: a chunk is only recorded once a
    # later chunk has been seen, so the final chunk is never recorded here.
    prev_chunk = None
    for chunk in chunks:
        if prev_chunk is not None:
            self.chunk_sizes.append(len(prev_chunk))
            self.chunk_features.append(xxhash.xxh32_intdigest(prev_chunk))
        prev_chunk = chunk

    # Buffer the last chunk; fall back to empty bytes when the chunker
    # yielded nothing (empty input).
    self.tail = prev_chunk if prev_chunk is not None else b""
107
100
108
101
def digest (self ):
109
102
# type: () -> bytes
@@ -131,8 +124,12 @@ def code(self, bits=ic.core_opts.data_bits):
131
124
132
125
def _finalize(self):
    # type: () -> None
    """Flush the buffered tail chunk into the collected features.

    Does nothing when ``self.tail`` is ``None``. Otherwise a non-empty tail
    is recorded as a regular chunk (xxh32 integer digest and length). If
    the tail is empty and no features have been collected at all, a single
    feature for the empty byte string with size 0 is recorded so that at
    least one feature exists. ``self.tail`` is reset to ``None`` afterwards.
    """
    if self.tail is not None:
        if self.tail:
            # Non-empty tail: record it like any other chunk.
            self.chunk_features.append(xxhash.xxh32_intdigest(self.tail))
            self.chunk_sizes.append(len(self.tail))
        elif not self.chunk_features:
            # Empty input case: ensure at least one feature.
            self.chunk_features.append(xxhash.xxh32_intdigest(b""))
            self.chunk_sizes.append(0)
        # Mark the tail as consumed so a repeated call adds nothing.
        self.tail = None
137
134
138
135
0 commit comments