|
| 1 | +# cython: language_level=3 |
| 2 | +# cython: overflowcheck=False |
| 3 | +# cython: cdivision=True |
| 4 | +import struct |
| 5 | + |
| 6 | +from numcodecs.abc import Codec |
| 7 | +from numcodecs.compat import ensure_contiguous_ndarray |
| 8 | + |
| 9 | +from libc.stdint cimport uint8_t, uint16_t, uint32_t |
| 10 | + |
| 11 | + |
cdef uint32_t _fletcher32(const uint8_t[::1] _data):
    # Fletcher-32 checksum in the HDF5/netCDF4 flavour; converted from
    # https://github.com/Unidata/netcdf-c/blob/main/plugins/H5checksum.c#L109
    #
    # The buffer is consumed as 16-bit words with the first byte of each
    # pair in the high bits. If the length is odd, the last byte is used as
    # the high byte of one final word. An empty buffer yields 0.
    cdef:
        const uint8_t *data
        size_t _len = _data.shape[0]
        size_t nwords = _len // 2  # number of complete 16-bit words
        size_t tlen
        uint32_t sum1 = 0, sum2 = 0

    if _len == 0:
        # Both running sums are still zero, and element 0 of an empty
        # buffer must not be indexed just to obtain a pointer.
        return 0

    data = &_data[0]

    while nwords:
        # Process at most 360 words, then fold the sums back to 16 bits
        # (same chunk size as the upstream C implementation).
        tlen = 360 if nwords > 360 else nwords
        nwords -= tlen
        # do/while: tlen is at least 1 here, so the body runs before the test
        while True:
            sum1 += <uint32_t>((<uint16_t>data[0]) << 8) | (<uint16_t>data[1])
            data += 2
            sum2 += sum1
            tlen -= 1
            if tlen < 1:
                break
        sum1 = (sum1 & 0xffff) + (sum1 >> 16)
        sum2 = (sum2 & 0xffff) + (sum2 >> 16)

    if _len % 2:
        # Odd trailing byte: treat it as the high byte of a final word.
        sum1 += <uint32_t>((<uint16_t>(data[0])) << 8)
        sum2 += sum1
        sum1 = (sum1 & 0xffff) + (sum1 >> 16)
        sum2 = (sum2 & 0xffff) + (sum2 >> 16)

    # Second fold so that each sum fits in 16 bits before they are combined.
    sum1 = (sum1 & 0xffff) + (sum1 >> 16)
    sum2 = (sum2 & 0xffff) + (sum2 >> 16)

    return (sum2 << 16) | sum1
| 46 | + |
| 47 | + |
class Fletcher32(Codec):
    """The fletcher checksum with 16-bit words and 32-bit output

    This is the netCDF4/HDF5 implementation, which is not equivalent
    to the one in wikipedia
    https://github.com/Unidata/netcdf-c/blob/main/plugins/H5checksum.c#L95

    With this codec, the checksum is concatenated on the end of the data
    bytes when encoded. At decode time, the checksum is performed on
    the data portion and compared with the four-byte checksum, raising
    RuntimeError if inconsistent.
    """

    codec_id = "fletcher32"

    def encode(self, buf):
        """Return buffer plus 4-byte fletcher checksum

        The input is flattened and viewed as raw bytes; the checksum is
        appended as a little-endian unsigned 32-bit integer. The result
        is a new ``bytes`` object.
        """
        buf = ensure_contiguous_ndarray(buf).ravel().view('uint8')
        cdef const uint8_t[::1] b_ptr = buf
        val = _fletcher32(b_ptr)
        return buf.tobytes() + struct.pack("<I", val)

    def decode(self, buf, out=None):
        """Check fletcher checksum, and return buffer without it

        The last four bytes of ``buf`` are read as a little-endian
        unsigned 32-bit checksum and compared with the checksum computed
        over the preceding bytes.

        If ``out`` is given, the payload bytes are copied into
        ``out.view("uint8")`` and ``out`` is returned; otherwise a
        memoryview of the payload (without the checksum) is returned.

        Raises RuntimeError if the stored and computed checksums differ.
        """
        b = ensure_contiguous_ndarray(buf).view('uint8')
        cdef const uint8_t[::1] b_ptr = b[:-4]
        val = _fletcher32(b_ptr)
        found = b[-4:].view("<u4")[0]
        if val != found:
            raise RuntimeError(
                f"The fletcher32 checksum of the data ({val}) did not"
                f" match the expected checksum ({found}).\n"
                "This could be a sign that the data has been corrupted."
            )
        # Compare against None explicitly: the truth value of an array is
        # ambiguous (raises for more than one element) and an all-zero or
        # empty output buffer would otherwise be skipped.
        if out is not None:
            out.view("uint8")[:] = b[:-4]
            return out
        return memoryview(b[:-4])
0 commit comments