Merge pull request #21 from lucianpls/one-three

Version 1.3
lucianpls · Feb 16, 2025 · b55d3da · b55d3da
2 parents 37647e6 + dc0fee4
commit b55d3da
Show file tree

Hide file tree

Showing 11 changed files with 345 additions and 196 deletions.
diff --git a/QB3lib/CMakeLists.txt b/QB3lib/CMakeLists.txt
@@ -49,7 +49,7 @@ target_sources(${PROJECT_NAME}
 )
 
 set_target_properties(${PROJECT_NAME} PROPERTIES
-    PUBLIC_HEADER "QB3.h;${CMAKE_CURRENT_BINARY_DIR}/libqb3_export.h"
+    PUBLIC_HEADER QB3.h
     DEBUG_POSTFIX "d"
     PREFIX ""
 )

diff --git a/QB3lib/QB3.h b/QB3lib/QB3.h
@@ -1,7 +1,7 @@
 /*
 Content: Public API for QB3 library
 
-Copyright 2021-2024 Esri
+Copyright 2021-2025 Esri
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
@@ -15,19 +15,22 @@ limitations under the License.
 Contributors:  Lucian Plesea
 */
 
-#pragma once
+#if !defined(QB3_H)
 // For size_t
 #include <stddef.h>
 // For uint64_t
 #include <stdint.h>
 
-// CMake will generate LIBQB3_EXPORT linkage as needed
-#include "libqb3_export.h"
+#define QB3_H // Completes the include guard opened by #if !defined(QB3_H)
+#if !defined(LIBQB3_EXPORT) // Defined when building the library
+#define LIBQB3_EXPORT
+#endif
 
 // Keep this close to plain C so it can have a C API
 #if defined(__cplusplus)
 extern "C" {
 #endif
+// Max number of bands supported by the library, must not exceed 256
 #define QB3_MAXBANDS 16
 
 typedef struct encs * encsp; // encoder
@@ -46,21 +49,20 @@ enum qb3_mode {
     QB3M_BASE = 4,
     QB3M_BEST = 7,
 
-    // original z-curve
-    QB3M_BASE_Z = 0, // Base
-    QB3M_CF = 1, // With common factor
-    QB3M_RLE = 2, // BASE + RLE
-    QB3M_CF_RLE = 3, // BASE + CF + RLE
+    // Legacy z-curve
+    QB3M_BASE_Z = 0, // Legacy base
+    QB3M_CF = 1, //  + common factor
+    QB3M_RLE = 2, // + RLE
+    QB3M_CF_RLE = 3, // + CF + RLE
 
     // better, with Hilbert curve
-    QB3M_BASE_H = 4, // Hilbert
+    QB3M_BASE_H = 4, // Hilbert base
     QB3M_CF_H = 5, // Hilbert + CF
     QB3M_RLE_H = 6, // Hilbert + RLE
     QB3M_CF_RLE_H = 7, // Hilbert + CF + RLE
 
-    // Faster and only slightly worse than base in many cases
-    // Hilbert curve but no bit-step, no CF, no RLE
-    QB3M_FTL = 8, // Fastest, Hilbert
+    // Faster and only slightly worse than base: Hilbert curve, no bit-step, no CF, no RLE
+    QB3M_FTL = 8, // Fastest, Hilbert base - step
     QB3M_END, // Marks the end of the settable modes
 
     QB3M_STORED = 255, // Raw bypass, can't be requested
@@ -72,7 +74,7 @@ enum qb3_error {
     QB3E_OK = 0,
     QB3E_EINV, // Invalid parameter
     QB3E_UNKN, // Unknown
-    QB3E_ERR,   // unspecified error
+    QB3E_ERR,  // unspecified error
     QB3E_LIBERR = 255 // internal QB3 error, should not happen
 };
 
@@ -107,8 +109,8 @@ LIBQB3_EXPORT size_t qb3_max_encoded_size(const encsp p);
 // If mode value is out of range, it returns the previous mode value of p
 LIBQB3_EXPORT qb3_mode qb3_set_encoder_mode(encsp p, qb3_mode mode);
 
-//// Generate raw qb3 stream, no headers
-//LIBQB3_EXPORT void qb3_set_encoder_raw(encsp p);
+// Set line to line stride, in dtype units, defaults to xsize * nbands
+LIBQB3_EXPORT void qb3_set_encoder_stride(encsp p, size_t stride);
 
 // Encode the source into destination buffer, which should be at least qb3_max_encoded_size
 // Source organization is expected to be y major, then x, then band (interleaved)
@@ -138,7 +140,7 @@ LIBQB3_EXPORT size_t qb3_decoded_size(const decsp p);
 
 LIBQB3_EXPORT qb3_dtype qb3_get_type(const decsp p);
 
-// Set line to line to line stride for decoder, defaults to line size
+// Set line to line stride, in dtype units, defaults to xsize * nbands
 LIBQB3_EXPORT void qb3_set_decoder_stride(decsp p, size_t stride);
 
 // Query settings, valid after qb3_read_info
@@ -159,3 +161,4 @@ LIBQB3_EXPORT bool qb3_get_coreband(const decsp p, size_t *cband);
 }
 
 #endif
+#endif
diff --git a/QB3lib/QB3common.h b/QB3lib/QB3common.h
@@ -1,7 +1,7 @@
 /*
 Content: QB3 parts used by both the encoder and the decoder
 
-Copyright 2020-2024 Esri
+Copyright 2020-2025 Esri
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
@@ -15,8 +15,9 @@ limitations under the License.
 Contributors:  Lucian Plesea
 */
 
-#pragma once
-
+#pragma once
+// Only used during library build; include the CMake generated linkage file before QB3.h
+#include "libqb3_export.h"
 #include "QB3.h"
 #include "bitstream.h"
 #include <cinttypes>
@@ -34,6 +35,10 @@ constexpr auto TBLMASK(0xfffull);
 constexpr size_t B(4);
 constexpr size_t B2(B * B);
 
+#if QB3_MAXBANDS > 256
+#error QB3_MAXBANDS too large
+#endif
+
 #if defined(_WIN32)
 // blog2 of val, result is undefined for val == 0
 static size_t topbit(uint64_t val) {
@@ -84,6 +89,8 @@ struct encs {
     size_t xsize;
     size_t ysize;
     size_t nbands;
+    // Line to line stride in type units
+    size_t stride;
     // micro block scanning order
     uint64_t order;
     uint64_t quanta;
@@ -105,7 +112,7 @@ struct decs {
     size_t xsize;
     size_t ysize;
     size_t nbands;
-    // Line to line stride
+    // Line to line stride in type units
     size_t stride;
     // micro block scanning order
     uint64_t order;
@@ -126,6 +133,11 @@ struct decs {
 // in decode.cpp
 extern const int typesizes[8];
 
+// Size of one value of type dt, from typesizes; 0 if dt is past QB3_I64. Could be a macro
+static size_t szof(qb3_dtype dt) {
+    return (dt > QB3_I64) ? 0 : typesizes[int(dt)];
+}
+
 // Encode integers as magnitude and sign, with bit 0 for sign.
 // This encoding has the top bits always zero, regardless of sign
 // To keep the range the same as two's complement, the magnitude of 
@@ -150,10 +162,10 @@ static size_t step(const T* const v, size_t rung) {
     // Accumulate flipped rung bits
     for (size_t i = 0; i < B2; i++)
         acc = (acc << 1) | (1 ^ (v[i] >> rung));
-    // pattern is now 0*1*, with at least one 1 set
-    // s is 1 if distribution is a down step, 0 otherwise
+    // Looking for 0*1*, with at least one bit set
+    // s is true when acc is NOT such a pattern, in which case no step position is returned
     bool s = ((acc & (acc + 1)) != 0);
-    return B2 + s - !s * setbits16(acc);
+    return B2 + (s ? 1 : -setbits16(acc));
 }
 
 // Two QB3 standard parsing order, encoded as a single 64bit value

diff --git a/QB3lib/QB3decode.cpp b/QB3lib/QB3decode.cpp
@@ -21,11 +21,11 @@ Contributors:  Lucian Plesea
 #include <cstring>
 #include <vector>
 
-// Main header
-// 4 sig
-// 2 xsize
-// 2 ysize
-// 1 nbands
+// Main QB3 file header
+// 4 signature
+// 2 xmax
+// 2 ymax
+// 1 bandmax
 // 1 data type
 // 1 mode
 constexpr size_t QB3_HDRSZ = 4 + 2 + 2 + 1 + 1 + 1;
@@ -35,23 +35,19 @@ void qb3_destroy_decoder(decsp p) {
 }
 
 size_t qb3_decoded_size(const decsp p) {
-    return p->xsize * p->ysize * p->nbands * typesizes[static_cast<int>(p->type)];
+    return p->xsize * p->ysize * p->nbands * szof(p->type);
 }
 
 qb3_dtype qb3_get_type(const decsp p) {
     return p->type;
 }
 
 qb3_mode qb3_get_mode(const decsp p) {
-    if (p->stage != 2)
-        return qb3_mode::QB3M_INVALID;
-    return p->mode;
+    return (2 == p->stage) ? p->mode : QB3M_INVALID;
 }
 
 uint64_t qb3_get_quanta(const decsp p) {
-    if (p->stage != 2)
-        return 0; // Error
-    return p->quanta;
+    return (2 == p->stage) ? p->quanta: 0;
 }
 
 uint64_t qb3_get_order(const decsp p) {

diff --git a/QB3lib/QB3decode.h b/QB3lib/QB3decode.h
@@ -1,5 +1,5 @@
 /*
-Content: QB3 decoding
+Content: core QB3 decoding
 
 Copyright 2020-2025 Esri
 Licensed under the Apache License, Version 2.0 (the "License");
@@ -136,16 +136,14 @@ static std::pair<size_t, uint64_t> qb3dsztbl(uint64_t val, size_t rung) {
 }
 
 // Decode a B2 sized group of QB3 values from s and acc
-// Accumulator should be valid and have at least 56 valid bits
-// For rung 0, it works with 17bits or more
-// For rung 1, it works with 47bits or more
+// At least 56 valid bits in accumulator
 // returns false on failure
 template<bool applystep = true, typename T>
 static bool gdecode(iBits& s, size_t rung, T* group, uint64_t acc, size_t abits) {
     assert(((rung > 1) && (abits <= 8))
         || ((rung == 1) && (abits <= 17)) // B2 + 1
         || ((rung == 0) && (abits <= 47))); // 3 * B2 - 1
-    if (0 == rung) { // single bits, direct decoding
+    if (0 == rung) { // single bits, immediate decoding
         if (0 != (acc & 1)) {
             abits += B2;
             for (size_t i = 0; i < B2; i++) {
@@ -177,7 +175,7 @@ static bool gdecode(iBits& s, size_t rung, T* group, uint64_t acc, size_t abits)
         else if (2 == rung) { // max symbol len is 4, there are at least 14 in the accumulator
             // Use inline constants as nibble tables
             // Faster than a double value table decode, but only in this specific code organization
-            // Cleaning it up, for example doing a peek at the start then looping 16 times, makes it slower
+            // Cleaning it up, for example doing a peek at the start then looping 16 times makes it slower
             // The masks and inline constants could be smaller for size, but that eliminates the
             // common expression, making it slower
             // pre-shift accumulator, top 2 bits are not needed
@@ -207,7 +205,7 @@ static bool gdecode(iBits& s, size_t rung, T* group, uint64_t acc, size_t abits)
             const auto m = (1ull << (rung + 2)) - 1;
             for (size_t i = 0; i < B2 / 2; i++) {
                 auto v = drg[acc & m];
-                group[i] = static_cast<T>(v & TBLMASK);
+                group[i] = T(v & TBLMASK);
                 abits += v >> 12;
                 acc >>= v >> 12;
             }
@@ -216,27 +214,42 @@ static bool gdecode(iBits& s, size_t rung, T* group, uint64_t acc, size_t abits)
             abits = 0;
             for (size_t i = B2 / 2; i < B2; i++) {
                 auto v = drg[acc & m];
-                group[i] = static_cast<T>(v & TBLMASK);
+                group[i] = T(v & TBLMASK);
                 abits += v >> 12;
                 acc >>= v >> 12;
             }
             s.advance(abits);
         }
-        else { // Last part of table decoding, rungs 6-7, four values per accumulator
+        else { // Last part of table decoding, rungs 6-7
             auto drg = DRG[rung];
             const auto m = (1ull << (rung + 2)) - 1;
-            for (size_t j = 0; j < B2; j += B2 / 4) {
-                for (size_t i = 0; i < B2 / 4; i++) {
-                    auto v = drg[acc & m];
-                    group[j + i] = static_cast<T>(v & TBLMASK);
-                    abits += v >> 12;
-                    acc >>= v >> 12;
-                }
-                s.advance(abits);
-                abits = 0;
-                if (j <= B2 / 2) // Skip the last peek
-                    acc = s.peek();
-            }
+            // Three accumulator loads, decoding 6, 4 and 6 values: the incoming acc, then two peeks
+            int i = 0;
+            do {
+                auto v = drg[acc & m];
+                group[i] = T(v & TBLMASK);
+                abits += v >> 12;
+                acc >>= v >> 12;
+            } while (++i < 6);
+            s.advance(abits);
+            acc = s.peek();
+            abits = 0;
+            do {
+                auto v = drg[acc & m];
+                group[i] = T(v & TBLMASK);
+                abits += v >> 12;
+                acc >>= v >> 12;
+            } while (++i < 10);
+            s.advance(abits);
+            acc = s.peek();
+            abits = 0;
+            do {
+                auto v = drg[acc & m];
+                group[i] = T(v & TBLMASK);
+                abits += v >> 12;
+                acc >>= v >> 12;
+            } while (++i < B2);
+            s.advance(abits);
         }
     }
     else { // computed decoding
@@ -250,15 +263,15 @@ static bool gdecode(iBits& s, size_t rung, T* group, uint64_t acc, size_t abits)
                 auto p = qb3dsz(acc, rung);
                 abits += p.first;
                 acc >>= p.first;
-                group[i] = static_cast<T>(p.second);
+                group[i] = T(p.second);
             }
             s.advance(abits);
         }
         else if (rung < 63) { // 64bit and rung in [32 - 62], can't reuse accumulator
             s.advance(abits);
             for (int i = 0; i < B2; i++) {
                 auto p = qb3dsz(s.peek(), rung);
-                group[i] = static_cast<T>(p.second);
+                group[i] = T(p.second);
                 s.advance(p.first);
             }
         }
@@ -267,18 +280,18 @@ static bool gdecode(iBits& s, size_t rung, T* group, uint64_t acc, size_t abits)
             for (int i = 0; i < B2; i++) {
                 auto p = qb3dsz(s.peek(), rung);
                 auto ovf = p.first & (p.first >> 6);
-                group[i] = static_cast<T>(p.second);
+                group[i] = T(p.second);
                 s.advance(p.first ^ ovf);
                 if (ovf) // The next to top bit got dropped, rare
-                    group[i] |= s.get() << 62;
+                    group[i] |= s.pull() << 62;
             }
         }
     }
     // template parameter to avoid a test when not needed
     if (applystep && (0 == (group[B2 - 1] >> rung))) {
         auto stepp = step(group, rung);
         if (stepp < B2)
-            group[stepp] ^= static_cast<T>(1ull << rung);
+            group[stepp] ^= T(1ull << rung);
     }
     return true;
 }