Skip to content

Commit

Permalink
Merge pull request #21 from lucianpls/one-three
Browse files Browse the repository at this point in the history
Version 1.3
  • Loading branch information
lucianpls authored Feb 16, 2025
2 parents 37647e6 + dc0fee4 commit b55d3da
Show file tree
Hide file tree
Showing 11 changed files with 345 additions and 196 deletions.
2 changes: 1 addition & 1 deletion QB3lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ target_sources(${PROJECT_NAME}
)

# Single PUBLIC_HEADER entry: a repeated property key is overridden by its last value,
# so only QB3.h is listed as the public header of the target
set_target_properties(${PROJECT_NAME} PROPERTIES
    PUBLIC_HEADER QB3.h
    DEBUG_POSTFIX "d"
    PREFIX ""
)
Expand Down
37 changes: 20 additions & 17 deletions QB3lib/QB3.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
Content: Public API for QB3 library
Copyright 2021-2024 Esri
Copyright 2021-2025 Esri
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
Expand All @@ -15,19 +15,22 @@ limitations under the License.
Contributors: Lucian Plesea
*/

#pragma once
#if !defined(QB3_H)
// Define the guard macro, without it the test above never fails on a repeated include
#define QB3_H
// For size_t
#include <stddef.h>
// For uint64_t
#include <stdint.h>

// CMake will generate LIBQB3_EXPORT linkage as needed
#include "libqb3_export.h"
// Defined when building the library
#if !defined(LIBQB3_EXPORT)
#define LIBQB3_EXPORT
#endif

// Keep this close to plain C so it can have a C API
#if defined(__cplusplus)
extern "C" {
#endif
// Max number of bands supported by library <= 256
#define QB3_MAXBANDS 16

typedef struct encs * encsp; // encoder
Expand All @@ -46,21 +49,20 @@ enum qb3_mode {
QB3M_BASE = 4,
QB3M_BEST = 7,

// original z-curve
QB3M_BASE_Z = 0, // Base
QB3M_CF = 1, // With common factor
QB3M_RLE = 2, // BASE + RLE
QB3M_CF_RLE = 3, // BASE + CF + RLE
// Legacy z-curve
QB3M_BASE_Z = 0, // Legacy base
QB3M_CF = 1, // + common factor
QB3M_RLE = 2, // + RLE
QB3M_CF_RLE = 3, // + CF + RLE

// better, with Hilbert curve
QB3M_BASE_H = 4, // Hilbert
QB3M_BASE_H = 4, // Hilbert base
QB3M_CF_H = 5, // Hilbert + CF
QB3M_RLE_H = 6, // Hilbert + RLE
QB3M_CF_RLE_H = 7, // Hilbert + CF + RLE

// Faster and only slightly worse than base in many cases
// Hilbert curve but no bit-step, no CF, no RLE
QB3M_FTL = 8, // Fastest, Hilbert
// Faster and only slightly worse than base
QB3M_FTL = 8, // Fastest, Hilbert base - step
QB3M_END, // Marks the end of the settable modes

QB3M_STORED = 255, // Raw bypass, can't be requested
Expand All @@ -72,7 +74,7 @@ enum qb3_error {
QB3E_OK = 0,
QB3E_EINV, // Invalid parameter
QB3E_UNKN, // Unknown
QB3E_ERR, // unspecified error
QB3E_ERR, // unspecified error
QB3E_LIBERR = 255 // internal QB3 error, should not happen
};

Expand Down Expand Up @@ -107,8 +109,8 @@ LIBQB3_EXPORT size_t qb3_max_encoded_size(const encsp p);
// If mode value is out of range, it returns the previous mode value of p
LIBQB3_EXPORT qb3_mode qb3_set_encoder_mode(encsp p, qb3_mode mode);

//// Generate raw qb3 stream, no headers
//LIBQB3_EXPORT void qb3_set_encoder_raw(encsp p);
// Set line to line stride, in dtype units, defaults to xsize * nbands
LIBQB3_EXPORT void qb3_set_encoder_stride(encsp p, size_t stride);

// Encode the source into destination buffer, which should be at least qb3_max_encoded_size
// Source organization is expected to be y major, then x, then band (interleaved)
Expand Down Expand Up @@ -138,7 +140,7 @@ LIBQB3_EXPORT size_t qb3_decoded_size(const decsp p);

LIBQB3_EXPORT qb3_dtype qb3_get_type(const decsp p);

// Set line to line to line stride for decoder, defaults to line size
// Set line to line stride, in dtype units, defaults to xsize * nbands
LIBQB3_EXPORT void qb3_set_decoder_stride(decsp p, size_t stride);

// Query settings, valid after qb3_read_info
Expand All @@ -159,3 +161,4 @@ LIBQB3_EXPORT bool qb3_get_coreband(const decsp p, size_t *cband);
}

#endif
#endif
26 changes: 19 additions & 7 deletions QB3lib/QB3common.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
Content: QB3 parts used by both the encoder and the decoder
Copyright 2020-2024 Esri
Copyright 2020-2025 Esri
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
Expand All @@ -15,8 +15,9 @@ limitations under the License.
Contributors: Lucian Plesea
*/

#pragma once

// This file is only used during library build
// Include the linkage file generated by CMake before QB3.h
#include "libqb3_export.h"
#include "QB3.h"
#include "bitstream.h"
#include <cinttypes>
Expand All @@ -34,6 +35,10 @@ constexpr auto TBLMASK(0xfffull);
constexpr size_t B(4);
constexpr size_t B2(B * B);

#if QB3_MAXBANDS > 256
#error QB3_MAXBANDS too large
#endif

#if defined(_WIN32)
// blog2 of val, result is undefined for val == 0
static size_t topbit(uint64_t val) {
Expand Down Expand Up @@ -84,6 +89,8 @@ struct encs {
size_t xsize;
size_t ysize;
size_t nbands;
// Line to line stride in type units
size_t stride;
// micro block scanning order
uint64_t order;
uint64_t quanta;
Expand All @@ -105,7 +112,7 @@ struct decs {
size_t xsize;
size_t ysize;
size_t nbands;
// Line to line stride
// Line to line stride in type units
size_t stride;
// micro block scanning order
uint64_t order;
Expand All @@ -126,6 +133,11 @@ struct decs {
// in decode.cpp
extern const int typesizes[8];

// Per-value size of a QB3 data type, read from the typesizes table
// Any type value past QB3_I64 has no table entry and yields 0
static size_t szof(qb3_dtype dt) {
    if (dt > QB3_I64)
        return 0;
    return typesizes[static_cast<int>(dt)];
}

// Encode integers as magnitude and sign, with bit 0 for sign.
// This encoding has the top bits always zero, regardless of sign
// To keep the range the same as two's complement, the magnitude of
Expand All @@ -150,10 +162,10 @@ static size_t step(const T* const v, size_t rung) {
// Accumulate flipped rung bits
for (size_t i = 0; i < B2; i++)
acc = (acc << 1) | (1 ^ (v[i] >> rung));
// pattern is now 0*1*, with at least one 1 set
// s is 1 if distribution is a down step, 0 otherwise
// Looking for 0*1*, with at least one bit set
// s is true if bit pattern is a step down
bool s = ((acc & (acc + 1)) != 0);
return B2 + s - !s * setbits16(acc);
return B2 + (s ? 1 : -setbits16(acc));
}

// Two QB3 standard parsing order, encoded as a single 64bit value
Expand Down
20 changes: 8 additions & 12 deletions QB3lib/QB3decode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@ Contributors: Lucian Plesea
#include <cstring>
#include <vector>

// Main header
// 4 sig
// 2 xsize
// 2 ysize
// 1 nbands
// Main QB3 file header
// 4 signature
// 2 xmax
// 2 ymax
// 1 bandmax
// 1 data type
// 1 mode
constexpr size_t QB3_HDRSZ = 4 + 2 + 2 + 1 + 1 + 1;
Expand All @@ -35,23 +35,19 @@ void qb3_destroy_decoder(decsp p) {
}

// Size of the decoded raster: xsize * ysize * nbands * per-value size of the data type
// The type size comes from szof(), so a type value past QB3_I64 gives 0
// instead of reading outside the typesizes table
size_t qb3_decoded_size(const decsp p) {
    return p->xsize * p->ysize * p->nbands * szof(p->type);
}

// Data type recorded in the decoder state
qb3_dtype qb3_get_type(const decsp p) {
    const qb3_dtype dtype = p->type;
    return dtype;
}

// Encoding mode held by the decoder, available only when the decoder is at stage 2
// Any other stage returns QB3M_INVALID
qb3_mode qb3_get_mode(const decsp p) {
    return (2 == p->stage) ? p->mode : QB3M_INVALID;
}

// Quanta held by the decoder, available only when the decoder is at stage 2
// Any other stage returns 0, which signals an error
uint64_t qb3_get_quanta(const decsp p) {
    return (2 == p->stage) ? p->quanta : 0;
}

uint64_t qb3_get_order(const decsp p) {
Expand Down
65 changes: 39 additions & 26 deletions QB3lib/QB3decode.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
Content: QB3 decoding
Content: core QB3 decoding
Copyright 2020-2025 Esri
Licensed under the Apache License, Version 2.0 (the "License");
Expand Down Expand Up @@ -136,16 +136,14 @@ static std::pair<size_t, uint64_t> qb3dsztbl(uint64_t val, size_t rung) {
}

// Decode a B2 sized group of QB3 values from s and acc
// Accumulator should be valid and have at least 56 valid bits
// For rung 0, it works with 17bits or more
// For rung 1, it works with 47bits or more
// At least 56 valid bits in accumulator
// returns false on failure
template<bool applystep = true, typename T>
static bool gdecode(iBits& s, size_t rung, T* group, uint64_t acc, size_t abits) {
assert(((rung > 1) && (abits <= 8))
|| ((rung == 1) && (abits <= 17)) // B2 + 1
|| ((rung == 0) && (abits <= 47))); // 3 * B2 - 1
if (0 == rung) { // single bits, direct decoding
if (0 == rung) { // single bits, immediate decoding
if (0 != (acc & 1)) {
abits += B2;
for (size_t i = 0; i < B2; i++) {
Expand Down Expand Up @@ -177,7 +175,7 @@ static bool gdecode(iBits& s, size_t rung, T* group, uint64_t acc, size_t abits)
else if (2 == rung) { // max symbol len is 4, there are at least 14 in the accumulator
// Use inline constants as nibble tables
// Faster than a double value table decode, but only in this specific code organization
// Cleaning it up, for example doing a peek at the start then looping 16 times, makes it slower
// Cleaning it up, for example doing a peek at the start then looping 16 times makes it slower
// The masks and inline constants could be smaller for size, but that eliminates the
// common expression, making it slower
// pre-shift accumulator, top 2 bits are not needed
Expand Down Expand Up @@ -207,7 +205,7 @@ static bool gdecode(iBits& s, size_t rung, T* group, uint64_t acc, size_t abits)
const auto m = (1ull << (rung + 2)) - 1;
for (size_t i = 0; i < B2 / 2; i++) {
auto v = drg[acc & m];
group[i] = static_cast<T>(v & TBLMASK);
group[i] = T(v & TBLMASK);
abits += v >> 12;
acc >>= v >> 12;
}
Expand All @@ -216,27 +214,42 @@ static bool gdecode(iBits& s, size_t rung, T* group, uint64_t acc, size_t abits)
abits = 0;
for (size_t i = B2 / 2; i < B2; i++) {
auto v = drg[acc & m];
group[i] = static_cast<T>(v & TBLMASK);
group[i] = T(v & TBLMASK);
abits += v >> 12;
acc >>= v >> 12;
}
s.advance(abits);
}
else { // Last part of table decoding, rungs 6-7, four values per accumulator
else { // Last part of table decoding, rungs 6-7
auto drg = DRG[rung];
const auto m = (1ull << (rung + 2)) - 1;
for (size_t j = 0; j < B2; j += B2 / 4) {
for (size_t i = 0; i < B2 / 4; i++) {
auto v = drg[acc & m];
group[j + i] = static_cast<T>(v & TBLMASK);
abits += v >> 12;
acc >>= v >> 12;
}
s.advance(abits);
abits = 0;
if (j <= B2 / 2) // Skip the last peek
acc = s.peek();
}
// Three total reads, 6 4 6
int i = 0;
do {
auto v = drg[acc & m];
group[i] = T(v & TBLMASK);
abits += v >> 12;
acc >>= v >> 12;
} while (++i < 6);
s.advance(abits);
acc = s.peek();
abits = 0;
do {
auto v = drg[acc & m];
group[i] = T(v & TBLMASK);
abits += v >> 12;
acc >>= v >> 12;
} while (++i < 10);
s.advance(abits);
acc = s.peek();
abits = 0;
do {
auto v = drg[acc & m];
group[i] = T(v & TBLMASK);
abits += v >> 12;
acc >>= v >> 12;
} while (++i < B2);
s.advance(abits);
}
}
else { // computed decoding
Expand All @@ -250,15 +263,15 @@ static bool gdecode(iBits& s, size_t rung, T* group, uint64_t acc, size_t abits)
auto p = qb3dsz(acc, rung);
abits += p.first;
acc >>= p.first;
group[i] = static_cast<T>(p.second);
group[i] = T(p.second);
}
s.advance(abits);
}
else if (rung < 63) { // 64bit and rung in [32 - 62], can't reuse accumulator
s.advance(abits);
for (int i = 0; i < B2; i++) {
auto p = qb3dsz(s.peek(), rung);
group[i] = static_cast<T>(p.second);
group[i] = T(p.second);
s.advance(p.first);
}
}
Expand All @@ -267,18 +280,18 @@ static bool gdecode(iBits& s, size_t rung, T* group, uint64_t acc, size_t abits)
for (int i = 0; i < B2; i++) {
auto p = qb3dsz(s.peek(), rung);
auto ovf = p.first & (p.first >> 6);
group[i] = static_cast<T>(p.second);
group[i] = T(p.second);
s.advance(p.first ^ ovf);
if (ovf) // The next to top bit got dropped, rare
group[i] |= s.get() << 62;
group[i] |= s.pull() << 62;
}
}
}
// template parameter to avoid a test when not needed
if (applystep && (0 == (group[B2 - 1] >> rung))) {
auto stepp = step(group, rung);
if (stepp < B2)
group[stepp] ^= static_cast<T>(1ull << rung);
group[stepp] ^= T(1ull << rung);
}
return true;
}
Expand Down
Loading

0 comments on commit b55d3da

Please sign in to comment.