Skip to content

Commit 7a4a2e5

Browse files
author
moonshadow565
committed
advanced chunking
1 parent 5fb623d commit 7a4a2e5

12 files changed

+554
-60
lines changed

CMakeLists.txt

+8
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,14 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
88
add_subdirectory(dep)
99

1010
add_library(rlib STATIC
11+
lib/rlib/ar.hpp
12+
lib/rlib/ar.cpp
13+
lib/rlib/ar/bnk.hpp
14+
lib/rlib/ar/bnk.cpp
15+
lib/rlib/ar/wad.hpp
16+
lib/rlib/ar/wad.cpp
17+
lib/rlib/ar/wpk.hpp
18+
lib/rlib/ar/wpk.cpp
1119
lib/rlib/common.hpp
1220
lib/rlib/common.cpp
1321
lib/rlib/iofile.cpp

lib/rlib/ar.cpp

+81
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
#include "ar.hpp"
2+
3+
#include "ar/bnk.hpp"
4+
#include "ar/wad.hpp"
5+
#include "ar/wpk.hpp"
6+
7+
using namespace rlib;
8+
using namespace rlib::ar;
9+
10+
/// Split the whole of `io` into chunks, reporting each chunk through `cb`.
/// The top-level range starts at offset 0, spans `io.size()` bytes, is marked
/// uncompressed and is processed at depth 0.
auto ArSplit::operator()(IO const& io, offset_cb cb) const -> void {
    auto const whole_file = Entry{.offset = 0, .size = io.size()};
    process(io, cb, 0, whole_file);
}
13+
14+
template <typename T>
15+
auto ArSplit::process_ar(IO const& io, offset_cb cb, Entry top_entry) const -> void {
16+
auto archive = T{};
17+
if (auto error = archive.read(io, top_entry.offset, top_entry.size)) rlib_error(error);
18+
19+
// ensure offsets are processed in order
20+
std::sort(archive.entries.begin(), archive.entries.end(), [](auto const& lhs, auto const& rhs) {
21+
if (lhs.offset < rhs.offset) return true;
22+
if (lhs.offset == rhs.offset && lhs.size > rhs.size) return true;
23+
return false;
24+
});
25+
26+
auto cur = top_entry.offset;
27+
for (auto entry : archive.entries) {
28+
// skip empty entries
29+
if (!entry.size) continue;
30+
31+
// skip duplicate or overlapping entries
32+
if (entry.offset < cur) {
33+
continue;
34+
}
35+
36+
// process any skipped data
37+
if (auto leftover = entry.offset - cur) {
38+
process(io, cb, -1, {.offset = cur, .size = leftover, .compressed = top_entry.compressed});
39+
}
40+
41+
// process current entry
42+
process(io,
43+
cb,
44+
T::can_nest && !no_nest && !entry.compressed ? 1 : -1,
45+
{
46+
.offset = entry.offset,
47+
.size = entry.size,
48+
.compressed = entry.compressed,
49+
});
50+
51+
// go to next entry
52+
cur = entry.offset + entry.size;
53+
}
54+
55+
// process any remaining data
56+
if (auto remain = (top_entry.offset + top_entry.size) - cur) {
57+
process(io, cb, -1, {.offset = cur, .size = remain, .compressed = top_entry.compressed});
58+
}
59+
}
60+
61+
/// Process one byte range of `io`.
///
/// When `depth >= 0` and the range is at least 64 bytes long, the first 8 bytes
/// are probed for a known archive magic (BNK, WAD, WPK; each can be disabled by
/// its `no_*` flag, and WAD is only accepted when `depth < 1`). A match hands
/// the range to `process_ar`. Otherwise the range is reported to `cb` in
/// consecutive pieces of at most `chunk_size` bytes, each carrying
/// `top_entry.compressed`.
///
/// A `chunk_size` of 0 is treated as "do not split": the range is reported as a
/// single piece. (Previously a zero `chunk_size` produced zero-sized pieces
/// forever because the remaining size never decreased.)
auto ArSplit::process(IO const& io, offset_cb cb, int depth, Entry top_entry) const -> void {
    if (depth >= 0 && top_entry.size >= 64) {
        char buffer[8] = {};
        rlib_assert(io.read(top_entry.offset, buffer));
        if (!no_bnk && BNK::check_magic(buffer)) {
            return process_ar<BNK>(io, cb, top_entry);
        }
        if (!no_wad && depth < 1 && WAD::check_magic(buffer)) {
            return process_ar<WAD>(io, cb, top_entry);
        }
        if (!no_wpk && WPK::check_magic(buffer)) {
            return process_ar<WPK>(io, cb, top_entry);
        }
    }

    // Guard against a zero step, which would never make progress.
    auto const step = chunk_size != 0 ? chunk_size : top_entry.size;
    for (auto i = top_entry.offset, remain = top_entry.size; remain;) {
        auto const size = std::min(step, remain);
        cb({.offset = i, .size = size, .compressed = top_entry.compressed});
        i += size;
        remain -= size;
    }
}

lib/rlib/ar.hpp

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#pragma once
2+
#include <rlib/common.hpp>
3+
#include <rlib/iofile.hpp>
4+
5+
namespace rlib {
6+
struct ArSplit {
7+
struct Entry {
8+
std::size_t offset;
9+
std::size_t size;
10+
bool compressed;
11+
};
12+
using offset_cb = function_ref<void(Entry)>;
13+
14+
std::size_t chunk_size;
15+
bool no_bnk;
16+
bool no_wad;
17+
bool no_wpk;
18+
bool no_nest;
19+
20+
auto operator()(IO const& io, offset_cb cb) const -> void;
21+
22+
private:
23+
auto process(IO const& io, offset_cb cb, int depth, Entry top_entry) const -> void;
24+
25+
template <typename T>
26+
auto process_ar(IO const& io, offset_cb cb, Entry top_entry) const -> void;
27+
};
28+
}

lib/rlib/ar/bnk.cpp

+89
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
#include "bnk.hpp"
2+
3+
#include <map>
4+
5+
// Early-out helper for BNK::read: when the condition does not hold, return a
// static message made of the " BNK::read: " prefix and the condition's text.
#define ar_assert(...)                              \
    do {                                            \
        if ((__VA_ARGS__)) {                        \
        } else {                                    \
            return " BNK::read: " #__VA_ARGS__;     \
        }                                           \
    } while (false)
9+
10+
using namespace rlib;
11+
using namespace rlib::ar;
12+
13+
// Section header as stored in the file: a 4-character section type followed by
// the size of the section payload. BNK::read fills this with a raw byte copy,
// so the in-memory size must match the 8 bytes consumed per header.
struct BNK::Entry::Raw {
    std::array<char, 4> type;
    std::uint32_t size;
};
static_assert(sizeof(BNK::Entry::Raw) == 8, "BNK section header must be exactly 8 bytes");
17+
18+
// One record of the DIDX section. `offset` and `size` locate the item's bytes
// relative to the start of the DATA section payload. BNK::read fills a vector
// of these with a raw byte copy, so the record must stay 12 bytes.
struct BNK::Entry::DIDX {
    std::uint32_t id;
    std::uint32_t offset;
    std::uint32_t size;
};
static_assert(sizeof(BNK::Entry::DIDX) == 12, "BNK DIDX record must be exactly 12 bytes");
23+
24+
/// Returns true when `data` holds at least 4 bytes and begins with "BKHD".
auto BNK::check_magic(std::span<char const> data) noexcept -> bool {
    if (data.size() < 4) {
        return false;
    }
    return std::memcmp(data.data(), "BKHD", 4) == 0;
}
27+
28+
/// Parse the BNK archive occupying `[offset, offset + size)` of `io` and fill
/// `entries`.
///
/// The range is walked as a sequence of sections (8-byte header + payload).
/// When both a DIDX and a DATA section exist, every DIDX record becomes one
/// entry (marked compressed) pointing into the DATA payload, and the DIDX/DATA
/// sections themselves are reduced to their headers. Every section then
/// contributes one entry covering its header plus its (possibly reduced)
/// payload.
///
/// @return nullptr on success, otherwise a static string naming the failed check.
///
/// All cursor positions are absolute offsets into `io`, so bounds are checked
/// against the absolute end of the range; comparing them with the relative
/// `size` rejected valid archives located at a non-zero `offset`.
auto BNK::read(IO const& io, std::size_t offset, std::size_t size) -> char const* {
    using TYPE = std::array<char, 4>;
    static constexpr auto BKHD = TYPE{'B', 'K', 'H', 'D'};
    static constexpr auto DIDX = TYPE{'D', 'I', 'D', 'X'};
    static constexpr auto DATA = TYPE{'D', 'A', 'T', 'A'};

    auto magic = TYPE{};
    ar_assert(size >= 8);
    ar_assert(io.read(offset, magic));
    ar_assert(magic == BKHD);

    auto const end = offset + size;

    // Invariant: offset <= i <= end, so `end - i` never wraps.
    auto sections = std::map<TYPE, Entry>{};
    for (std::size_t i = offset; i != end;) {
        Entry::Raw raw = {};
        ar_assert(end - i >= sizeof(raw));
        ar_assert(io.read(i, {reinterpret_cast<char*>(&raw), sizeof(raw)}));

        i += sizeof(Entry::Raw);
        ar_assert(end - i >= raw.size);

        // A repeated section type replaces the earlier one.
        sections[raw.type] = Entry{.offset = i, .size = raw.size};

        i += raw.size;
    }

    entries.clear();
    entries.reserve(sections.size());

    auto i_didx = sections.find(DIDX);
    auto i_data = sections.find(DATA);
    if (i_didx != sections.end() && i_data != sections.end()) {
        auto const didx_base = i_didx->second;
        auto const data_base = i_data->second;

        ar_assert(didx_base.size % sizeof(Entry::DIDX) == 0);
        auto didx_list = std::vector<Entry::DIDX>(didx_base.size / sizeof(Entry::DIDX));
        if (!didx_list.empty()) {
            ar_assert(io.read(didx_base.offset, {reinterpret_cast<char*>(didx_list.data()), didx_base.size}));
        }

        entries.reserve(sections.size() + didx_list.size());
        for (auto const& didx : didx_list) {
            // DIDX offsets are relative to the DATA payload.
            ar_assert(data_base.size >= didx.offset);
            ar_assert(data_base.size - didx.offset >= didx.size);
            entries.push_back(Entry{
                .offset = data_base.offset + didx.offset,
                .size = didx.size,
                .compressed = true,
            });
        }

        // The payloads are now described by the entries above (DATA) or left
        // as uncovered bytes (DIDX); keep only the section headers.
        i_didx->second.size = 0;
        i_data->second.size = 0;
    }

    // Widen every section to include its 8-byte header.
    for (auto const& item : sections) {
        auto entry = item.second;
        entry.offset -= sizeof(Entry::Raw);
        entry.size += sizeof(Entry::Raw);
        entries.push_back(entry);
    }

    return nullptr;
}

lib/rlib/ar/bnk.hpp

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#pragma once
2+
#include <rlib/common.hpp>
3+
#include <rlib/iofile.hpp>
4+
5+
namespace rlib::ar {
6+
struct BNK {
7+
static constexpr bool can_nest = false;
8+
9+
struct Entry {
10+
struct Raw;
11+
struct DIDX;
12+
std::size_t offset;
13+
std::size_t size;
14+
bool compressed;
15+
};
16+
std::vector<Entry> entries;
17+
18+
static auto check_magic(std::span<char const> data) noexcept -> bool;
19+
auto read(IO const& io, std::size_t offset, std::size_t size) -> char const*;
20+
};
21+
}

lib/rlib/ar/wad.cpp

+126
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
#include "wad.hpp"
2+
3+
using namespace rlib;
4+
using namespace rlib::ar;
5+
6+
// Early-out helper for WAD::read: when the condition does not hold, return a
// static message made of the " WAD::read: " prefix and the condition's text.
#define ar_assert(...)                              \
    do {                                            \
        if ((__VA_ARGS__)) {                        \
        } else {                                    \
            return " WAD::read: " #__VA_ARGS__;     \
        }                                           \
    } while (false)
10+
11+
// Version-independent view of a WAD header, filled in by WAD::read from one of
// the versioned on-disk layouts declared below.
struct WAD::Header {
    std::size_t entry_size;   // size of one table-of-contents record
    std::size_t entry_count;  // number of table-of-contents records
    std::size_t toc_start;    // position of the table of contents
    std::size_t toc_size;     // entry_size * entry_count

    // On-disk layouts.
    struct Base;
    struct V1;
    struct V2;
    struct V3;
};
22+
23+
// Leading bytes shared by every header version: a 2-character magic and two
// version bytes. Filled by a raw byte copy, so it must stay 4 bytes.
struct WAD::Header::Base {
    std::array<char, 2> magic;
    std::uint8_t version[2];
};
static_assert(sizeof(WAD::Header::Base) == 4, "WAD base header must be exactly 4 bytes");
27+
28+
// On-disk header used for versions 0 and 1. Filled by a raw byte copy, so its
// size is part of the file format.
struct WAD::Header::V1 : Base {
    std::uint16_t toc_start;
    std::uint16_t entry_size;
    std::uint32_t entry_count;
};
static_assert(sizeof(WAD::Header::V1) == 12, "WAD v1 header must be exactly 12 bytes");
33+
34+
// On-disk header used for version 2. Filled by a raw byte copy, so its size is
// part of the file format.
struct WAD::Header::V2 : Base {
    std::array<std::uint8_t, 84> signature;
    std::array<std::uint8_t, 8> checksum;
    std::uint16_t toc_start;
    std::uint16_t entry_size;
    std::uint32_t entry_count;
};
static_assert(sizeof(WAD::Header::V2) == 104, "WAD v2 header must be exactly 104 bytes");
41+
42+
// On-disk header used for version 3. The table-of-contents position and record
// size are fixed constants rather than stored fields. Filled by a raw byte
// copy; its size equals `toc_start`, i.e. the table of contents follows the
// header directly.
struct WAD::Header::V3 : Base {
    std::uint8_t signature[256];
    std::array<std::uint8_t, 8> checksum;
    static constexpr std::uint16_t toc_start = 272;
    static constexpr std::uint16_t entry_size = 32;
    std::uint32_t entry_count;
};
static_assert(sizeof(WAD::Header::V3) == 272, "WAD v3 header must be exactly 272 bytes");
49+
50+
// Table-of-contents record as stored in the file. `offset` is relative to the
// start of the archive. WAD::read fills this with a raw byte copy, so the
// struct must stay 24 bytes; records may be longer on disk (version 3 uses 32
// bytes per record), in which case only this leading part is consumed.
struct WAD::Entry::Raw {
    std::uint64_t path;
    std::uint32_t offset;
    std::uint32_t size_compressed;
    std::uint32_t size_uncompressed;
    std::uint8_t type : 4;
    std::uint8_t subchunks : 4;
    std::uint8_t pad[3];
};
static_assert(sizeof(WAD::Entry::Raw) == 24, "WAD TOC record must be exactly 24 bytes");
59+
60+
/// Returns true when `data` holds at least 4 bytes, begins with "RW" and its
/// third byte (read as unsigned) is at most 10.
auto WAD::check_magic(std::span<char const> data) noexcept -> bool {
    if (data.size() < 4) {
        return false;
    }
    if (std::memcmp(data.data(), "RW", 2) != 0) {
        return false;
    }
    return static_cast<std::uint8_t>(data[2]) <= 10;
}
63+
64+
/// Parse the WAD archive occupying `[offset, offset + size)` of `io` and fill
/// `entries`.
///
/// The first entry covers the table of contents itself (uncompressed); it is
/// followed by one entry per table-of-contents record, marked compressed when
/// the record's type is non-zero. Every entry must start at or after the end of
/// the table of contents and lie inside the archive range.
///
/// @return nullptr on success, otherwise a static string describing the failure
///         ("Unknown wad version" for a major version above 3).
///
/// Fixes relative to the previous implementation:
///  - the versioned header is size-checked against its own size, not against
///    the size of the parsed `Header` struct;
///  - at most `sizeof(Entry::Raw)` bytes are copied per record, so records
///    larger than the struct (32 bytes in version 3) no longer overrun it;
///  - entry bounds are checked against the absolute end of the archive, since
///    entry offsets are absolute once `offset` has been added;
///  - failed reads are reported instead of being ignored.
auto WAD::read(IO const& io, std::size_t offset, std::size_t size) -> char const* {
    static constexpr auto MAGIC = std::array{'R', 'W'};

    Header::Base header_base = {};
    ar_assert(size >= sizeof(header_base));
    ar_assert(io.read(offset, {reinterpret_cast<char*>(&header_base), sizeof(header_base)}));
    ar_assert(header_base.magic == MAGIC);

    Header header = {};

    // Reads one versioned on-disk header and copies its fields into `header`.
    auto const read_header = [&](auto v_header) -> char const* {
        ar_assert(size >= sizeof(v_header));
        ar_assert(io.read(offset, {reinterpret_cast<char*>(&v_header), sizeof(v_header)}));
        header.entry_size = v_header.entry_size;
        header.entry_count = v_header.entry_count;
        header.toc_start = v_header.toc_start;
        header.toc_size = header.entry_size * header.entry_count;
        return nullptr;
    };

    switch (header_base.version[0]) {
        case 0:
        case 1:
            if (auto error = read_header(Header::V1{})) return error;
            break;
        case 2:
            if (auto error = read_header(Header::V2{})) return error;
            break;
        case 3:
            if (auto error = read_header(Header::V3{})) return error;
            break;
        default:
            return "Unknown wad version";
    }

    // `toc_start` is still relative to the archive here.
    ar_assert(size >= header.toc_start);
    ar_assert(size - header.toc_start >= header.toc_size);
    header.toc_start += offset;

    auto const archive_end = offset + size;
    auto const toc_end = header.toc_start + header.toc_size;

    // Never copy more than the struct can hold; shorter records leave the
    // remaining fields zero-initialised.
    auto const raw_size = header.entry_size < sizeof(Entry::Raw) ? header.entry_size : sizeof(Entry::Raw);

    entries.clear();
    entries.reserve(header.entry_count + 1);

    entries.push_back(Entry{
        .offset = header.toc_start,
        .size = header.toc_size,
        .compressed = false,
    });
    for (std::size_t i = 0; i != header.entry_count; ++i) {
        auto raw_entry = Entry::Raw{};
        ar_assert(io.read(header.toc_start + i * header.entry_size, {reinterpret_cast<char*>(&raw_entry), raw_size}));

        auto const entry = Entry{
            .offset = offset + raw_entry.offset,
            .size = raw_entry.size_compressed,
            .compressed = raw_entry.type != 0,
        };
        ar_assert(entry.offset >= toc_end);
        ar_assert(archive_end >= entry.offset);
        ar_assert(archive_end - entry.offset >= entry.size);
        entries.push_back(entry);
    }

    return nullptr;
}

lib/rlib/ar/wad.hpp

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#pragma once
2+
#include <rlib/common.hpp>
3+
#include <rlib/iofile.hpp>
4+
5+
namespace rlib::ar {
6+
struct WAD {
7+
static constexpr bool can_nest = true;
8+
9+
struct Header;
10+
struct Entry {
11+
struct Raw;
12+
std::size_t offset;
13+
std::size_t size;
14+
bool compressed;
15+
};
16+
std::vector<Entry> entries;
17+
18+
static auto check_magic(std::span<char const> data) noexcept -> bool;
19+
auto read(IO const& io, std::size_t offset, std::size_t size) -> char const*;
20+
};
21+
}

0 commit comments

Comments
 (0)