@@ -4,19 +4,107 @@ module Slice = Byte_slice
4
4
module LEB128 = Imandrakit_leb128. Decode
5
5
6
6
type cached = ..
7
- type cached + = Miss
8
7
9
8
module type CACHE_KEY = sig
10
9
type elt
11
10
type cached + = C of elt
12
11
end
13
12
14
- type t = {
15
- sl : slice ;
16
- cache : cached array ;
17
- mutable hmap : Hmap .t ;
13
+ type t =
14
+ | Decode : {
15
+ st : 'st ;
16
+ ops : 'st ops ;
17
+ cache : cached Int_tbl .t ;
18
+ mutable hmap : Hmap .t ;
19
+ }
20
+ -> t
21
+
22
+ and 'st ops = {
23
+ length : 'st -> int ;
24
+ read_char : 'st -> int -> char ;
25
+ read_int32 : 'st -> int -> int32 ;
26
+ read_int64 : 'st -> int -> int64 ;
27
+ read_blob : 'st -> int -> int -> slice ;
28
+ read_leb128 : 'st -> int -> int64 * offset ;
18
29
}
19
30
31
+ let st_len t =
32
+ let (Decode { st; ops; _ }) = t in
33
+ ops.length st
34
+
35
+ let read_char t offset =
36
+ let (Decode { st; ops; _ }) = t in
37
+ ops.read_char st offset
38
+
39
+ let read_int32 t offset =
40
+ let (Decode { st; ops; _ }) = t in
41
+ ops.read_int32 st offset
42
+
43
+ let read_int64 t offset =
44
+ let (Decode { st; ops; _ }) = t in
45
+ ops.read_int64 st offset
46
+
47
+ let read_blob t offset length =
48
+ let (Decode { st; ops; _ }) = t in
49
+ ops.read_blob st offset length
50
+
51
+ let read_leb128 t offset =
52
+ let (Decode { st; ops; _ }) = t in
53
+ ops.read_leb128 st offset
54
+
55
+ let slice_ops : slice ops =
56
+ {
57
+ length = (fun (st : slice ) -> Slice. len st);
58
+ read_char = (fun (st : slice ) (offset : int ) -> Slice. get st offset);
59
+ read_int32 =
60
+ (fun (st : slice ) (offset : int ) ->
61
+ Bytes. get_int32_le st.bs (st.off + offset));
62
+ read_int64 =
63
+ (fun (st : slice ) (offset : int ) ->
64
+ Bytes. get_int64_le st.bs (st.off + offset));
65
+ read_blob =
66
+ (fun (st : slice ) (offset : int ) (length : int ) ->
67
+ Slice. sub st offset (min length (Slice. len st - st.off - offset)));
68
+ read_leb128 = (fun (st : slice ) (offset : int ) -> LEB128. u64 st offset);
69
+ }
70
+
71
+ let in_channel_ops : in_channel ops =
72
+ {
73
+ length = (fun (st : in_channel ) -> Int64. to_int (In_channel. length st));
74
+ read_char =
75
+ (fun (st : in_channel ) (offset : int ) ->
76
+ In_channel. seek st (Int64. of_int offset);
77
+ Option. get (In_channel. input_char st));
78
+ read_int32 =
79
+ (fun (st : in_channel ) (offset : int ) ->
80
+ let bs = Bytes. create 4 in
81
+ In_channel. seek st (Int64. of_int offset);
82
+ let _ = In_channel. really_input st bs 0 4 in
83
+ Bytes. get_int32_le bs 0 );
84
+ read_int64 =
85
+ (fun (st : in_channel ) (offset : int ) ->
86
+ let bs = Bytes. create 8 in
87
+ In_channel. seek st (Int64. of_int offset);
88
+ let _ = In_channel. really_input st bs 0 8 in
89
+ Bytes. get_int64_le bs 0 );
90
+ read_blob =
91
+ (fun (st : in_channel ) (offset : int ) (length : int ) ->
92
+ let remaining = Int64. to_int (In_channel. length st) - offset in
93
+ let length = min length remaining in
94
+ let bs = Bytes. create length in
95
+ In_channel. seek st (Int64. of_int offset);
96
+ let _ = In_channel. really_input st bs 0 length in
97
+ Slice. create bs);
98
+ read_leb128 =
99
+ (fun (st : in_channel ) (offset : int ) ->
100
+ let remaining = Int64. to_int (In_channel. length st) - offset in
101
+ let length = min 16 remaining in
102
+ let bs = Bytes. create length in
103
+ In_channel. seek st (Int64. of_int offset);
104
+ let _ = In_channel. really_input st bs 0 length in
105
+ LEB128. u64 (Slice. create bs) 0 );
106
+ }
107
+
20
108
type cstor_index = int [@@ deriving show ]
21
109
22
110
type cursor = {
@@ -30,16 +118,26 @@ let show_cursor (self : cursor) =
30
118
31
119
let pp_cursor = Fmt. of_to_string show_cursor
32
120
33
- let [@ inline] create sl : t =
34
- { sl; cache = Array. make sl.len Miss ; hmap = Hmap. empty }
121
+ let [@ inline] create (st : 'st ) (ops : 'st ops ) : t =
122
+ Decode { st; ops; cache = Int_tbl. create 32 ; hmap = Hmap. empty }
123
+
124
+ let [@ inline] of_slice s : t = create s slice_ops
125
+ let [@ inline] of_string s : t = create (Slice. of_string s) slice_ops
126
+ let [@ inline] of_in_channel c : t = create c in_channel_ops
127
+
128
+ let [@ inline] hmap_set self k v =
129
+ match self with
130
+ | Decode d -> d.hmap < - Hmap. add k v d.hmap
35
131
36
- let [@ inline] of_string s = create @@ Slice. of_string s
37
- let [ @ inline] hmap_set self k v = self.hmap < - Hmap. add k v self.hmap
38
- let [ @ inline] hmap_get self k = Hmap. find k self .hmap
132
+ let [@ inline] hmap_get self k =
133
+ match self with
134
+ | Decode d -> Hmap. find k d .hmap
39
135
40
136
let hmap_transfer d1 ~into :d2 : unit =
41
- d2.hmap < -
42
- Hmap. fold (fun (Hmap. B (k , v )) h2 -> Hmap. add k v h2) d1.hmap d2.hmap
137
+ match d1, d2 with
138
+ | Decode d1 , Decode d2 ->
139
+ d2.hmap < -
140
+ Hmap. fold (fun (Hmap. B (k , v )) h2 -> Hmap. add k v h2) d1.hmap d2.hmap
43
141
44
142
type 'a decoder = t -> offset -> 'a
45
143
type num_bytes_consumed = int
@@ -77,7 +175,7 @@ let invalid_first_byte_ msg ~offset ~high ~low =
77
175
failf " Decode: invalid first byte %d/%d at %d: %s" high low offset msg
78
176
79
177
let [@ inline] get_char_ (self : t ) (offset : offset ) : int =
80
- Char. code ( Slice. get self.sl offset)
178
+ Char. code @@ read_char self offset
81
179
82
180
let [@ inline] get_high (c : int ) : int = (c land 0b1111_0000 ) lsr 4
83
181
let [@ inline] get_low (c : int ) : int = c land 0b0000_1111
@@ -87,7 +185,7 @@ let[@inline] get_int64_ self offset ~low : int64 * num_bytes_consumed =
87
185
if low < 15 then
88
186
Int64. of_int low, 0
89
187
else (
90
- let i, n = LEB128. u64 self.sl (offset + 1 ) in
188
+ let i, n = read_leb128 self (offset + 1 ) in
91
189
Int64. add i 15L , n
92
190
)
93
191
@@ -106,13 +204,9 @@ let get_special_ offset ~high ~low : Value.t =
106
204
let [@ inline] get_float_ (self : t ) offset ~low : float * num_bytes_consumed =
107
205
let isf32 = low = 0 in
108
206
if isf32 then
109
- ( Bytes. get_int32_le self.sl.bs (self.sl.off + offset + 1 )
110
- |> Int32. float_of_bits,
111
- 4 )
207
+ read_int32 self (offset + 1 ) |> Int32. float_of_bits, 4
112
208
else
113
- ( Bytes. get_int64_le self.sl.bs (self.sl.off + offset + 1 )
114
- |> Int64. float_of_bits,
115
- 8 )
209
+ read_int64 self (offset + 1 ) |> Int64. float_of_bits, 8
116
210
117
211
(* * Number of bytes to skip *)
118
212
let skip_float_ ~low : int =
@@ -173,11 +267,11 @@ let read ?(auto_deref = true) (self : t) (offset : offset) : Value.t =
173
267
Value. Float f
174
268
| 4 ->
175
269
let len, size_len = get_int_truncate_ self offset ~low in
176
- let s = Slice. sub self.sl (offset + 1 + size_len) len in
270
+ let s = read_blob self (offset + 1 + size_len) len in
177
271
Value. String s
178
272
| 5 ->
179
273
let len, size_len = get_int_truncate_ self offset ~low in
180
- let s = Slice. sub self.sl (offset + 1 + size_len) len in
274
+ let s = read_blob self (offset + 1 + size_len) len in
181
275
Value. Blob s
182
276
| 6 ->
183
277
let len, size_len = get_int_truncate_ self offset ~low in
@@ -208,7 +302,8 @@ let read ?(auto_deref = true) (self : t) (offset : offset) : Value.t =
208
302
let idx_cstor, size_idx_cstor = get_int_truncate_ self offset ~low in
209
303
let offset_after_n = offset + 1 + size_idx_cstor in
210
304
let num_items, size_num_items =
211
- LEB128. uint_truncate self.sl offset_after_n
305
+ let sl = read_blob self offset_after_n 16 in
306
+ LEB128. uint_truncate sl 0
212
307
in
213
308
let c : cursor =
214
309
{
@@ -314,7 +409,7 @@ let string_slice self offset =
314
409
let low = get_low c in
315
410
if high <> 4 then fail_decode_type_ ~what: " string" offset;
316
411
let len, size_len = get_int_truncate_ self offset ~low in
317
- Slice. sub self.sl (offset + 1 + size_len) len
412
+ read_blob self (offset + 1 + size_len) len
318
413
319
414
let string self offset = Slice. contents @@ string_slice self offset
320
415
@@ -325,7 +420,7 @@ let blob_slice self offset =
325
420
let low = get_low c in
326
421
if high <> 5 then fail_decode_type_ ~what: " blob" offset;
327
422
let len, size_len = get_int_truncate_ self offset ~low in
328
- Slice. sub self.sl (offset + 1 + size_len) len
423
+ read_blob self (offset + 1 + size_len) len
329
424
330
425
let blob self offset = Slice. contents @@ blob_slice self offset
331
426
@@ -380,7 +475,8 @@ let cstor self offset =
380
475
let idx_cstor, size_idx_cstor = get_int_truncate_ self offset ~low in
381
476
let offset_after_n = offset + 1 + size_idx_cstor in
382
477
let num_items, size_num_items =
383
- LEB128. uint_truncate self.sl offset_after_n
478
+ let sl = read_blob self offset_after_n 16 in
479
+ LEB128. uint_truncate sl 0
384
480
in
385
481
let c : cursor =
386
482
{
@@ -405,15 +501,16 @@ let[@inline] ref_for (self : t) offset : _ offset_for =
405
501
Offset_for (ref_ self offset)
406
502
407
503
let get_entrypoint (self : t ) : offset =
408
- assert (Slice. len self.sl > 0 );
409
- let last = Slice. len self.sl - 1 in
410
- let offset = Char. code @@ Slice. get self.sl last in
504
+ let len = st_len self in
505
+ assert (len > 0 );
506
+ let last = len - 1 in
507
+ let offset = Char. code @@ read_char self last in
411
508
last - offset - 1
412
509
413
510
let read_entrypoint (self : t ) : Value.t =
414
511
read self @@ deref_rec self @@ get_entrypoint self
415
512
416
- let decode_string ?(init = ignore) (d : _ decoder ) (s : string ) =
513
+ let decode_string ?(init = ignore) (d : 'a decoder ) (s : string ) : 'a =
417
514
let self = of_string s in
418
515
init self;
419
516
let off = deref_rec self @@ get_entrypoint self in
@@ -544,12 +641,13 @@ let with_cache (type a) (key : a cache_key) (dec : a decoder) : a decoder =
544
641
dec st off
545
642
else (
546
643
(* go through the cache *)
547
- match st.cache.(off) with
548
- | K. C v -> v
549
- | Miss ->
644
+ let ( Decode { cache; _ }) = st in
645
+ match Int_tbl. find cache off with
646
+ | exception Not_found ->
550
647
let v = dec st off in
551
- st. cache.( off) < - K. C v;
648
+ Int_tbl. add cache off ( K. C v) ;
552
649
v
650
+ | K. C v -> v
553
651
| _ -> (* weird collision, just don't cache… *) dec st off
554
652
)
555
653
0 commit comments