FEAT: enhanced BINCODE dialect with new features:

Oldes · Oldes · commit af1be5b3483b · 2019-02-25T22:52:21.000+01:00
* added `/with` refinement to provide an additional value when reading a single value, like:
  `binary/read/with b 'BYTES 42`, which returns just the binary and not a block containing the binary.
* it's now possible to use zero bits without a range error: `binary/read b [UB 0]` returns 0.
* new read command `FIXED8` (16-bit 8.8 fixed-point number)
* new read command `FIXED16` (32-bit 16.16 fixed-point number)
* new read commands `TUPLE3` and `TUPLE4` (returns tuple type value from 3 or 4 bytes)
* new read command `SkipBits` (allows to skip given number of bits without reading anything)
* new read command `ALIGN` (aligns bit stream to byte boundary)
* new read commands `FLOAT16`, `FLOAT` and `DOUBLE` (16bit, 32bit and 64bit decimal value)
* new read command `SI16LE` (16bit signed integer using little-endian byte order)
* new read command `FB` (Signed, fixed-point bit value)
* fixed read command `SB`
diff --git a/src/core/u-bincode.c b/src/core/u-bincode.c
@@ -73,6 +73,41 @@
 #include <stdio.h>
 #include <time.h>
 
+// FLOAT16 credits: Steven Pigeon
+// https://hbfs.wordpress.com/2013/02/12/float16/
+typedef union {
+	// 16-bit float viewed either as sign/exponent/mantissa bit-fields or as two raw bytes
+	struct {
+		// type determines alignment! NOTE(review): bit-field order is implementation-defined in C; confirm m lands in the low bits on every target
+		u16 m : 10; // mantissa (fraction) bits
+		u16 e : 5; // biased exponent; float16to32 treats the bias as 15
+		u16 s : 1; // sign bit
+	} bits;
+	struct {
+		u8 low; // first byte of the union; the FLOAT16 reader stores cp[0] here
+		u8 high; // second byte of the union; the FLOAT16 reader stores cp[1] here
+	} bytes;
+} float16_s;
+
+typedef union { // 32-bit float viewed either as a native float or as its bit-fields
+	float v; // the value as a float; read back after the bit-fields are filled in
+	struct {
+		u32 m : 23; // mantissa (fraction) bits
+		u32 e : 8; // biased exponent; float16to32 treats the bias as 127
+		u32 s : 1; // sign bit
+	} bits; // NOTE(review): bit-field order is implementation-defined in C; confirm it matches the float layout on every target
+} float32_s;
+
+// Widens an IEEE 754 binary16 value to float; handles signed zero, subnormals, Inf and NaN.
+float float16to32(float16_s f16) {
+	float32_s f32;
+	f32.bits.s = f16.bits.s;
+	f32.bits.e = (f16.bits.e == 31) ? 255 : (f16.bits.e ? (f16.bits.e - 15) + 127 : 0); // 31: Inf/NaN, 0: zero, else re-bias
+	f32.bits.m = ((u32)f16.bits.m) << 13;
+	if (!f16.bits.e && f16.bits.m) return (f16.bits.s ? -1.0f : 1.0f) * f16.bits.m / 16777216.0f; // subnormal: m * 2^-24
+	return f32.v;
+}
+
 #define ASSERT_SI_RANGE(v, n) if (VAL_INT64(v) < (- (i64)n) || VAL_INT64(v) > (i64)n) Trap1(RE_OUT_OF_RANGE, v);
 #define ASSERT_UI_RANGE(v, n) if (VAL_INT32(v) > n) Trap1(RE_OUT_OF_RANGE, v);
 #define ASSERT_UIBYTES_RANGE(v, n) if (VAL_LEN(v) > n) Trap1(RE_OUT_OF_RANGE, v);
@@ -141,6 +176,8 @@ static REBCNT EncodedU32_Size(u32 value) {
 //			code [word! block!]   "Input encoding"
 //		/into    "Put READ results in out block, instead of creating a new block"
 //			out  [block!] "Target block for results, when /into is used"
+//		/with    "Additional input argument"
+//			num  [integer!] "Bits/bytes number used with WORD! code type to resolve just single value"
 //	]
 ***********************************************************************/
 {
@@ -153,6 +190,8 @@ static REBCNT EncodedU32_Size(u32 value) {
 	REBVAL *val_read  = D_ARG(7);
 	REBOOL  ref_into  = D_REF(8);
 	REBVAL *val_into  = D_ARG(9);
+	REBOOL  ref_with  = D_REF(10);
+	REBVAL *val_num   = D_ARG(11);
 
     REBVAL *ret = D_RET;
 	//REBVAL *buf;
@@ -766,6 +805,7 @@ static REBCNT EncodedU32_Size(u32 value) {
 			// if encoding is just a word, simulate block with single value on stack
 			DS_PUSH(val_read);
 			value = DS_TOP;
+			if (IS_INTEGER(val_num)) DS_PUSH(val_num);
 			DS_PUSH_END; // marks end of the block
 			if(ref_into) {
 				blk = VAL_SERIES(val_into);
@@ -778,12 +818,14 @@ static REBCNT EncodedU32_Size(u32 value) {
 		DS_PUSH_NONE;
 		temp = DS_TOP;
 		REBINT ssp = DSP;  // starting stack pointer
+		REBINT cmd;
 		
 		for (; NOT_END(value); value++) {
 			n = 0;
 			switch (VAL_TYPE(value)) {
 				case REB_WORD:
-					switch (VAL_WORD_CANON(value)) {
+					cmd = VAL_WORD_CANON(value);
+					switch (cmd) {
 						case SYM_UI8:
 							n = 1;
 							ASSERT_READ_SIZE(value, cp, ep, n);
@@ -810,6 +852,12 @@ static REBCNT EncodedU32_Size(u32 value) {
 							VAL_SET(temp, REB_INTEGER);
 							SET_INT32(temp, (i16)((i16)cp[1] + ((i16)cp[0] << 8)));
 							break;
+						case SYM_SI16LE:
+							n = 2;
+							ASSERT_READ_SIZE(value, cp, ep, n);
+							VAL_SET(temp, REB_INTEGER);
+							SET_INT32(temp, (i16)((i16)cp[0] + ((i16)cp[1] << 8)));
+							break;
 						case SYM_UI24:
 						case SYM_UI24BE:
 							n = 3;
@@ -967,48 +1015,55 @@ static REBCNT EncodedU32_Size(u32 value) {
 							if (IS_GET_WORD(next)) next = Get_Var(next);
 							if (!IS_INTEGER(next)) Trap1(RE_INVALID_SPEC, value);
 							i = 0;
-							if (inBit == 0) inBit = 0x80;
 							// could be optimized?
 							nbits = VAL_INT32(next);
 							//printf("bits: %i %i\n", nbits, 1 << nbits);
-							nbits = 1 << nbits;
-							ASSERT_READ_SIZE(value, cp, ep, 1);
-							while(nbits > 1) {
-								nbits = nbits >> 1;
-								if(IS_BIT_SET(cp[0], inBit)) i = i | nbits;
-								//printf("?? %i %i\n", inBit, i);
-								NEXT_IN_BIT(inBit);
-								//printf("inBit: %i\n", inBit);
+							if (nbits > 0) {
+								if (inBit == 0) inBit = 0x80;
+								nbits = 1 << nbits;
+								ASSERT_READ_SIZE(value, cp, ep, 1);
+								while(nbits > 1) {
+									nbits = nbits >> 1;
+									if(IS_BIT_SET(cp[0], inBit)) i = i | nbits;
+									//printf("?? %i %i\n", inBit, i);
+									NEXT_IN_BIT(inBit);
+									//printf("inBit: %i\n", inBit);
+								}
+								STORE_IN_BIT(val_ctx, inBit);
 							}
 							VAL_SET(temp, REB_INTEGER);
 							SET_INT32(temp, i);
-							STORE_IN_BIT(val_ctx, inBit);
 							break;
 						case SYM_SB:
+						case SYM_FB:
 							next = ++value;
 							if (IS_GET_WORD(next)) next = Get_Var(next);
 							if (!IS_INTEGER(next)) Trap1(RE_INVALID_SPEC, value);
-							i = 0;
-							if (inBit == 0) inBit = 0x80;
+							u = 0;
 							// could be optimized?
 							nbits = VAL_INT32(next);
-							nbits = 1 << nbits;
 							if (nbits > 0) {
-								//printf("nbits: %i\n", nbits);
-								ASSERT_READ_SIZE(value, cp, ep, 1);
-								BOOL negative = IS_BIT_SET(cp[0], inBit);
-								nbits = nbits >> 1;
-								NEXT_IN_BIT(inBit);
-								while (nbits > 1) {
-									nbits = nbits >> 1;
-									if (IS_BIT_SET(cp[0], inBit)) i = i | nbits;
-									//printf("?? %i %i\n", inBit, i);
-									NEXT_IN_BIT(inBit);
-								}
-								if(negative) i = -i;
+								if (inBit == 0) inBit = 0x80;
+								// http://graphics.stanford.edu/~seander/bithacks.html#VariableSignExtend
+								u64 m = 1U << (nbits - 1); // sign bit mask
+								nbits = 1 << nbits;
+								//if (nbits > 0) {
+									//printf("SB nbits: %i\n", nbits);
+									while (nbits > 1) {
+										nbits >>= 1;
+										if (IS_BIT_SET(cp[0], inBit)) u = u | nbits;
+										//printf("?? %i %i %u\n", nbits, inBit, u);
+										NEXT_IN_BIT(inBit);
+									}
+									u = (u ^ m) - m;
+								//}
+								STORE_IN_BIT(val_ctx, inBit);
+							}
+							if (cmd == SYM_SB) {
+								SET_INTEGER(temp, u);
+							} else {
+								SET_DECIMAL(temp, (double)u / 65536.0);
 							}
-							VAL_SET(temp, REB_INTEGER);
-							SET_INT32(temp, i);
 							break;
 						case SYM_BIT:
 						case SYM_NOT_BIT:
@@ -1027,7 +1082,8 @@ static REBCNT EncodedU32_Size(u32 value) {
 							break;
 						case SYM_ALIGN:
 							// aligns bit buffer to byte boundary
-							if (inBit > 0) {
+							//if (inBit == 128) inBit = 0;
+							if (inBit > 0 && inBit < 128) {
 								inBit = 0;
 								cp++;
 								VAL_INDEX(buffer_read)++;
@@ -1145,10 +1201,75 @@ static REBCNT EncodedU32_Size(u32 value) {
 							VAL_INDEX(buffer_read) = i; //TODO: range test
 							cp = BIN_DATA(bin) + VAL_INDEX(buffer_read);
 							continue;
+						case SYM_SKIPBITS:
+							next = ++value;
+							if (IS_GET_WORD(next)) next = Get_Var(next);
+							if (!IS_INTEGER(next)) Trap1(RE_INVALID_SPEC, value);
+							i = VAL_INT32(next);
+							if (i >= 8) {
+								i /= 8;
+								//printf("byte skip: %d\n", i);
+								ASSERT_READ_SIZE(value, cp, ep, i);
+								cp += i;
+								VAL_INDEX(buffer_read) += i;
+								i = VAL_INT32(next) - (i * 8);
+							}
+							if (inBit == 0) inBit = 0x80;
+							while (i > 0) {
+								i--;
+								//printf("inbit %d: %d %d\n",i, inBit, VAL_INDEX(buffer_read));
+								NEXT_IN_BIT(inBit);
+							}
+							continue;
 						case SYM_LENGTHQ:
 							VAL_SET(temp, REB_INTEGER);
 							SET_INT32(temp, VAL_TAIL(buffer_read) - VAL_INDEX(buffer_read));
 							break;
+						case SYM_TUPLE3:
+							n = 3;
+							goto readNTuple;
+						case SYM_TUPLE4:
+							n = 4;
+						readNTuple:
+							ASSERT_READ_SIZE(value, cp, ep, n);
+							Set_Tuple(temp, BIN_DATA(bin) + VAL_INDEX(buffer_read), n);
+							break;
+						case SYM_FLOAT16:
+							n = 2;
+							ASSERT_READ_SIZE(value, cp, ep, n);
+							float16_s f16;
+							f16.bytes.low  = cp[0];
+							f16.bytes.high = cp[1];
+							SET_DECIMAL(temp, float16to32(f16) );
+							break;
+						case SYM_FLOAT:
+							n = 4;
+							ASSERT_READ_SIZE(value, cp, ep, n);
+							SET_DECIMAL(temp, ((float*)cp)[0]);
+							break;
+						case SYM_DOUBLE:
+							n = 8;
+							ASSERT_READ_SIZE(value, cp, ep, n);
+							SET_DECIMAL(temp, ((double*)cp)[0]);
+							break;
+						case SYM_FIXED8:
+							n = 2;
+							ASSERT_READ_SIZE(value, cp, ep, n);
+							i = ((i32)cp[0] << 0)  |
+								((i32)cp[1] << 8)  ;
+							SET_DECIMAL(temp, (float)i / 256.0f);
+							break;
+						case SYM_FIXED16:
+							n = 4;
+							ASSERT_READ_SIZE(value, cp, ep, n);
+							i = ((i32)cp[0] << 0)  |
+								((i32)cp[1] << 8)  |
+								((i32)cp[2] << 16) |
+								((i32)cp[3] << 24) ;
+							VAL_SET(temp, REB_DECIMAL);
+							VAL_DECIMAL(temp) = ((float)i / 65536.0f);
+							break;
+							
 						default:
 							Trap1(RE_INVALID_SPEC, value);
 					}
@@ -1184,6 +1305,8 @@ static REBCNT EncodedU32_Size(u32 value) {
 			DS_DROP; // temp
 			DS_DROP; // END of the virtual block
 			DS_DROP; // value
+			if (IS_INTEGER(val_num)) DS_DROP;
+			//@@ could above be done better?
 		}
 
 		if(ref_into) *ret = *val_into;
diff --git a/src/tests/units/bincode-test.r3 b/src/tests/units/bincode-test.r3
@@ -133,6 +133,10 @@ is-protected-error?: func[code][
 		; results:
 		--assert [255 65535 16777215 4294967295] = binary/read b [UI8 UI16 UI24 UI32]
 
+	--test-- "BinCode - SI16LE, SI16BE"
+	     --assert 1 = binary/read #{0100} 'SI16LE
+	     --assert 1 = binary/read #{0001} 'SI16BE
+
 	b: binary 32
 	--test-- "BinCode - BYTES"
 	     --assert object? binary/write b [#{cafe}]
@@ -226,9 +230,31 @@ is-protected-error?: func[code][
 		--assert str = "test"
 		--assert   i = 42
 
-	--test-- "BinCode - bits (SB, UB, ALIGN)"
+	--test-- "BinCode - bits (SB, UB, FB, ALIGN)"
 		b: binary 2#{01011011 10110011 11111111}
-		--assert [2 -2 3 -3 255] = binary/read b [SB 3 SB 3 UB 2 SB 4 ALIGN UI8]
+		--assert [2 -2 3 -5 255] = binary/read b [SB 3 SB 3 UB 2 SB 4 ALIGN UI8]
+		--assert [-2 6] = binary/read 2#{1110 0110} [SB 4 SB 4]
+		--assert 14  = binary/read/with 2#{1110 0000} 'UB 4
+		--assert [2.5] = binary/read #{500000} [FB 19]
+
+	--test-- "BinCode - bits (variant using sigle value access)"
+		bin: binary #{438E9438}
+		--assert 1080 = binary/read/with bin 'SB 12
+		--assert binary/read bin 'BIT
+		--assert binary/read bin 'BIT
+		--assert 10 = binary/read/with bin 'UB 4
+		--assert not binary/read bin 'BIT
+		--assert binary/read bin 'BIT
+		--assert 1080 = binary/read/with bin 'SB 12
+		--assert 2.5  = binary/read/with #{500000} 'FB 19
+
+		bin: binary #{438E9438}
+		binary/read bin [a: SB 12 BIT BIT b: UB 4 BIT BIT c: SB 12]
+		--assert all [a = 1080 b = 10 c = 1080]
+
+	--test-- "BinCode - bits with zero skip"
+		; should not throw a range error when the number of bits is 0
+		--assert [0 0 0] = binary/read #{00} [UI8 SB 0 UB 0]
 
 	--test-- "BinCode - EncodedU32"
 		b: binary/init none 16
@@ -263,10 +289,43 @@ is-protected-error?: func[code][
 			not any [f32/1 f32/2 f32/3 f32/4 f32/5 f32/6 f32/7 f32/15]
 		]
 
-===end-group===
+	--test-- "BinCode - FIXED8 and FIXED16 (read)"
+		binary/read #{800700800700} [
+			f8:  FIXED8
+			f16: FIXED16
+		]
+		--assert 7.5 = f8
+		--assert 7.5 = f16
+
+	--test-- "BinCode - TUPLE3 and TUPLE4 (read)"
+		binary/read #{01020304050607} [
+			rgb:  TUPLE3
+			rgba: TUPLE4
+		]
+		--assert 1.2.3   = rgb
+		--assert 4.5.6.7 = rgba
 
+	--test-- "BinCode - SKIPBITS"
+		--assert [2 3] = binary/read 2#{00000000 11000011} [
+			SKIPBITS 9 UB 2
+			SKIPBITS 3 UB 2
+		]
+
+	--test-- "BinCode - ALIGN"
+		--assert [0 1 2] = binary/read #{008002} [
+			UB 8 ALIGN ; align on byte boundary is noop
+			UB 1 ALIGN ; this align should move input pointer
+			UI8 ; and this read should return value 2
+		]
+
+	--test-- "BinCode - FLOAT16, FLOAT, DOUBLE (read)"
+		--assert 1.0 = binary/read #{003C} 'FLOAT16
+		--assert 1.0 = binary/read #{0000803F} 'FLOAT
+		--assert 1.0 = binary/read #{000000000000F03F} 'DOUBLE
+
+
+===end-group===
 
 
-probe 
 
 ~~~end-file~~~