Skip to content

Commit ab148b2

Browse files
authored
[AVRO-4081][C++] Add big decimal support and update documentation (#3148)
1 parent 3621ef2 commit ab148b2

File tree

6 files changed

+40
-7
lines changed

6 files changed

+40
-7
lines changed

doc/content/en/docs/++version++/Specification/_index.md

+5-3
Original file line numberDiff line numberDiff line change
@@ -787,6 +787,8 @@ A logical type is always serialized using its underlying Avro type so that value
787787
Language implementations must ignore unknown logical types when reading, and should use the underlying Avro type. If a logical type is invalid, for example a decimal with scale greater than its precision, then implementations should ignore the logical type and use the underlying Avro type.
788788

789789
### Decimal
790+
791+
#### Fixed precision
790792
The `decimal` logical type represents an arbitrary-precision signed decimal number of the form _unscaled × 10<sup>-scale</sup>_.
791793

792794
A `decimal` logical type annotates Avro _bytes_ or _fixed_ types. The byte array must contain the two's-complement representation of the unscaled integer value in big-endian byte order. The scale is fixed, and is specified using an attribute.
@@ -810,19 +812,19 @@ Scale must be zero or a positive integer less than or equal to the precision.
810812

811813
For the purposes of schema resolution, two schemas that are `decimal` logical types _match_ if their scales and precisions match.
812814

813-
**alternative**
815+
#### Scalable precision
814816

815817
As it's not always possible to fix scale and precision in advance for a decimal field, `big-decimal` is another `decimal` logical type, restricted to Avro _bytes_.
816818

817-
_Currently only available in Java and Rust_.
819+
_Currently only available in C++, Java and Rust_.
818820

819821
```json
820822
{
821823
"type": "bytes",
822824
"logicalType": "big-decimal"
823825
}
824826
```
825-
Here, as scale property is stored in value itself it needs more bytes than preceding `decimal` type, but it allows more flexibility.
827+
Here, the byte array contains two serialized properties. The first part is an Avro byte array holding the two's-complement representation of the unscaled integer value in big-endian byte order. The second part is the scale property, stored as an Avro integer. Scale must be zero or a positive integer less than or equal to the precision. Because the scale is stored in the value itself, a value needs more bytes than with the preceding `decimal` type, but this allows more flexibility.
826828

827829
### UUID
828830

lang/c++/impl/Compiler.cc

+5-1
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,11 @@ static LogicalType makeLogicalType(const Entity &e, const Object &m) {
359359
}
360360

361361
LogicalType::Type t = LogicalType::NONE;
362-
if (typeField == "date")
362+
if (typeField == "big-decimal"
363+
&& !containsField(m, "precision")
364+
&& !containsField(m, "scale"))
365+
t = LogicalType::BIG_DECIMAL;
366+
else if (typeField == "date")
363367
t = LogicalType::DATE;
364368
else if (typeField == "time-millis")
365369
t = LogicalType::TIME_MILLIS;

lang/c++/impl/LogicalType.cc

+3
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ void LogicalType::setScale(int32_t scale) {
5151
void LogicalType::printJson(std::ostream &os) const {
5252
switch (type_) {
5353
case LogicalType::NONE: break;
54+
case LogicalType::BIG_DECIMAL:
55+
os << R"("logicalType": "big-decimal")";
56+
break;
5457
case LogicalType::DECIMAL:
5558
os << R"("logicalType": "decimal")";
5659
os << ", \"precision\": " << precision_;

lang/c++/impl/Node.cc

+7
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,13 @@ void Node::setLogicalType(LogicalType logicalType) {
139139
// Check that the logical type is applicable to the node type.
140140
switch (logicalType.type()) {
141141
case LogicalType::NONE: break;
142+
case LogicalType::BIG_DECIMAL: {
143+
if (type_ != AVRO_BYTES) {
144+
throw Exception("BIG_DECIMAL logical type can annotate "
145+
"only BYTES type");
146+
}
147+
break;
148+
}
142149
case LogicalType::DECIMAL: {
143150
if (type_ != AVRO_BYTES && type_ != AVRO_FIXED) {
144151
throw Exception("DECIMAL logical type can annotate "

lang/c++/include/avro/LogicalType.hh

+1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ class AVRO_DECL LogicalType {
2929
public:
3030
enum Type {
3131
NONE,
32+
BIG_DECIMAL,
3233
DECIMAL,
3334
DATE,
3435
TIME_MILLIS,

lang/c++/test/SchemaTests.cc

+19-3
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,7 @@ const char *roundTripSchemas[] = {
314314
R"({"type":"fixed","name":"Test","size":1})",
315315

316316
// Logical types
317+
R"({"type":"bytes","logicalType":"big-decimal"})",
317318
R"({"type":"bytes","logicalType":"decimal","precision":12,"scale":6})",
318319
R"({"type":"fixed","name":"test","size":16,"logicalType":"decimal","precision":38,"scale":9})",
319320
R"({"type":"fixed","name":"test","size":129,"logicalType":"decimal","precision":310,"scale":155})",
@@ -361,6 +362,7 @@ const char *roundTripSchemas[] = {
361362

362363
const char *malformedLogicalTypes[] = {
363364
// Wrong base type.
365+
R"({"type":"long","logicalType": "big-decimal"})",
364366
R"({"type":"long","logicalType": "decimal","precision": 10})",
365367
R"({"type":"string","logicalType":"date"})",
366368
R"({"type":"string","logicalType":"time-millis"})",
@@ -379,9 +381,12 @@ const char *malformedLogicalTypes[] = {
379381
R"({"type":"fixed","logicalType":"decimal","size":4,"name":"a","precision":20})",
380382
R"({"type":"fixed","logicalType":"decimal","size":129,"name":"a","precision":311})",
381383
// Scale is larger than precision.
382-
R"({"type":"bytes","logicalType":"decimal","precision":5,"scale":10})"
383-
};
384-
384+
R"({"type":"bytes","logicalType":"decimal","precision":5,"scale":10})",
385+
// Precision is not supported by the big-decimal logical type
386+
// and scale is integrated in bytes.
387+
R"({"type":"bytes","logicalType": "big-decimal","precision": 9})",
388+
R"({"type":"bytes","logicalType": "big-decimal","scale": 2})",
389+
R"({"type":"bytes","logicalType": "big-decimal","precision": 9,"scale": 2})"};
385390
const char *schemasToCompact[] = {
386391
// Schema without any whitespace
387392
R"({"type":"record","name":"Test","fields":[]})",
@@ -469,6 +474,10 @@ static void testCompactSchemas() {
469474
}
470475

471476
static void testLogicalTypes() {
477+
const char *bytesBigDecimalType = R"({
478+
"type": "bytes",
479+
"logicalType": "big-decimal"
480+
})";
472481
const char *bytesDecimalType = R"({
473482
"type": "bytes",
474483
"logicalType": "decimal",
@@ -496,6 +505,13 @@ static void testLogicalTypes() {
496505
const char *uuidType = R"({"type": "string","logicalType": "uuid"})";
497506
// AVRO-2923 Union with LogicalType
498507
const char *unionType = R"([{"type":"string", "logicalType":"uuid"},"null"]})";
508+
{
509+
BOOST_TEST_CHECKPOINT(bytesBigDecimalType);
510+
ValidSchema schema = compileJsonSchemaFromString(bytesBigDecimalType);
511+
BOOST_CHECK(schema.root()->type() == AVRO_BYTES);
512+
LogicalType logicalType = schema.root()->logicalType();
513+
BOOST_CHECK(logicalType.type() == LogicalType::BIG_DECIMAL);
514+
}
499515
{
500516
BOOST_TEST_CHECKPOINT(bytesDecimalType);
501517
ValidSchema schema1 = compileJsonSchemaFromString(bytesDecimalType);

0 commit comments

Comments
 (0)