Skip to content

Commit da27000

Browse files
committed
Add a function to count the number of characters in a UTF-8 string.
This is useful for allocating space when converting from UTF-8 to wide_t.
1 parent a91a8d6 commit da27000

File tree

1 file changed

+33
-0
lines changed

1 file changed

+33
-0
lines changed

src/core/s-unicode.c

+33
Original file line numberDiff line numberDiff line change
@@ -991,6 +991,39 @@ ConversionResult ConvertUTF8toUTF32 (
991991
return size;
992992
}
993993

994+
/***********************************************************************
**
*/ REBCNT UTF8_Length(REBYTE *src, REBCNT len)
/*
**		Returns how many characters the UTF-8 string has.
**
**		src - UTF-8 encoded bytes.
**		len - maximum number of bytes to examine. Scanning also stops
**		      at the first zero byte found where a character starts.
**
**		Each character is recognized from its lead byte (1 to 6 byte
**		forms are accepted). The sequence must fit entirely within
**		len and every following byte must be a continuation byte
**		(10xxxxxx). Overlong encodings and surrogate code points are
**		not rejected.
**
**		On malformed input (stray continuation byte, lead byte 0xFE
**		or 0xFF, truncated sequence, bad continuation byte) returns
**		-1 converted to REBCNT.
**		NOTE(review): presumably REBCNT is unsigned, so callers see
**		the all-ones value - confirm against its typedef.
**
***********************************************************************/
{
	REBCNT i = 0;	/* byte offset into src */
	REBCNT n = 0;	/* characters counted so far */

	while (i < len) {
		REBYTE lead = src[i];
		REBCNT size;
		REBCNT k;

		if (lead == 0) break;	/* terminator: stop counting */

		if (lead < 0x80) size = 1;
		else if (lead < 0xC0) return (REBCNT)-1;	/* continuation byte cannot start a character */
		else if (lead < 0xE0) size = 2;
		else if (lead < 0xF0) size = 3;
		else if (lead < 0xF8) size = 4;
		else if (lead < 0xFC) size = 5;
		else if (lead < 0xFE) size = 6;
		else return (REBCNT)-1;	/* 0xFE and 0xFF never appear in UTF-8 */

		/* The whole sequence must lie inside the buffer. i < len holds
		** here, so len - i cannot wrap. */
		if (size > len - i) return (REBCNT)-1;

		/* Checking the trailing bytes also catches a terminator inside a
		** sequence, so the scan can never step over the end of a
		** zero-terminated string that is shorter than len. */
		for (k = 1; k < size; k++) {
			if ((src[i + k] & 0xC0) != 0x80) return (REBCNT)-1;
		}

		i += size;
		n++;
	}

	return n;
}
9941027

9951028
/***********************************************************************
9961029
**

0 commit comments

Comments
 (0)