Skip to content

Commit 8bb6115

Browse files
committed
FIX: proper handling of wide Unicode characters in console input on POSIX systems
1 parent 0cb8bda commit 8bb6115

File tree

1 file changed

+39
-18
lines changed

1 file changed

+39
-18
lines changed

src/os/posix/host-readline.c

+39-18
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
#include <string.h>
4848
#include <unistd.h> //for read and write
4949
#include <errno.h>
50+
#include <wchar.h>
5051

5152
#include "reb-c.h"
5253

@@ -87,7 +88,7 @@ static const char trailingBytesForUTF8[256] = {
8788
// Encoded length in bytes of the UTF-8 character whose lead byte is c.
// The argument is converted to unsigned char so that a plain (possibly
// signed) char with the high bit set cannot become a negative table index.
#define CHAR_LEN(c) (1 + trailingBytesForUTF8[(unsigned char)(c)])
8889

8990
// Advance term->pos past the UTF-8 character it currently points at
// (lead byte plus its trailing bytes). Note: expands to a full statement,
// including the terminating ';'.
#define STEP_FORWARD(term) (term)->pos = (term)->pos + trailingBytesForUTF8[(term)->buffer[(term)->pos]] + 1;
90-
#define STEP_BACKWARD(term) do {--term->pos;} while ((term->buffer[term->pos] & 0xC0) == 0x80);
91+
// Move term->pos back to the lead byte of the previous UTF-8 character.
// The pos > 0 guard stops the scan at the start of the buffer, so malformed
// input (continuation bytes at index 0) can never drive pos negative.
// Note: expands to a full statement, including the terminating ';', and
// evaluates its argument more than once.
#define STEP_BACKWARD(term) do {--(term)->pos;} while ((term)->pos > 0 && ((term)->buffer[(term)->pos] & 0xC0) == 0x80);
9192
// Stepping backwards in UTF8 just means to keep going back so long
9293
// as you are looking at a byte with bit 7 set and bit 6 clear:
9394
// https://stackoverflow.com/a/22257843/211160
@@ -190,24 +191,41 @@ static struct termios Term_Attrs; // Initial settings, restored on exit
190191
Term_Init = FALSE;
191192
}
192193

193-
194194
/***********************************************************************
**
*/ static int Get_UTF8_Chars(unsigned char *buffer, int byte_count)
/*
**		Count the number of terminal columns occupied by the first
**		byte_count bytes of a UTF-8 string.
**		Used to count the number of BS chars needed to clear the line,
**		or to set the cursor position.
**
**		Characters are decoded with mbrtowc() and measured with
**		wcwidth(). The result is never decreased by a character:
**		a character for which wcwidth() reports a negative width
**		(non-printable) contributes zero columns.
**		Bytes that cannot be decoded are not validated further; each
**		non-continuation byte among them is counted as one column.
**		Counting stops at an embedded NUL byte.
**
***********************************************************************/
{
    mbstate_t state;
    int width_count = 0;

    memset(&state, 0, sizeof state);

    while (byte_count > 0) {
        wchar_t wide_char = 0;
        int width;
        size_t n = mbrtowc(&wide_char, (const char *)buffer, (size_t)byte_count, &state);

        if (n == 0) break; // reached a NUL byte

        if (n == (size_t)-1 || n == (size_t)-2) {
            // Invalid or truncated sequence: skip a single byte and count
            // it as one column unless it is a UTF-8 continuation byte.
            if ((*buffer & 0xC0) != 0x80) width_count++;
            buffer++;
            byte_count--;
            // The conversion state is unspecified after an error.
            memset(&state, 0, sizeof state);
            continue;
        }

        buffer += n;
        byte_count -= (int)n;

        // wcwidth() yields -1 for non-printable characters; never let
        // that shrink the total.
        width = wcwidth(wide_char);
        if (width > 0) width_count += width;
    }
    return width_count;
}
217+
218+
/***********************************************************************
**
*/ static int Get_Char_Width(REBYTE *buffer)
/*
**		Return the width, in terminal columns, of the character that
**		starts at buffer.
**
**		The result is never negative, so it can be used directly as a
**		repeat count: a character for which wcwidth() reports a
**		negative width (non-printable) yields 0, and a byte sequence
**		that cannot be decoded yields 1.
**
***********************************************************************/
{
    mbstate_t state;
    wchar_t wide_char = 0;
    size_t n;
    int width;

    memset(&state, 0, sizeof state);
    n = mbrtowc(&wide_char, (const char *)buffer, MB_CUR_MAX, &state);

    // Undecodable input: assume a single column rather than reporting
    // the width of an unset wide character.
    if (n == (size_t)-1 || n == (size_t)-2) return 1;

    width = wcwidth(wide_char);
    return (width > 0) ? width : 0;
}
212230

213231

@@ -417,7 +435,7 @@ static struct termios Term_Attrs; // Initial settings, restored on exit
417435
return ++cp;
418436
}
419437

420-
bytes = 1 + trailingBytesForUTF8[*cp];
438+
bytes = CHAR_LEN(*cp);
421439
if (term->end < TERM_BUF_LEN-bytes) { // avoid buffer overrun
422440

423441
if (term->pos < term->end) { // open space for it:
@@ -456,14 +474,18 @@ static struct termios Term_Attrs; // Initial settings, restored on exit
456474
if ( (term->pos == term->end) && back == 0) return; //Ctrl-D at EOL
457475
if ( term->pos == 0 && back ) return; // backspace at beginning of line
458476

459-
if (back) STEP_BACKWARD(term);
477+
if (back) {
478+
STEP_BACKWARD(term);
479+
back = Get_Char_Width(term->buffer + term->pos);
480+
}
481+
482+
encoded_len = CHAR_LEN(term->buffer[term->pos]);
460483

461-
encoded_len = 1 + trailingBytesForUTF8[term->buffer[term->pos]];
462484
len = encoded_len + term->end - term->pos;
463485

464486
if (term->pos >= 0 && len > 0) {
465487
MOVE_MEM(term->buffer + term->pos, term->buffer + term->pos + encoded_len, len);
466-
if (back) Write_Char(BS, 1);
488+
if (back) Write_Char(BS, back);
467489
term->end -= encoded_len;
468490
Show_Line(term, 1);
469491
}
@@ -482,7 +504,7 @@ static struct termios Term_Attrs; // Initial settings, restored on exit
482504
if (count < 0) {
483505
if (term->pos > 0) {
484506
STEP_BACKWARD(term);
485-
Write_Char(BS, 1);
507+
Write_Char(BS, Get_Char_Width(term->buffer + term->pos));
486508
}
487509
}
488510
else {
@@ -508,7 +530,7 @@ static struct termios Term_Attrs; // Initial settings, restored on exit
508530
if (term->buffer[term->pos-1] == ' ') {
509531
for(;term->pos > 0;){
510532
STEP_BACKWARD(term);
511-
Write_Char(BS, 1);
533+
Write_Char(BS, Get_Char_Width(term->buffer + term->pos));
512534
if(term->buffer[term->pos] != ' ') break;
513535
}
514536
}
@@ -519,7 +541,7 @@ static struct termios Term_Attrs; // Initial settings, restored on exit
519541
term->pos = pos;
520542
break;
521543
}
522-
Write_Char(BS, 1);
544+
Write_Char(BS, Get_Char_Width(term->buffer + term->pos));
523545
}
524546
}
525547
else {
@@ -575,7 +597,6 @@ static struct termios Term_Attrs; // Initial settings, restored on exit
575597
return cp;
576598
}
577599
else if (*cp == '[' || *cp == 'O') {
578-
579600
// Special key:
580601
switch (*++cp) {
581602

0 commit comments

Comments
 (0)