Compare commits

...

4 Commits

Author SHA1 Message Date
Roberto E. Vargas Caballero e8f1308586 Receive only a wchar_t in tchar()
It simplifies the conversion from the UTF-8 input string and
simplifies the checks done in tputc, but it still requires
a lot of additional conversions that will be removed later.
2014-09-26 19:25:07 +02:00
Roberto E. Vargas Caballero d65ebe9aed First step to plain unicode representation
tprinter() is used in tputc, so a first step is to change it
to accept a wchar_t instead of a multibyte string. This patch makes
the code uglier because it needs a lot of conversions, but
they will be removed later.
2014-09-26 19:25:07 +02:00
Roberto E. Vargas Caballero 28f56c8842 Remove last parameter of utf8encode
This parameter was always UTF_SIZ, so it is better to remove it and
use UTF_SIZ directly in it.
2014-09-26 19:25:06 +02:00
Roberto E. Vargas Caballero 23af75fc75 Simplify utf8decodebyte using some locals
These local variables help make the expressions simpler and avoid
using a pointer as the induction variable in a for loop.
2014-09-26 19:25:06 +02:00
1 changed file with 127 additions and 106 deletions

233
st.c
View File

@ -363,7 +363,7 @@ static void strparse(void);
static void strreset(void); static void strreset(void);
static int tattrset(int); static int tattrset(int);
static void tprinter(char *, size_t); static void tprinter(wchar_t);
static void tdumpsel(void); static void tdumpsel(void);
static void tdumpline(int); static void tdumpline(int);
static void tdump(void); static void tdump(void);
@ -379,20 +379,20 @@ static void tmoveato(int, int);
static void tnew(int, int); static void tnew(int, int);
static void tnewline(int); static void tnewline(int);
static void tputtab(int); static void tputtab(int);
static void tputc(char *, int); static void tputc(wchar_t);
static void treset(void); static void treset(void);
static void tresize(int, int); static void tresize(int, int);
static void tscrollup(int, int); static void tscrollup(int, int);
static void tscrolldown(int, int); static void tscrolldown(int, int);
static void tsetattr(int *, int); static void tsetattr(int *, int);
static void tsetchar(char *, Glyph *, int, int); static void tsetchar(wchar_t, Glyph *, int, int);
static void tsetscroll(int, int); static void tsetscroll(int, int);
static void tswapscreen(void); static void tswapscreen(void);
static void tsetdirt(int, int); static void tsetdirt(int, int);
static void tsetdirtattr(int); static void tsetdirtattr(int);
static void tsetmode(bool, bool, int *, int); static void tsetmode(bool, bool, int *, int);
static void tfulldirt(void); static void tfulldirt(void);
static void techo(char *, int); static void techo(wchar_t);
static void tcontrolcode(uchar ); static void tcontrolcode(uchar );
static void tdectest(char ); static void tdectest(char );
static int32_t tdefcolor(int *, int *, int); static int32_t tdefcolor(int *, int *, int);
@ -452,12 +452,12 @@ static void selsnap(int, int *, int *, int);
static void getbuttoninfo(XEvent *); static void getbuttoninfo(XEvent *);
static void mousereport(XEvent *); static void mousereport(XEvent *);
static size_t utf8decode(char *, long *, size_t); static size_t utf8decode(char *, wchar_t *, size_t);
static long utf8decodebyte(char, size_t *); static wchar_t utf8decodebyte(uchar, size_t *);
static size_t utf8encode(long, char *, size_t); static size_t utf8encode(wchar_t, char *);
static char utf8encodebyte(long, size_t); static char utf8encodebyte(wchar_t, size_t);
static size_t utf8len(char *); static size_t utf8len(char *);
static size_t utf8validate(long *, size_t); static size_t utf8validate(wchar_t *, size_t);
static ssize_t xwrite(int, const char *, size_t); static ssize_t xwrite(int, const char *, size_t);
static void *xmalloc(size_t); static void *xmalloc(size_t);
@ -567,9 +567,9 @@ xstrdup(char *s) {
} }
size_t size_t
utf8decode(char *c, long *u, size_t clen) { utf8decode(char *c, wchar_t *u, size_t clen) {
size_t i, j, len, type; size_t i, j, len, type;
long udecoded; wchar_t udecoded;
*u = UTF_INVALID; *u = UTF_INVALID;
if(!clen) if(!clen)
@ -589,20 +589,27 @@ utf8decode(char *c, long *u, size_t clen) {
return len; return len;
} }
long wchar_t
utf8decodebyte(char c, size_t *i) { utf8decodebyte(uchar c, size_t *len) {
for(*i = 0; *i < LEN(utfmask); ++(*i)) size_t i;
if(((uchar)c & utfmask[*i]) == utfbyte[*i]) long ret = 0;
return (uchar)c & ~utfmask[*i];
return 0; for(i = 0; i < LEN(utfmask); ++i) {
if((c & utfmask[i]) == utfbyte[i]) {
ret = c & ~utfmask[i];
break;
}
}
*len = i;
return ret;
} }
size_t size_t
utf8encode(long u, char *c, size_t clen) { utf8encode(wchar_t u, char *c) {
size_t len, i; size_t len, i;
len = utf8validate(&u, 0); len = utf8validate(&u, 0);
if(clen < len) if(len > UTF_SIZ)
return 0; return 0;
for(i = len - 1; i != 0; --i) { for(i = len - 1; i != 0; --i) {
c[i] = utf8encodebyte(u, 0); c[i] = utf8encodebyte(u, 0);
@ -613,17 +620,17 @@ utf8encode(long u, char *c, size_t clen) {
} }
char char
utf8encodebyte(long u, size_t i) { utf8encodebyte(wchar_t u, size_t i) {
return utfbyte[i] | (u & ~utfmask[i]); return utfbyte[i] | (u & ~utfmask[i]);
} }
size_t size_t
utf8len(char *c) { utf8len(char *c) {
return utf8decode(c, &(long){0}, UTF_SIZ); return utf8decode(c, &(wchar_t){0}, UTF_SIZ);
} }
size_t size_t
utf8validate(long *u, size_t i) { utf8validate(wchar_t *u, size_t i) {
if(!BETWEEN(*u, utfmin[i], utfmax[i]) || BETWEEN(*u, 0xD800, 0xDFFF)) if(!BETWEEN(*u, utfmin[i], utfmax[i]) || BETWEEN(*u, 0xD800, 0xDFFF))
*u = UTF_INVALID; *u = UTF_INVALID;
for(i = 1; *u > utfmax[i]; ++i) for(i = 1; *u > utfmax[i]; ++i)
@ -1244,9 +1251,8 @@ ttyread(void) {
static char buf[BUFSIZ]; static char buf[BUFSIZ];
static int buflen = 0; static int buflen = 0;
char *ptr; char *ptr;
char s[UTF_SIZ];
int charsize; /* size of utf8 char in bytes */ int charsize; /* size of utf8 char in bytes */
long unicodep; wchar_t unicodep;
int ret; int ret;
/* append read bytes to unprocessed bytes */ /* append read bytes to unprocessed bytes */
@ -1257,8 +1263,7 @@ ttyread(void) {
buflen += ret; buflen += ret;
ptr = buf; ptr = buf;
while((charsize = utf8decode(ptr, &unicodep, buflen))) { while((charsize = utf8decode(ptr, &unicodep, buflen))) {
utf8encode(unicodep, s, UTF_SIZ); tputc(unicodep);
tputc(s, charsize);
ptr += charsize; ptr += charsize;
buflen -= charsize; buflen -= charsize;
} }
@ -1275,9 +1280,17 @@ ttywrite(const char *s, size_t n) {
void void
ttysend(char *s, size_t n) { ttysend(char *s, size_t n) {
int len;
wchar_t u;
ttywrite(s, n); ttywrite(s, n);
if(IS_SET(MODE_ECHO)) if(IS_SET(MODE_ECHO)) {
techo(s, n); while ((len = utf8decode(s, &u, n)) != 0) {
techo(u);
n -= len;
s += len;
}
}
} }
void void
@ -1527,7 +1540,7 @@ tmoveto(int x, int y) {
} }
void void
tsetchar(char *c, Glyph *attr, int x, int y) { tsetchar(wchar_t u, Glyph *attr, int x, int y) {
static char *vt100_0[62] = { /* 0x41 - 0x7e */ static char *vt100_0[62] = { /* 0x41 - 0x7e */
"", "", "", "", "", "", "", /* A - G */ "", "", "", "", "", "", "", /* A - G */
0, 0, 0, 0, 0, 0, 0, 0, /* H - O */ 0, 0, 0, 0, 0, 0, 0, 0, /* H - O */
@ -1538,16 +1551,21 @@ tsetchar(char *c, Glyph *attr, int x, int y) {
"", "", "", "", "", "", "", "", /* p - w */ "", "", "", "", "", "", "", "", /* p - w */
"", "", "", "π", "", "£", "·", /* x - ~ */ "", "", "", "π", "", "£", "·", /* x - ~ */
}; };
char c[UTF_SIZ];
c[0] = '\0';
/* /*
* The table is proudly stolen from rxvt. * The table is proudly stolen from rxvt.
*/ */
if(term.trantbl[term.charset] == CS_GRAPHIC0) { if(term.trantbl[term.charset] == CS_GRAPHIC0) {
if(BETWEEN(c[0], 0x41, 0x7e) && vt100_0[c[0] - 0x41]) { if(BETWEEN(u, 0x41, 0x7e) && vt100_0[u - 0x41]) {
c = vt100_0[c[0] - 0x41]; strcpy(c, vt100_0[u - 0x41]);
} }
} }
if (c[0] == '\0')
utf8encode(u, c);
if(term.line[y][x].mode & ATTR_WIDE) { if(term.line[y][x].mode & ATTR_WIDE) {
if(x+1 < term.col) { if(x+1 < term.col) {
term.line[y][x+1].c[0] = ' '; term.line[y][x+1].c[0] = ' ';
@ -2248,7 +2266,13 @@ strreset(void) {
} }
void void
tprinter(char *s, size_t len) { tprinter(wchar_t u) {
size_t len;
char s[UTF_SIZ];
if((len = utf8encode(u, s)) == 0)
return;
if(iofd != -1 && xwrite(iofd, s, len) < 0) { if(iofd != -1 && xwrite(iofd, s, len) < 0) {
fprintf(stderr, "Error writing in %s:%s\n", fprintf(stderr, "Error writing in %s:%s\n",
opt_io, strerror(errno)); opt_io, strerror(errno));
@ -2275,24 +2299,34 @@ printsel(const Arg *arg) {
void void
tdumpsel(void) { tdumpsel(void) {
char *ptr; char *ptr;
size_t len;
wchar_t u;
if((ptr = getsel())) { if((ptr = getsel()) == NULL)
tprinter(ptr, strlen(ptr)); return;
free(ptr);
while((len = utf8decode(ptr, &u, UTF_SIZ))) {
tprinter(u);
ptr += len;
} }
free(ptr);
} }
void void
tdumpline(int n) { tdumpline(int n) {
Glyph *bp, *end; Glyph *bp, *end;
wchar_t u;
bp = &term.line[n][0]; bp = &term.line[n][0];
end = &bp[MIN(tlinelen(n), term.col) - 1]; end = &bp[MIN(tlinelen(n), term.col) - 1];
if(bp != end || bp->c[0] != ' ') { if(bp != end || bp->c[0] != ' ') {
for( ;bp <= end; ++bp) for( ;bp <= end; ++bp) {
tprinter(bp->c, utf8len(bp->c)); if(!utf8decode(bp->c, &u, UTF_SIZ))
break;
tprinter(u);
}
} }
tprinter("\n", 1); tprinter('\n');
} }
void void
@ -2320,26 +2354,18 @@ tputtab(int n) {
} }
void void
techo(char *buf, int len) { techo(wchar_t u) {
for(; len > 0; buf++, len--) { if(ISCONTROL(u)) { /* control code */
char c = *buf; if(u & 0x80) {
u &= 0x7f;
if(ISCONTROL((uchar) c)) { /* control code */ tputc('^');
if(c & 0x80) { tputc('[');
c &= 0x7f; } else if(u != '\n' && u != '\r' && u != '\t') {
tputc("^", 1); u ^= 0x40;
tputc("[", 1); tputc('^');
} else if(c != '\n' && c != '\r' && c != '\t') {
c ^= 0x40;
tputc("^", 1);
}
tputc(&c, 1);
} else {
break;
} }
} }
if(len) tputc(u);
tputc(buf, len);
} }
void void
@ -2357,13 +2383,12 @@ tdeftran(char ascii) {
void void
tdectest(char c) { tdectest(char c) {
static char E[UTF_SIZ] = "E";
int x, y; int x, y;
if(c == '8') { /* DEC screen alignment test. */ if(c == '8') { /* DEC screen alignment test. */
for(x = 0; x < term.col; ++x) { for(x = 0; x < term.col; ++x) {
for(y = 0; y < term.row; ++y) for(y = 0; y < term.row; ++y)
tsetchar(E, &term.c.attr, x, y); tsetchar('E', &term.c.attr, x, y);
} }
} }
} }
@ -2394,7 +2419,6 @@ tstrsequence(uchar c) {
void void
tcontrolcode(uchar ascii) { tcontrolcode(uchar ascii) {
static char question[UTF_SIZ] = "?";
switch(ascii) { switch(ascii) {
case '\t': /* HT */ case '\t': /* HT */
@ -2435,7 +2459,7 @@ tcontrolcode(uchar ascii) {
term.charset = 1; term.charset = 1;
return; return;
case '\032': /* SUB */ case '\032': /* SUB */
tsetchar(question, &term.c.attr, term.c.x, term.c.y); tsetchar('?', &term.c.attr, term.c.x, term.c.y);
case '\030': /* CAN */ case '\030': /* CAN */
csireset(); csireset();
break; break;
@ -2556,26 +2580,20 @@ eschandle(uchar ascii) {
} }
void void
tputc(char *c, int len) { tputc(wchar_t u) {
uchar ascii; char s[UTF_SIZ];
int len;
bool control; bool control;
long unicodep;
int width; int width;
Glyph *gp; Glyph *gp;
if(len == 1) {
width = 1; width = wcwidth(u);
unicodep = ascii = *c; control = ISCONTROLC1(u);
} else {
utf8decode(c, &unicodep, UTF_SIZ);
width = wcwidth(unicodep);
control = ISCONTROLC1(unicodep);
ascii = unicodep;
}
if(IS_SET(MODE_PRINT)) if(IS_SET(MODE_PRINT))
tprinter(c, len); tprinter(u);
control = ISCONTROL(unicodep); control = ISCONTROL(u);
/* /*
* STR sequence must be checked before anything else * STR sequence must be checked before anything else
@ -2584,31 +2602,33 @@ tputc(char *c, int len) {
* character. * character.
*/ */
if(term.esc & ESC_STR) { if(term.esc & ESC_STR) {
if(width == 1 && if(u == '\a' || u == 030 || u == 032 || u == 033 ||
(ascii == '\a' || ascii == 030 || ISCONTROLC1(u)) {
ascii == 032 || ascii == 033 ||
ISCONTROLC1(unicodep))) {
term.esc &= ~(ESC_START|ESC_STR); term.esc &= ~(ESC_START|ESC_STR);
term.esc |= ESC_STR_END; term.esc |= ESC_STR_END;
} else if(strescseq.len + len < sizeof(strescseq.buf) - 1) {
memmove(&strescseq.buf[strescseq.len], c, len);
strescseq.len += len;
return;
} else { } else {
/* /* TODO: make csiescseq.buf buffer of wchar_t */
* Here is a bug in terminals. If the user never sends len = utf8encode(u, s);
* some code to stop the str or esc command, then st if(strescseq.len + len < sizeof(strescseq.buf) - 1) {
* will stop responding. But this is better than memmove(&strescseq.buf[strescseq.len], s, len);
* silently failing with unknown characters. At least strescseq.len += len;
* then users will report back. return;
* } else {
* In the case users ever get fixed, here is the code: /*
*/ * Here is a bug in terminals. If the user never sends
/* * some code to stop the str or esc command, then st
* term.esc = 0; * will stop responding. But this is better than
* strhandle(); * silently failing with unknown characters. At least
*/ * then users will report back.
return; *
* In the case users ever get fixed, here is the code:
*/
/*
* term.esc = 0;
* strhandle();
*/
return;
}
} }
} }
@ -2618,15 +2638,16 @@ tputc(char *c, int len) {
* they must not cause conflicts with sequences. * they must not cause conflicts with sequences.
*/ */
if(control) { if(control) {
tcontrolcode(ascii); tcontrolcode(u);
/* /*
* control codes are not shown ever * control codes are not shown ever
*/ */
return; return;
} else if(term.esc & ESC_START) { } else if(term.esc & ESC_START) {
if(term.esc & ESC_CSI) { if(term.esc & ESC_CSI) {
csiescseq.buf[csiescseq.len++] = ascii; /* TODO: make csiescseq.buf buffer of wchar_t */
if(BETWEEN(ascii, 0x40, 0x7E) csiescseq.buf[csiescseq.len++] = u;
if(BETWEEN(u, 0x40, 0x7E)
|| csiescseq.len >= \ || csiescseq.len >= \
sizeof(csiescseq.buf)-1) { sizeof(csiescseq.buf)-1) {
term.esc = 0; term.esc = 0;
@ -2635,11 +2656,11 @@ tputc(char *c, int len) {
} }
return; return;
} else if(term.esc & ESC_ALTCHARSET) { } else if(term.esc & ESC_ALTCHARSET) {
tdeftran(ascii); tdeftran(u);
} else if(term.esc & ESC_TEST) { } else if(term.esc & ESC_TEST) {
tdectest(ascii); tdectest(u);
} else { } else {
if (!eschandle(ascii)) if (!eschandle(u))
return; return;
/* sequence already finished */ /* sequence already finished */
} }
@ -2665,7 +2686,7 @@ tputc(char *c, int len) {
if(term.c.x+width > term.col) if(term.c.x+width > term.col)
tnewline(1); tnewline(1);
tsetchar(c, &term.c.attr, term.c.x, term.c.y); tsetchar(u, &term.c.attr, term.c.x, term.c.y);
if(width == 2) { if(width == 2) {
gp->mode |= ATTR_WIDE; gp->mode |= ATTR_WIDE;
@ -3173,7 +3194,7 @@ xdraws(char *s, Glyph base, int x, int y, int charlen, int bytelen) {
int frcflags; int frcflags;
int u8fl, u8fblen, u8cblen, doesexist; int u8fl, u8fblen, u8cblen, doesexist;
char *u8c, *u8fs; char *u8c, *u8fs;
long unicodep; wchar_t unicodep;
Font *font = &dc.font; Font *font = &dc.font;
FcResult fcres; FcResult fcres;
FcPattern *fcpattern, *fontpattern; FcPattern *fcpattern, *fontpattern;
@ -3546,7 +3567,7 @@ drawregion(int x1, int y1, int x2, int y2) {
Glyph base, new; Glyph base, new;
char buf[DRAW_BUF_SIZ]; char buf[DRAW_BUF_SIZ];
bool ena_sel = sel.ob.x != -1 && sel.alt == IS_SET(MODE_ALTSCREEN); bool ena_sel = sel.ob.x != -1 && sel.alt == IS_SET(MODE_ALTSCREEN);
long unicodep; wchar_t unicodep;
if(!(xw.state & WIN_VISIBLE)) if(!(xw.state & WIN_VISIBLE))
return; return;
@ -3734,7 +3755,7 @@ kpress(XEvent *ev) {
if(IS_SET(MODE_8BIT)) { if(IS_SET(MODE_8BIT)) {
if(*buf < 0177) { if(*buf < 0177) {
c = *buf | 0x80; c = *buf | 0x80;
len = utf8encode(c, buf, UTF_SIZ); len = utf8encode(c, buf);
} }
} else { } else {
buf[1] = buf[0]; buf[1] = buf[0];