1
0
Fork 0
mirror of git://git.code.sf.net/p/zsh/code synced 2024-06-06 23:36:03 +02:00

22525: lengths and cases of multibyte strings in parameters and history

This commit is contained in:
Peter Stephenson 2006-06-28 13:12:55 +00:00
parent f95a6a913c
commit 6157c14d06
9 changed files with 224 additions and 113 deletions

View File

@ -1,3 +1,10 @@
2006-06-28 Peter Stephenson <pws@csr.com>
* 22525: Completion/compinit, Src/hist.c, Src/jobs.c,
Src/pattern.c, Src/subst.c, Src/utils.c, Src/zsh.h,
Test/D07multibyte.ztst: lengths and cases of multibyte strings
in parameters and history.
2006-06-27 Peter Stephenson <pws@csr.com>
* 22524: Src/params.c, Test/D07multibyte.ztst: searchable

View File

@ -128,11 +128,12 @@ fi
# The standard options set in completion functions.
_comp_options=(
glob
extendedglob
bareglobqual
glob
multibyte
nullglob
rcexpandparam
extendedglob
unset
NO_markdirs
NO_globsubst

View File

@ -635,10 +635,10 @@ histsubchar(int c)
quotebreak(&sline);
break;
case 'l':
downcase(&sline);
sline = casemodify(sline, CASMOD_LOWER);
break;
case 'u':
upcase(&sline);
sline = casemodify(sline, CASMOD_UPPER);
break;
default:
herrflush();
@ -1503,42 +1503,130 @@ remlpaths(char **junkptr)
return 0;
}
/**/
int
makeuppercase(char **junkptr)
{
char *str = *junkptr;
for (; *str; str++)
*str = tuupper(*str);
return 1;
}
/*
* Return modified version of str from the heap with modification
* according to one of the CASMOD_* types defined in zsh.h; CASMOD_NONE
* is not handled, for obvious reasons.
*/
/**/
int
makelowercase(char **junkptr)
char *
casemodify(char *str, int how)
{
char *str = *junkptr;
char *str2 = zhalloc(2 * strlen(str) + 1);
char *ptr2 = str2;
int nextupper = 1;
for (; *str; str++)
*str = tulower(*str);
return 1;
}
#ifdef MULTIBYTE_SUPPORT
if (isset(MULTIBYTE)) {
VARARR(char, mbstr, MB_CUR_MAX);
mbstate_t ps;
/**/
int
makecapitals(char **junkptr)
{
char *str = *junkptr;
mb_metacharinit();
memset(&ps, 0, sizeof(ps));
while (*str) {
wint_t wc;
int len = mb_metacharlenconv(str, &wc), mod = 0, len2;
/*
* wc is set to WEOF if the start of str couldn't be
* converted. Presumably WEOF doesn't match iswlower(), but
* better be safe.
*/
if (wc == WEOF) {
while (len--)
*ptr2++ = *str++;
/* not alphanumeric */
nextupper = 1;
continue;
}
switch (how) {
case CASMOD_LOWER:
if (iswupper(wc)) {
wc = towlower(wc);
mod = 1;
}
break;
for (; *str;) {
for (; *str && !ialnum(*str); str++);
if (*str)
*str = tuupper(*str), str++;
for (; *str && ialnum(*str); str++)
*str = tulower(*str);
case CASMOD_UPPER:
if (iswlower(wc)) {
wc = towupper(wc);
mod = 1;
}
break;
case CASMOD_CAPS:
default: /* shuts up compiler */
if (!iswalnum(wc))
nextupper = 1;
else if (nextupper) {
if (iswlower(wc)) {
wc = towupper(wc);
mod = 1;
}
nextupper = 0;
} else if (iswupper(wc)) {
wc = towlower(wc);
mod = 1;
}
break;
}
if (mod && (len2 = wcrtomb(mbstr, wc, &ps)) > 0) {
char *mbptr;
for (mbptr = mbstr; mbptr < mbstr + len2; mbptr++) {
if (imeta(STOUC(*mbptr))) {
*ptr2++ = Meta;
*ptr2++ = *mbptr ^ 32;
} else
*ptr2++ = *mbptr;
}
str += len;
} else {
while (len--)
*ptr2++ = *str++;
}
}
}
return 1;
else
#endif
while (*str) {
int c;
if (*str == Meta) {
c = str[1] ^ 32;
str += 2;
} else
c = *str++;
switch (how) {
case CASMOD_LOWER:
if (isupper(c))
c = tolower(c);
break;
case CASMOD_UPPER:
if (islower(c))
c = toupper(c);
break;
case CASMOD_CAPS:
default: /* shuts up compiler */
if (!ialnum(c))
nextupper = 1;
else if (nextupper) {
if (islower(c))
c = toupper(c);
nextupper = 0;
} else if (isupper(c))
c = tolower(c);
break;
}
if (imeta(c)) {
*ptr2++ = Meta;
*ptr2++ = c ^ 32;
} else
*ptr2++ = c;
}
*ptr2 = '\0';
return str2;
}
/**/
@ -1644,26 +1732,6 @@ getargs(Histent elist, int arg1, int arg2)
return dupstrpfx(elist->node.nam + pos1, words[2*arg2+1] - pos1);
}
/**/
void
upcase(char **x)
{
char *pp = *(char **)x;
for (; *pp; pp++)
*pp = tuupper(*pp);
}
/**/
void
downcase(char **x)
{
char *pp = *(char **)x;
for (; *pp; pp++)
*pp = tulower(*pp);
}
/**/
int
quote(char **tr)

View File

@ -2014,7 +2014,7 @@ bin_kill(char *nam, char **argv, UNUSED(Options ops), UNUSED(int func))
return 1;
} else
signame = *argv;
makeuppercase(&signame);
signame = casemodify(signame, CASMOD_UPPER);
if (!strncmp(signame, "SIG", 3))
signame+=3;

View File

@ -1644,17 +1644,12 @@ charrefinc(char **x, char *y)
}
#ifndef PARAMETER_CODE_HANDLES_MULTIBYTE
/*
* TODO: We should use the other branch, but currently
* the parameter code doesn't handle multibyte input,
* so this would produce the wrong subscripts,
* so just use a raw byte difference for now.
* Count the number of characters between two pointers, smaller first
*
* This is used when setting values in parameters, so we obey
* the MULTIBYTE option (even if it's been overridden locally).
*/
/* Counter the number of characters between two pointers, smaller first */
# define CHARSUB(x,y) ((y) - (x))
#else
/* Counter the number of characters between two pointers, smaller first */
#define CHARSUB(x,y) charsub(x, y)
static ptrdiff_t
charsub(char *x, char *y)
@ -1663,6 +1658,9 @@ charsub(char *x, char *y)
size_t ret;
wchar_t wc;
if (!isset(MULTIBYTE))
return y - x;
while (x < y) {
ret = mbrtowc(&wc, x, y-x, &shiftstate);
@ -1674,13 +1672,12 @@ charsub(char *x, char *y)
/* Treat nulls as normal characters */
if (!ret)
ret = 1;
res += ret;
res++;
x += ret;
}
return res;
}
#endif
#else /* no MULTIBYTE_SUPPORT */

View File

@ -1019,7 +1019,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
/* (u): straightforward. */
int unique = 0;
/* combination of (L), (U) and (C) flags. */
int casmod = 0;
int casmod = CASMOD_NONE;
/*
* quotemod says we are doing either (q) (positive), (Q) (negative)
* or not (0). quotetype counts the q's for the first case.
@ -1211,13 +1211,13 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
break;
case 'L':
casmod = 2;
casmod = CASMOD_LOWER;
break;
case 'U':
casmod = 1;
casmod = CASMOD_UPPER;
break;
case 'C':
casmod = 3;
casmod = CASMOD_CAPS;
break;
case 'o':
@ -1819,17 +1819,13 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
break;
}
switch (v->pm->node.flags & (PM_LOWER | PM_UPPER)) {
char *t;
case PM_LOWER:
t = val;
for (; (c = *t); t++)
*t = tulower(c);
val = casemodify(val, CASMOD_LOWER);
copied = 1;
break;
case PM_UPPER:
t = val;
for (; (c = *t); t++)
*t = tuupper(c);
val = casemodify(val, CASMOD_UPPER);
copied = 1;
break;
}
}
@ -2316,14 +2312,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
if (isarr) {
char **ctr;
int sl = sep ? ztrlen(sep) : 1;
int sl = sep ? MB_METASTRLEN(sep) : 1;
if (getlen == 1)
for (ctr = aval; *ctr; ctr++, len++);
else if (getlen == 2) {
if (*aval)
for (len = -sl, ctr = aval;
len += sl + ztrlen(*ctr), *++ctr;);
len += sl + MB_METASTRLEN(*ctr), *++ctr;);
}
else
for (ctr = aval;
@ -2331,7 +2327,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
len += wordcount(*ctr, spsep, getlen > 3), ctr++);
} else {
if (getlen < 3)
len = ztrlen(val);
len = MB_METASTRLEN(val);
else
len = wordcount(val, spsep, getlen > 3);
}
@ -2387,33 +2383,19 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
/*
* Perform case modifications.
*/
if (casmod) {
if (casmod != CASMOD_NONE) {
copied = 1; /* string is always modified by copy */
if (isarr) {
char **ap;
char **ap, **ap2;
if (!copied)
aval = arrdup(aval), copied = 1;
ap = aval;
ap2 = aval = (char **) zhalloc(sizeof(char *) * (arrlen(aval)+1));
if (casmod == 1)
for (; *ap; ap++)
makeuppercase(ap);
else if (casmod == 2)
for (; *ap; ap++)
makelowercase(ap);
else
for (; *ap; ap++)
makecapitals(ap);
while (*ap)
*ap2++ = casemodify(*ap++, casmod);
*ap2++ = NULL;
} else {
if (!copied)
val = dupstring(val), copied = 1;
if (casmod == 1)
makeuppercase(&val);
else if (casmod == 2)
makelowercase(&val);
else
makecapitals(&val);
val = casemodify(val, casmod);
}
}
/*
@ -2975,7 +2957,8 @@ modify(char **str, char **ptr)
for (t = e = *str; (tt = findword(&e, sep));) {
tc = *e;
*e = '\0';
copy = dupstring(tt);
if (c != 'l' && c != 'u')
copy = dupstring(tt);
*e = tc;
switch (c) {
case 'h':
@ -2991,10 +2974,10 @@ modify(char **str, char **ptr)
remlpaths(&copy);
break;
case 'l':
downcase(&copy);
copy = casemodify(tt, CASMOD_LOWER);
break;
case 'u':
upcase(&copy);
copy = casemodify(tt, CASMOD_UPPER);
break;
case 's':
if (hsubl && hsubr)
@ -3050,10 +3033,10 @@ modify(char **str, char **ptr)
remlpaths(str);
break;
case 'l':
downcase(str);
*str = casemodify(*str, CASMOD_LOWER);
break;
case 'u':
upcase(str);
*str = casemodify(*str, CASMOD_UPPER);
break;
case 's':
if (hsubl && hsubr) {

View File

@ -3687,7 +3687,7 @@ static mbstate_t mb_shiftstate;
/*
* Initialise multibyte state: called before a sequence of
* mb_metacharlen().
* mb_metacharlenconv().
*/
/**/
@ -3703,18 +3703,24 @@ mb_metacharinit(void)
* but character is not valid (e.g. possibly incomplete at end of string).
* Returned value is guaranteed not to reach beyond the end of the
* string (assuming correct metafication).
*
* If wcp is not NULL, the converted wide character is stored there.
* If no conversion could be done WEOF is used.
*/
/**/
int
mb_metacharlen(char *s)
mb_metacharlenconv(char *s, wint_t *wcp)
{
char inchar, *ptr;
size_t ret;
wchar_t wc;
if (!isset(MULTIBYTE))
if (!isset(MULTIBYTE)) {
if (wcp)
*wcp = WEOF;
return 1 + (*s == Meta);
}
ret = MB_INVALID;
for (ptr = s; *ptr; ) {
@ -3729,14 +3735,18 @@ mb_metacharlen(char *s)
break;
if (ret == MB_INCOMPLETE)
continue;
if (wcp)
*wcp = wc;
return ptr - s;
}
if (wcp)
*wcp = WEOF;
/* No valid multibyte sequence */
memset(&mb_shiftstate, 0, sizeof(mb_shiftstate));
if (ptr > s)
if (ptr > s) {
return 1 + (*s == Meta); /* Treat as single byte character */
else
} else
return 0; /* Probably shouldn't happen */
}

View File

@ -1882,6 +1882,17 @@ struct heap {
#define ZSIG_ALIAS (1<<3) /* Trap is stored under an alias */
#define ZSIG_SHIFT 4
/***********************/
/* Flags to casemodify */
/***********************/
enum {
    /* No case modification requested; exists so callers can test against it. */
    CASMOD_NONE = 0,
    /* Convert lower case characters to upper case: the (U) flag. */
    CASMOD_UPPER = 1,
    /* Convert upper case characters to lower case: the (L) flag. */
    CASMOD_LOWER = 2,
    /* Upper-case the first character of each alphanumeric run, lower-case the rest: the (C) flag. */
    CASMOD_CAPS = 3
};
/**********************************/
/* Flags to third argument of zle */
/**********************************/
@ -1927,7 +1938,7 @@ typedef char *(*ZleGetLineFn) _((int *, int *));
#ifdef MULTIBYTE_SUPPORT
#define nicezputs(str, outs) (void)mb_niceformat((str), (outs), NULL, 0)
#define MB_METACHARINIT() mb_metacharinit()
#define MB_METACHARLEN(str) mb_metacharlen(str)
#define MB_METACHARLEN(str) mb_metacharlenconv(str, NULL)
#define MB_METASTRLEN(str) mb_metastrlen(str)
#define MB_INCOMPLETE ((size_t)-2)

View File

@ -121,3 +121,37 @@
# Starting offsets with (R) seem to be so strange as to be hardly
# worth testing.
setopt extendedglob
[[ $a = (#b)t(én)(éb)reux ]] || print "Failed to match." >&2
for i in {1..${#match}}; do
print $match[i] $mbegin[i] $mend[i] ${a[$mbegin[i],$mend[i]]}
done
0:Multibyte offsets in pattern tests
>én 2 3 én
>éb 4 5 éb
b=${(U)a}
print $b
print ${(L)b}
desdichado="Je suis le $a, le veuf, l'inconsolé"
print ${(C)desdichado}
lxiv="l'état c'est moi"
print ${(C)lxiv}
0:Case modification of multibyte strings
>TÉNÉBREUX
>ténébreux
>Je Suis Le Ténébreux, Le Veuf, L'Inconsolé
>L'État C'Est Moi
array=(ølaf ødd øpened án encyclopædia)
barray=(${(U)array})
print $barray
print ${(L)barray}
print ${(C)array}
print ${(C)barray}
0:Case modification of arrays with multibyte strings
>ØLAF ØDD ØPENED ÁN ENCYCLOPÆDIA
>ølaf ødd øpened án encyclopædia
>Ølaf Ødd Øpened Án Encyclopædia
>Ølaf Ødd Øpened Án Encyclopædia