mirror of
git://git.code.sf.net/p/zsh/code
synced 2024-06-06 23:36:03 +02:00
22525: lengths and cases of multibyte strings in parameters and history
This commit is contained in:
parent
f95a6a913c
commit
6157c14d06
|
@ -1,3 +1,10 @@
|
|||
2006-06-28 Peter Stephenson <pws@csr.com>
|
||||
|
||||
* 22525: Completion/compinit, Src/hist.c, Src/jobs.c,
|
||||
Src/pattern.c, Src/subst.c, Src/utils.c, Src/zsh.h,
|
||||
Test/D07multibyte.ztst: lengths and cases of multibyte strings
|
||||
in parameters and history.
|
||||
|
||||
2006-06-27 Peter Stephenson <pws@csr.com>
|
||||
|
||||
* 22524: Src/params.c, Test/D07multibyte.ztst: searchable
|
||||
|
|
|
@ -128,11 +128,12 @@ fi
|
|||
# The standard options set in completion functions.
|
||||
|
||||
_comp_options=(
|
||||
glob
|
||||
extendedglob
|
||||
bareglobqual
|
||||
glob
|
||||
multibyte
|
||||
nullglob
|
||||
rcexpandparam
|
||||
extendedglob
|
||||
unset
|
||||
NO_markdirs
|
||||
NO_globsubst
|
||||
|
|
170
Src/hist.c
170
Src/hist.c
|
@ -635,10 +635,10 @@ histsubchar(int c)
|
|||
quotebreak(&sline);
|
||||
break;
|
||||
case 'l':
|
||||
downcase(&sline);
|
||||
sline = casemodify(sline, CASMOD_LOWER);
|
||||
break;
|
||||
case 'u':
|
||||
upcase(&sline);
|
||||
sline = casemodify(sline, CASMOD_UPPER);
|
||||
break;
|
||||
default:
|
||||
herrflush();
|
||||
|
@ -1503,42 +1503,130 @@ remlpaths(char **junkptr)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/**/
|
||||
int
|
||||
makeuppercase(char **junkptr)
|
||||
{
|
||||
char *str = *junkptr;
|
||||
|
||||
for (; *str; str++)
|
||||
*str = tuupper(*str);
|
||||
return 1;
|
||||
}
|
||||
/*
|
||||
* Return modified version of str from the heap with modification
|
||||
* according to one of the CASMOD_* types defined in zsh.h; CASMOD_NONE
|
||||
* is not handled, for obvious reasons.
|
||||
*/
|
||||
|
||||
/**/
|
||||
int
|
||||
makelowercase(char **junkptr)
|
||||
char *
|
||||
casemodify(char *str, int how)
|
||||
{
|
||||
char *str = *junkptr;
|
||||
char *str2 = zhalloc(2 * strlen(str) + 1);
|
||||
char *ptr2 = str2;
|
||||
int nextupper = 1;
|
||||
|
||||
for (; *str; str++)
|
||||
*str = tulower(*str);
|
||||
return 1;
|
||||
}
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
if (isset(MULTIBYTE)) {
|
||||
VARARR(char, mbstr, MB_CUR_MAX);
|
||||
mbstate_t ps;
|
||||
|
||||
/**/
|
||||
int
|
||||
makecapitals(char **junkptr)
|
||||
{
|
||||
char *str = *junkptr;
|
||||
mb_metacharinit();
|
||||
memset(&ps, 0, sizeof(ps));
|
||||
while (*str) {
|
||||
wint_t wc;
|
||||
int len = mb_metacharlenconv(str, &wc), mod = 0, len2;
|
||||
/*
|
||||
* wc is set to WEOF if the start of str couldn't be
|
||||
* converted. Presumably WEOF doesn't match iswlower(), but
|
||||
* better be safe.
|
||||
*/
|
||||
if (wc == WEOF) {
|
||||
while (len--)
|
||||
*ptr2++ = *str++;
|
||||
/* not alphanumeric */
|
||||
nextupper = 1;
|
||||
continue;
|
||||
}
|
||||
switch (how) {
|
||||
case CASMOD_LOWER:
|
||||
if (iswupper(wc)) {
|
||||
wc = towlower(wc);
|
||||
mod = 1;
|
||||
}
|
||||
break;
|
||||
|
||||
for (; *str;) {
|
||||
for (; *str && !ialnum(*str); str++);
|
||||
if (*str)
|
||||
*str = tuupper(*str), str++;
|
||||
for (; *str && ialnum(*str); str++)
|
||||
*str = tulower(*str);
|
||||
case CASMOD_UPPER:
|
||||
if (iswlower(wc)) {
|
||||
wc = towupper(wc);
|
||||
mod = 1;
|
||||
}
|
||||
break;
|
||||
|
||||
case CASMOD_CAPS:
|
||||
default: /* shuts up compiler */
|
||||
if (!iswalnum(wc))
|
||||
nextupper = 1;
|
||||
else if (nextupper) {
|
||||
if (iswlower(wc)) {
|
||||
wc = towupper(wc);
|
||||
mod = 1;
|
||||
}
|
||||
nextupper = 0;
|
||||
} else if (iswupper(wc)) {
|
||||
wc = towlower(wc);
|
||||
mod = 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (mod && (len2 = wcrtomb(mbstr, wc, &ps)) > 0) {
|
||||
char *mbptr;
|
||||
|
||||
for (mbptr = mbstr; mbptr < mbstr + len2; mbptr++) {
|
||||
if (imeta(STOUC(*mbptr))) {
|
||||
*ptr2++ = Meta;
|
||||
*ptr2++ = *mbptr ^ 32;
|
||||
} else
|
||||
*ptr2++ = *mbptr;
|
||||
}
|
||||
str += len;
|
||||
} else {
|
||||
while (len--)
|
||||
*ptr2++ = *str++;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
else
|
||||
#endif
|
||||
while (*str) {
|
||||
int c;
|
||||
if (*str == Meta) {
|
||||
c = str[1] ^ 32;
|
||||
str += 2;
|
||||
} else
|
||||
c = *str++;
|
||||
switch (how) {
|
||||
case CASMOD_LOWER:
|
||||
if (isupper(c))
|
||||
c = tolower(c);
|
||||
break;
|
||||
|
||||
case CASMOD_UPPER:
|
||||
if (islower(c))
|
||||
c = toupper(c);
|
||||
break;
|
||||
|
||||
case CASMOD_CAPS:
|
||||
default: /* shuts up compiler */
|
||||
if (!ialnum(c))
|
||||
nextupper = 1;
|
||||
else if (nextupper) {
|
||||
if (islower(c))
|
||||
c = toupper(c);
|
||||
nextupper = 0;
|
||||
} else if (isupper(c))
|
||||
c = tolower(c);
|
||||
break;
|
||||
}
|
||||
if (imeta(c)) {
|
||||
*ptr2++ = Meta;
|
||||
*ptr2++ = c ^ 32;
|
||||
} else
|
||||
*ptr2++ = c;
|
||||
}
|
||||
*ptr2 = '\0';
|
||||
return str2;
|
||||
}
|
||||
|
||||
/**/
|
||||
|
@ -1644,26 +1732,6 @@ getargs(Histent elist, int arg1, int arg2)
|
|||
return dupstrpfx(elist->node.nam + pos1, words[2*arg2+1] - pos1);
|
||||
}
|
||||
|
||||
/**/
|
||||
void
|
||||
upcase(char **x)
|
||||
{
|
||||
char *pp = *(char **)x;
|
||||
|
||||
for (; *pp; pp++)
|
||||
*pp = tuupper(*pp);
|
||||
}
|
||||
|
||||
/**/
|
||||
void
|
||||
downcase(char **x)
|
||||
{
|
||||
char *pp = *(char **)x;
|
||||
|
||||
for (; *pp; pp++)
|
||||
*pp = tulower(*pp);
|
||||
}
|
||||
|
||||
/**/
|
||||
int
|
||||
quote(char **tr)
|
||||
|
|
|
@ -2014,7 +2014,7 @@ bin_kill(char *nam, char **argv, UNUSED(Options ops), UNUSED(int func))
|
|||
return 1;
|
||||
} else
|
||||
signame = *argv;
|
||||
makeuppercase(&signame);
|
||||
signame = casemodify(signame, CASMOD_UPPER);
|
||||
if (!strncmp(signame, "SIG", 3))
|
||||
signame+=3;
|
||||
|
||||
|
|
|
@ -1644,17 +1644,12 @@ charrefinc(char **x, char *y)
|
|||
}
|
||||
|
||||
|
||||
#ifndef PARAMETER_CODE_HANDLES_MULTIBYTE
|
||||
/*
|
||||
* TODO: We should use the other branch, but currently
|
||||
* the parameter code doesn't handle multibyte input,
|
||||
* so this would produce the wrong subscripts,
|
||||
* so just use a raw byte difference for now.
|
||||
* Count the number of characters between two pointers, smaller first
|
||||
*
|
||||
* This is used when setting values in parameters, so we obey
|
||||
* the MULTIBYTE option (even if it's been overridden locally).
|
||||
*/
|
||||
/* Count the number of characters between two pointers, smaller first */
|
||||
# define CHARSUB(x,y) ((y) - (x))
|
||||
#else
|
||||
/* Count the number of characters between two pointers, smaller first */
|
||||
#define CHARSUB(x,y) charsub(x, y)
|
||||
static ptrdiff_t
|
||||
charsub(char *x, char *y)
|
||||
|
@ -1663,6 +1658,9 @@ charsub(char *x, char *y)
|
|||
size_t ret;
|
||||
wchar_t wc;
|
||||
|
||||
if (!isset(MULTIBYTE))
|
||||
return y - x;
|
||||
|
||||
while (x < y) {
|
||||
ret = mbrtowc(&wc, x, y-x, &shiftstate);
|
||||
|
||||
|
@ -1674,13 +1672,12 @@ charsub(char *x, char *y)
|
|||
/* Treat nulls as normal characters */
|
||||
if (!ret)
|
||||
ret = 1;
|
||||
res += ret;
|
||||
res++;
|
||||
x += ret;
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
#endif
|
||||
|
||||
#else /* no MULTIBYTE_SUPPORT */
|
||||
|
||||
|
|
67
Src/subst.c
67
Src/subst.c
|
@ -1019,7 +1019,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
/* (u): straightforward. */
|
||||
int unique = 0;
|
||||
/* combination of (L), (U) and (C) flags. */
|
||||
int casmod = 0;
|
||||
int casmod = CASMOD_NONE;
|
||||
/*
|
||||
* quotemod says we are doing either (q) (positive), (Q) (negative)
|
||||
* or not (0). quotetype counts the q's for the first case.
|
||||
|
@ -1211,13 +1211,13 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
break;
|
||||
|
||||
case 'L':
|
||||
casmod = 2;
|
||||
casmod = CASMOD_LOWER;
|
||||
break;
|
||||
case 'U':
|
||||
casmod = 1;
|
||||
casmod = CASMOD_UPPER;
|
||||
break;
|
||||
case 'C':
|
||||
casmod = 3;
|
||||
casmod = CASMOD_CAPS;
|
||||
break;
|
||||
|
||||
case 'o':
|
||||
|
@ -1819,17 +1819,13 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
break;
|
||||
}
|
||||
switch (v->pm->node.flags & (PM_LOWER | PM_UPPER)) {
|
||||
char *t;
|
||||
|
||||
case PM_LOWER:
|
||||
t = val;
|
||||
for (; (c = *t); t++)
|
||||
*t = tulower(c);
|
||||
val = casemodify(val, CASMOD_LOWER);
|
||||
copied = 1;
|
||||
break;
|
||||
case PM_UPPER:
|
||||
t = val;
|
||||
for (; (c = *t); t++)
|
||||
*t = tuupper(c);
|
||||
val = casemodify(val, CASMOD_UPPER);
|
||||
copied = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -2316,14 +2312,14 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
|
||||
if (isarr) {
|
||||
char **ctr;
|
||||
int sl = sep ? ztrlen(sep) : 1;
|
||||
int sl = sep ? MB_METASTRLEN(sep) : 1;
|
||||
|
||||
if (getlen == 1)
|
||||
for (ctr = aval; *ctr; ctr++, len++);
|
||||
else if (getlen == 2) {
|
||||
if (*aval)
|
||||
for (len = -sl, ctr = aval;
|
||||
len += sl + ztrlen(*ctr), *++ctr;);
|
||||
len += sl + MB_METASTRLEN(*ctr), *++ctr;);
|
||||
}
|
||||
else
|
||||
for (ctr = aval;
|
||||
|
@ -2331,7 +2327,7 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
len += wordcount(*ctr, spsep, getlen > 3), ctr++);
|
||||
} else {
|
||||
if (getlen < 3)
|
||||
len = ztrlen(val);
|
||||
len = MB_METASTRLEN(val);
|
||||
else
|
||||
len = wordcount(val, spsep, getlen > 3);
|
||||
}
|
||||
|
@ -2387,33 +2383,19 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
|
|||
/*
|
||||
* Perform case modifications.
|
||||
*/
|
||||
if (casmod) {
|
||||
if (casmod != CASMOD_NONE) {
|
||||
copied = 1; /* string is always modified by copy */
|
||||
if (isarr) {
|
||||
char **ap;
|
||||
char **ap, **ap2;
|
||||
|
||||
if (!copied)
|
||||
aval = arrdup(aval), copied = 1;
|
||||
ap = aval;
|
||||
ap2 = aval = (char **) zhalloc(sizeof(char *) * (arrlen(aval)+1));
|
||||
|
||||
if (casmod == 1)
|
||||
for (; *ap; ap++)
|
||||
makeuppercase(ap);
|
||||
else if (casmod == 2)
|
||||
for (; *ap; ap++)
|
||||
makelowercase(ap);
|
||||
else
|
||||
for (; *ap; ap++)
|
||||
makecapitals(ap);
|
||||
|
||||
while (*ap)
|
||||
*ap2++ = casemodify(*ap++, casmod);
|
||||
*ap2++ = NULL;
|
||||
} else {
|
||||
if (!copied)
|
||||
val = dupstring(val), copied = 1;
|
||||
if (casmod == 1)
|
||||
makeuppercase(&val);
|
||||
else if (casmod == 2)
|
||||
makelowercase(&val);
|
||||
else
|
||||
makecapitals(&val);
|
||||
val = casemodify(val, casmod);
|
||||
}
|
||||
}
|
||||
/*
|
||||
|
@ -2975,7 +2957,8 @@ modify(char **str, char **ptr)
|
|||
for (t = e = *str; (tt = findword(&e, sep));) {
|
||||
tc = *e;
|
||||
*e = '\0';
|
||||
copy = dupstring(tt);
|
||||
if (c != 'l' && c != 'u')
|
||||
copy = dupstring(tt);
|
||||
*e = tc;
|
||||
switch (c) {
|
||||
case 'h':
|
||||
|
@ -2991,10 +2974,10 @@ modify(char **str, char **ptr)
|
|||
remlpaths(&copy);
|
||||
break;
|
||||
case 'l':
|
||||
downcase(&copy);
|
||||
copy = casemodify(tt, CASMOD_LOWER);
|
||||
break;
|
||||
case 'u':
|
||||
upcase(&copy);
|
||||
copy = casemodify(tt, CASMOD_UPPER);
|
||||
break;
|
||||
case 's':
|
||||
if (hsubl && hsubr)
|
||||
|
@ -3050,10 +3033,10 @@ modify(char **str, char **ptr)
|
|||
remlpaths(str);
|
||||
break;
|
||||
case 'l':
|
||||
downcase(str);
|
||||
*str = casemodify(*str, CASMOD_LOWER);
|
||||
break;
|
||||
case 'u':
|
||||
upcase(str);
|
||||
*str = casemodify(*str, CASMOD_UPPER);
|
||||
break;
|
||||
case 's':
|
||||
if (hsubl && hsubr) {
|
||||
|
|
20
Src/utils.c
20
Src/utils.c
|
@ -3687,7 +3687,7 @@ static mbstate_t mb_shiftstate;
|
|||
|
||||
/*
|
||||
* Initialise multibyte state: called before a sequence of
|
||||
* mb_metacharlen().
|
||||
* mb_metacharlenconv().
|
||||
*/
|
||||
|
||||
/**/
|
||||
|
@ -3703,18 +3703,24 @@ mb_metacharinit(void)
|
|||
* but character is not valid (e.g. possibly incomplete at end of string).
|
||||
* Returned value is guaranteed not to reach beyond the end of the
|
||||
* string (assuming correct metafication).
|
||||
*
|
||||
* If wcp is not NULL, the converted wide character is stored there.
|
||||
* If no conversion could be done WEOF is used.
|
||||
*/
|
||||
|
||||
/**/
|
||||
int
|
||||
mb_metacharlen(char *s)
|
||||
mb_metacharlenconv(char *s, wint_t *wcp)
|
||||
{
|
||||
char inchar, *ptr;
|
||||
size_t ret;
|
||||
wchar_t wc;
|
||||
|
||||
if (!isset(MULTIBYTE))
|
||||
if (!isset(MULTIBYTE)) {
|
||||
if (wcp)
|
||||
*wcp = WEOF;
|
||||
return 1 + (*s == Meta);
|
||||
}
|
||||
|
||||
ret = MB_INVALID;
|
||||
for (ptr = s; *ptr; ) {
|
||||
|
@ -3729,14 +3735,18 @@ mb_metacharlen(char *s)
|
|||
break;
|
||||
if (ret == MB_INCOMPLETE)
|
||||
continue;
|
||||
if (wcp)
|
||||
*wcp = wc;
|
||||
return ptr - s;
|
||||
}
|
||||
|
||||
if (wcp)
|
||||
*wcp = WEOF;
|
||||
/* No valid multibyte sequence */
|
||||
memset(&mb_shiftstate, 0, sizeof(mb_shiftstate));
|
||||
if (ptr > s)
|
||||
if (ptr > s) {
|
||||
return 1 + (*s == Meta); /* Treat as single byte character */
|
||||
else
|
||||
} else
|
||||
return 0; /* Probably shouldn't happen */
|
||||
}
|
||||
|
||||
|
|
13
Src/zsh.h
13
Src/zsh.h
|
@ -1882,6 +1882,17 @@ struct heap {
|
|||
#define ZSIG_ALIAS (1<<3) /* Trap is stored under an alias */
|
||||
#define ZSIG_SHIFT 4
|
||||
|
||||
/************************/
|
||||
/* Flags for casemodify */
|
||||
/************************/
|
||||
|
||||
enum {
|
||||
CASMOD_NONE, /* dummy for tests */
|
||||
CASMOD_UPPER,
|
||||
CASMOD_LOWER,
|
||||
CASMOD_CAPS
|
||||
};
|
||||
|
||||
/**********************************/
|
||||
/* Flags to third argument of zle */
|
||||
/**********************************/
|
||||
|
@ -1927,7 +1938,7 @@ typedef char *(*ZleGetLineFn) _((int *, int *));
|
|||
#ifdef MULTIBYTE_SUPPORT
|
||||
#define nicezputs(str, outs) (void)mb_niceformat((str), (outs), NULL, 0)
|
||||
#define MB_METACHARINIT() mb_metacharinit()
|
||||
#define MB_METACHARLEN(str) mb_metacharlen(str)
|
||||
#define MB_METACHARLEN(str) mb_metacharlenconv(str, NULL)
|
||||
#define MB_METASTRLEN(str) mb_metastrlen(str)
|
||||
|
||||
#define MB_INCOMPLETE ((size_t)-2)
|
||||
|
|
|
@ -121,3 +121,37 @@
|
|||
|
||||
# Starting offsets with (R) seem to be so strange as to be hardly
|
||||
# worth testing.
|
||||
|
||||
setopt extendedglob
|
||||
[[ $a = (#b)t(én)(éb)reux ]] || print "Failed to match." >&2
|
||||
for i in {1..${#match}}; do
|
||||
print $match[i] $mbegin[i] $mend[i] ${a[$mbegin[i],$mend[i]]}
|
||||
done
|
||||
0:Multibyte offsets in pattern tests
|
||||
>én 2 3 én
|
||||
>éb 4 5 éb
|
||||
|
||||
b=${(U)a}
|
||||
print $b
|
||||
print ${(L)b}
|
||||
desdichado="Je suis le $a, le veuf, l'inconsolé"
|
||||
print ${(C)desdichado}
|
||||
lxiv="l'état c'est moi"
|
||||
print ${(C)lxiv}
|
||||
0:Case modification of multibyte strings
|
||||
>TÉNÉBREUX
|
||||
>ténébreux
|
||||
>Je Suis Le Ténébreux, Le Veuf, L'Inconsolé
|
||||
>L'État C'Est Moi
|
||||
|
||||
array=(ølaf ødd øpened án encyclopædia)
|
||||
barray=(${(U)array})
|
||||
print $barray
|
||||
print ${(L)barray}
|
||||
print ${(C)array}
|
||||
print ${(C)barray}
|
||||
0:Case modification of arrays with multibyte strings
|
||||
>ØLAF ØDD ØPENED ÁN ENCYCLOPÆDIA
|
||||
>ølaf ødd øpened án encyclopædia
|
||||
>Ølaf Ødd Øpened Án Encyclopædia
|
||||
>Ølaf Ødd Øpened Án Encyclopædia
|
||||
|
|
Loading…
Reference in New Issue