1
0
Fork 0
mirror of git://git.code.sf.net/p/zsh/code synced 2024-06-08 00:06:04 +02:00

22952: fix some argument delimiters to work with multibyte characters

This commit is contained in:
Peter Stephenson 2006-11-02 18:43:19 +00:00
parent d94e67d6fd
commit d8207acddb
6 changed files with 196 additions and 75 deletions

View File

@ -1,5 +1,11 @@
2006-11-02 Peter Stephenson <pws@csr.com>
* 22952: Src/glob.c, Src/params.c, Src/subst.c,
Test/D04parameter.ztst: fix multibyte delimiters for
arguments to parameter flags and substitution modifiers
in parameters and glob qualifiers (but not yet substitution
modifiers in history).
* 22950: Src/Zle/zle_tricky.c: starting menu completion
with reverse-menu-complete used the first match instead
of the last.

View File

@ -1243,9 +1243,10 @@ zglob(LinkList list, LinkNode np, int nountok)
else {
/* ... or a user name */
char sav, *tt;
int arglen;
/* Find matching delimiters */
tt = get_strarg(s);
tt = get_strarg(s, &arglen);
if (!*tt) {
zerr("missing end of name");
data = 0;
@ -1255,7 +1256,7 @@ zglob(LinkList list, LinkNode np, int nountok)
sav = *tt;
*tt = '\0';
if ((pw = getpwnam(s + 1)))
if ((pw = getpwnam(s + arglen)))
data = pw->pw_uid;
else {
zerr("unknown user");
@ -1268,7 +1269,7 @@ zglob(LinkList list, LinkNode np, int nountok)
data = 0;
#endif /* !USE_GETPWNAM */
if (sav)
s = tt + 1;
s = tt + arglen;
else
s = tt;
}
@ -1283,8 +1284,9 @@ zglob(LinkList list, LinkNode np, int nountok)
else {
/* ...or a delimited group name. */
char sav, *tt;
int arglen;
tt = get_strarg(s);
tt = get_strarg(s, &arglen);
if (!*tt) {
zerr("missing end of name");
data = 0;
@ -1294,7 +1296,7 @@ zglob(LinkList list, LinkNode np, int nountok)
sav = *tt;
*tt = '\0';
if ((gr = getgrnam(s + 1)))
if ((gr = getgrnam(s + arglen)))
data = gr->gr_gid;
else {
zerr("unknown group");
@ -1307,7 +1309,7 @@ zglob(LinkList list, LinkNode np, int nountok)
data = 0;
#endif /* !USE_GETGRNAM */
if (sav)
s = tt + 1;
s = tt + arglen;
else
s = tt;
}
@ -1438,8 +1440,7 @@ zglob(LinkList list, LinkNode np, int nountok)
tt = NULL;
}
} else {
plus = 1;
tt = get_strarg(s);
tt = get_strarg(s, &plus);
if (!*tt)
{
zerr("missing end of string");

View File

@ -947,7 +947,7 @@ getarg(char **str, int *inv, Value v, int a2, zlong *w,
int *prevcharlen, int *nextcharlen)
{
int hasbeg = 0, word = 0, rev = 0, ind = 0, down = 0, l, i, ishash;
int keymatch = 0, needtok = 0;
int keymatch = 0, needtok = 0, arglen;
char *s = *str, *sep = NULL, *t, sav, *d, **ta, **p, *tt, c;
zlong num = 1, beg = 0, r = 0;
Patprog pprog = NULL;
@ -1004,28 +1004,28 @@ getarg(char **str, int *inv, Value v, int a2, zlong *w,
* special interpretation by getindex() of `*' or `@'. */
break;
case 'n':
t = get_strarg(++s);
t = get_strarg(++s, &arglen);
if (!*t)
goto flagerr;
sav = *t;
*t = '\0';
num = mathevalarg(s + 1, &d);
num = mathevalarg(s + arglen, &d);
if (!num)
num = 1;
*t = sav;
s = t;
s = t + arglen - 1;
break;
case 'b':
hasbeg = 1;
t = get_strarg(++s);
t = get_strarg(++s, &arglen);
if (!*t)
goto flagerr;
sav = *t;
*t = '\0';
if ((beg = mathevalarg(s + 1, &d)) > 0)
if ((beg = mathevalarg(s + arglen, &d)) > 0)
beg--;
*t = sav;
s = t;
s = t + arglen - 1;
break;
case 'p':
escapes = 1;
@ -1033,15 +1033,16 @@ getarg(char **str, int *inv, Value v, int a2, zlong *w,
case 's':
/* This gives the string that separates words *
* (for use with the `w' flag). */
t = get_strarg(++s);
t = get_strarg(++s, &arglen);
if (!*t)
goto flagerr;
sav = *t;
*t = '\0';
sep = escapes ? getkeystring(s + 1, &waste, GETKEYS_SEP, NULL)
: dupstring(s + 1);
s += arglen;
sep = escapes ? getkeystring(s, &waste, GETKEYS_SEP, NULL)
: dupstring(s);
*t = sav;
s = t;
s = t + arglen - 1;
break;
default:
flagerr:

View File

@ -1137,62 +1137,113 @@ dopadding(char *str, int prenum, int postnum, char *preone, char *postone,
return ret;
}
/*
* Look for a delimited portion of a string. The first (possibly
* multibyte) character at s is the delimiter. Various forms
* of brackets are treated separately, as documented.
*
* Returns a pointer to the final delimiter. Sets *lenp to the
* length of the final delimiter; a NUL causes *lenp to be set
* to zero since we shouldn't advance past it. (The string is
* tokenized, so a NUL is a real end of string.)
*/
/**/
char *
get_strarg(char *s)
get_strarg(char *s, int *lenp)
{
char t = *s++;
convchar_t del;
int len;
char tok = 0;
if (!t)
return s - 1;
MB_METACHARINIT();
len = MB_METACHARLENCONV(s, &del);
if (!len) {
*lenp = 0;
return s;
}
switch (t) {
case '(':
t = ')';
#ifdef MULTIBYTE_SUPPORT
if (del == WEOF)
del = (wint_t)((*s == Meta) ? s[1] ^ 32 : *s);
#endif
s += len;
switch (del) {
case ZWC('('):
del = ZWC(')');
break;
case '[':
t = ']';
del = ZWC(']');
break;
case '{':
t = '}';
del = ZWC('}');
break;
case '<':
t = '>';
del = ZWC('>');
break;
case Inpar:
t = Outpar;
tok = Outpar;
break;
case Inang:
t = Outang;
tok = Outang;
break;
case Inbrace:
t = Outbrace;
tok = Outbrace;
break;
case Inbrack:
t = Outbrack;
tok = Outbrack;
break;
}
while (*s && *s != t)
s++;
if (tok) {
/*
* Looking for a matching token; we want the literal byte,
* not a decoded multibyte character, so search specially.
*/
while (*s && *s != tok)
s++;
} else {
convchar_t del2;
len = 0;
while (*s) {
len = MB_METACHARLENCONV(s, &del2);
#ifdef MULTIBYTE_SUPPORT
if (del2 == WEOF)
del2 = (wint_t)((*s == Meta) ? s[1] ^ 32 : *s);
#endif
if (del == del2)
break;
s += len;
}
}
*lenp = len;
return s;
}
/*
* Get an integer argument; update *s to point just past the
* final delimiter. *delmatchp is set to 1 if we have matching
* delimiters and there was no error in the evaluation, else 0.
*/
/**/
static int
get_intarg(char **s)
get_intarg(char **s, int *delmatchp)
{
char *t = get_strarg(*s + 1);
int arglen;
char *t = get_strarg(*s, &arglen);
char *p, sav;
zlong ret;
*delmatchp = 0;
if (!*t)
return -1;
sav = *t;
*t = '\0';
p = dupstring(*s + 2);
*s = t;
p = dupstring(*s + arglen);
*s = t + arglen;
*t = sav;
if (parsestr(p))
return -1;
@ -1204,6 +1255,7 @@ get_intarg(char **s)
return -1;
if (ret < 0)
ret = -ret;
*delmatchp = 1;
return ret < 0 ? -ret : ret;
}
@ -1540,8 +1592,8 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
int escapes = 0;
int klen;
#define UNTOK(C) (itok(C) ? ztokens[(C) - Pound] : (C))
#define UNTOK_AND_ESCAPE(X) {\
untokenize(X = dupstring(s + 1));\
#define UNTOK_AND_ESCAPE(X, S) {\
untokenize(X = dupstring(S));\
if (escapes) {\
X = getkeystring(X, &klen, GETKEYS_SEP, NULL);\
X = metafy(X, klen, META_HREALLOC);\
@ -1549,6 +1601,9 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
}
for (s++; (c = *s) != ')' && c != Outpar; s++, tt = 0) {
int arglen; /* length of modifier argument */
int delmatch; /* integer delimiters matched OK */
switch (c) {
case ')':
case Outpar:
@ -1578,9 +1633,11 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
flags |= SUB_SUBSTR;
break;
case 'I':
flnum = get_intarg(&s);
s++;
flnum = get_intarg(&s, &delmatch);
if (flnum < 0)
goto flagerr;
s--;
break;
case 'L':
@ -1658,16 +1715,16 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
tt = 1;
/* fall through */
case 'j':
t = get_strarg(++s);
t = get_strarg(++s, &arglen);
if (*t) {
sav = *t;
*t = '\0';
if (tt)
UNTOK_AND_ESCAPE(spsep)
UNTOK_AND_ESCAPE(spsep, s + arglen)
else
UNTOK_AND_ESCAPE(sep)
UNTOK_AND_ESCAPE(sep, s + arglen)
*t = sav;
s = t;
s = t + arglen - 1;
} else
goto flagerr;
break;
@ -1676,43 +1733,43 @@ paramsubst(LinkList l, LinkNode n, char **str, int qt, int ssub)
tt = 1;
/* fall through */
case 'r':
sav = s[1];
num = get_intarg(&s);
s++;
num = get_intarg(&s, &delmatch);
if (num < 0)
goto flagerr;
if (tt)
prenum = num;
else
postnum = num;
if (UNTOK(s[1]) != UNTOK(sav))
if (!delmatch)
break;
t = get_strarg(++s);
t = get_strarg(s, &arglen);
if (!*t)
goto flagerr;
sav = *t;
*t = '\0';
if (tt)
UNTOK_AND_ESCAPE(premul)
UNTOK_AND_ESCAPE(premul, s + arglen)
else
UNTOK_AND_ESCAPE(postmul)
UNTOK_AND_ESCAPE(postmul, s + arglen)
*t = sav;
sav = *s;
s = t + 1;
s = t + arglen;
if (UNTOK(*s) != UNTOK(sav)) {
s--;
break;
}
t = get_strarg(s);
t = get_strarg(s, &arglen);
if (!*t)
goto flagerr;
sav = *t;
*t = '\0';
if (tt)
UNTOK_AND_ESCAPE(preone)
UNTOK_AND_ESCAPE(preone, s + arglen)
else
UNTOK_AND_ESCAPE(postone)
UNTOK_AND_ESCAPE(postone, s + arglen)
*t = sav;
s = t;
s = t + arglen - 1;
break;
case 'm':
@ -3251,9 +3308,10 @@ arithsubst(char *a, char **bptr, char *rest)
void
modify(char **str, char **ptr)
{
char *ptr1, *ptr2, *ptr3, del, *lptr, c, *test, *sep, *t, *tt, tc, *e;
char *copy, *all, *tmp, sav;
int gbal, wall, rec, al, nl;
char *ptr1, *ptr2, *ptr3, *lptr, c, *test, *sep, *t, *tt, tc, *e;
char *copy, *all, *tmp, sav, sav1, *ptr1end;
int gbal, wall, rec, al, nl, charlen, delmatch;
convchar_t del;
test = NULL;
@ -3282,20 +3340,48 @@ modify(char **str, char **ptr)
break;
case 's':
/* TODO: multibyte delimiter */
c = **ptr;
(*ptr)++;
ptr1 = *ptr;
del = *ptr1++;
for (ptr2 = ptr1; *ptr2 != del && *ptr2; ptr2++);
MB_METACHARINIT();
charlen = MB_METACHARLENCONV(ptr1, &del);
#ifdef MULTIBYTE_SUPPORT
if (del == WEOF)
del = (wint_t)((*ptr1 == Meta) ? ptr1[1] ^ 32 : *ptr1);
#endif
ptr1 += charlen;
for (ptr2 = ptr1, charlen = 0; *ptr2; ptr2 += charlen) {
convchar_t del2;
charlen = MB_METACHARLENCONV(ptr2, &del2);
#ifdef MULTIBYTE_SUPPORT
if (del2 == WEOF)
del2 = (wint_t)((*ptr2 == Meta) ?
ptr2[1] ^ 32 : *ptr2);
#endif
if (del2 == del)
break;
}
if (!*ptr2) {
zerr("bad substitution");
return;
}
*ptr2++ = '\0';
for (ptr3 = ptr2; *ptr3 != del && *ptr3; ptr3++);
if ((sav = *ptr3))
*ptr3++ = '\0';
ptr1end = ptr2;
ptr2 += charlen;
sav1 = *ptr1end;
*ptr1end = '\0';
for (ptr3 = ptr2, charlen = 0; *ptr3; ptr3 += charlen) {
convchar_t del3;
charlen = MB_METACHARLENCONV(ptr3, &del3);
#ifdef MULTIBYTE_SUPPORT
if (del3 == WEOF)
del3 = (wint_t)((*ptr3 == Meta) ?
ptr3[1] ^ 32 : *ptr3);
#endif
if (del3 == del)
break;
}
sav = *ptr3;
*ptr3 = '\0';
if (*ptr1) {
zsfree(hsubl);
hsubl = ztrdup(ptr1);
@ -3313,10 +3399,9 @@ modify(char **str, char **ptr)
for (tt = hsubr = ztrdup(ptr2); *tt; tt++)
if (inull(*tt) && *tt != Bnullkeep)
chuck(tt--);
ptr2[-1] = del;
if (sav)
ptr3[-1] = sav;
*ptr = ptr3 - 1;
*ptr1end = sav1;
*ptr3 = sav;
*ptr = ptr3 + charlen - 1;
break;
case '&':
@ -3335,13 +3420,13 @@ modify(char **str, char **ptr)
case 'W':
wall = 1;
(*ptr)++;
ptr1 = get_strarg(ptr2 = *ptr);
ptr1 = get_strarg(ptr2 = *ptr, &charlen);
if ((sav = *ptr1))
*ptr1 = '\0';
sep = dupstring(ptr2 + 1);
sep = dupstring(ptr2 + charlen);
if (sav)
*ptr1 = sav;
*ptr = ptr1 + 1;
*ptr = ptr1 + charlen;
c = '\0';
break;
@ -3350,8 +3435,8 @@ modify(char **str, char **ptr)
(*ptr)++;
break;
case 'F':
rec = get_intarg(ptr);
(*ptr)++;
rec = get_intarg(ptr, &delmatch);
break;
default:
*ptr = lptr;

View File

@ -867,3 +867,17 @@
>andsomekept
>andsomekept
file=/one/two/three/four
print ${file:fh}
print ${file:F.1.h}
print ${file:F+2+h}
print ${file:F(3)h}
print ${file:F<4>h}
print ${file:F{5}h}
0:Modifiers with repetition
>/
>/one/two/three
>/one/two
>/one
>/
>/

View File

@ -297,3 +297,17 @@
>«κατέβην ¥«χθὲς»£ ¥¥«εἰς»£ «Πειραιᾶ
>ςκατέβην ηςχθὲςΓλ τηςεἰςΓλ ςΠειραιᾶ
# er... yeah, that looks right...
foo=picobarn
print ${foo:s£bar£rod£:s¥rod¥stick¥}
0:Delimiters in modifiers
>picostickn
# TODO: if we get paired multibyte bracket delimiters to work
# (as Emacs does, the smug so-and-so), the following should change.
foo=bar
print ${(r£5£¥X¥)foo}
print ${(l«10«»Y»£HI£)foo}
0:Delimiters in parameter flags
>barXX
>YYYYYHIbar