mirror of
git://git.code.sf.net/p/zsh/code
synced 2024-06-08 00:06:04 +02:00
20500: Unmetafy patterns where possible and other minor pattern fixes
This commit is contained in:
parent
bc704ef05c
commit
b115ca307a
|
@ -1,3 +1,11 @@
|
||||||
|
2004-10-18 Peter Stephenson <pws@csr.com>
|
||||||
|
|
||||||
|
* 20500: Misc/globtests, Src/glob.c, Src/pattern.c, Src/zsh.h,
|
||||||
|
Src/Zle/complist.c, Test/D02glob.ztst: Use unmetafied strings
|
||||||
|
in patterns more; improve glob.c/pattern.c interface; fix
|
||||||
|
minor <num-> issue; add bogus quotation from Proust (it's
|
||||||
|
my file and I'll do what I like with it).
|
||||||
|
|
||||||
2004-10-17 Wayne Davison <wayned@users.sourceforge.net>
|
2004-10-17 Wayne Davison <wayned@users.sourceforge.net>
|
||||||
|
|
||||||
* 20496: Src/utils.c: made zclose() not call close() when the
|
* 20496: Src/utils.c: made zclose() not call close() when the
|
||||||
|
|
|
@ -134,6 +134,9 @@ t 633 <1-1000>33
|
||||||
t 633 <-1000>33
|
t 633 <-1000>33
|
||||||
t 633 <1->33
|
t 633 <1->33
|
||||||
t 633 <->33
|
t 633 <->33
|
||||||
|
# An open top end of a range will match any integer, even
|
||||||
|
# if not representable in the internal integer type.
|
||||||
|
t 12345678901234567890123456789012345678901234567890123456789012345678901234567890foo <42->foo
|
||||||
# Approximate matching
|
# Approximate matching
|
||||||
t READ.ME (#ia1)readme
|
t READ.ME (#ia1)readme
|
||||||
f READ..ME (#ia1)readme
|
f READ..ME (#ia1)readme
|
||||||
|
|
|
@ -601,7 +601,7 @@ putmatchcol(Listcols c, char *group, char *n)
|
||||||
|
|
||||||
for (pc = c->pats; pc; pc = pc->next)
|
for (pc = c->pats; pc; pc = pc->next)
|
||||||
if ((!pc->prog || !group || pattry(pc->prog, group)) &&
|
if ((!pc->prog || !group || pattry(pc->prog, group)) &&
|
||||||
pattryrefs(pc->pat, n, &nrefs, begpos, endpos)) {
|
pattryrefs(pc->pat, n, -1, 0, &nrefs, begpos, endpos)) {
|
||||||
if (pc->cols[1]) {
|
if (pc->cols[1]) {
|
||||||
patcols = pc->cols;
|
patcols = pc->cols;
|
||||||
|
|
||||||
|
@ -639,7 +639,7 @@ putfilecol(Listcols c, char *group, char *n, mode_t m)
|
||||||
|
|
||||||
for (pc = c->pats; pc; pc = pc->next)
|
for (pc = c->pats; pc; pc = pc->next)
|
||||||
if ((!pc->prog || !group || pattry(pc->prog, group)) &&
|
if ((!pc->prog || !group || pattry(pc->prog, group)) &&
|
||||||
pattryrefs(pc->pat, n, &nrefs, begpos, endpos)) {
|
pattryrefs(pc->pat, n, -1, 0, &nrefs, begpos, endpos)) {
|
||||||
if (pc->cols[1]) {
|
if (pc->cols[1]) {
|
||||||
patcols = pc->cols;
|
patcols = pc->cols;
|
||||||
|
|
||||||
|
|
94
Src/glob.c
94
Src/glob.c
|
@ -2195,8 +2195,13 @@ set_pat_end(Patprog p, char null_me)
|
||||||
static int
|
static int
|
||||||
igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
|
igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
|
||||||
{
|
{
|
||||||
char *s = *sp, *t, sav;
|
char *s = *sp, *t;
|
||||||
int i, l = strlen(*sp), ml = ztrlen(*sp), matched = 1;
|
/*
|
||||||
|
* Note that ioff and ml count characters in the character
|
||||||
|
* set (Meta's are not included), while l counts characters in the
|
||||||
|
* string.
|
||||||
|
*/
|
||||||
|
int ioff, l = strlen(*sp), ml = ztrlen(*sp), matched = 1;
|
||||||
|
|
||||||
repllist = NULL;
|
repllist = NULL;
|
||||||
|
|
||||||
|
@ -2208,7 +2213,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
|
||||||
p->flags &= ~(PAT_NOTSTART|PAT_NOTEND);
|
p->flags &= ~(PAT_NOTSTART|PAT_NOTEND);
|
||||||
|
|
||||||
if (fl & SUB_ALL) {
|
if (fl & SUB_ALL) {
|
||||||
i = matched && pattry(p, s);
|
int i = matched && pattry(p, s);
|
||||||
*sp = get_match_ret(*sp, 0, i ? l : 0, fl, i ? replstr : 0);
|
*sp = get_match_ret(*sp, 0, i ? l : 0, fl, i ? replstr : 0);
|
||||||
if (! **sp && (((fl & SUB_MATCH) && !i) || ((fl & SUB_REST) && i)))
|
if (! **sp && (((fl & SUB_MATCH) && !i) || ((fl & SUB_REST) && i)))
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -2223,25 +2228,22 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
|
||||||
* First get the longest match...
|
* First get the longest match...
|
||||||
*/
|
*/
|
||||||
if (pattry(p, s)) {
|
if (pattry(p, s)) {
|
||||||
char *mpos = patinput;
|
/* patmatchlen returns metafied length, as we need */
|
||||||
|
int mlen = patmatchlen();
|
||||||
if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
|
if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
|
||||||
/*
|
/*
|
||||||
* ... now we know whether it's worth looking for the
|
* ... now we know whether it's worth looking for the
|
||||||
* shortest, which we do by brute force.
|
* shortest, which we do by brute force.
|
||||||
*/
|
*/
|
||||||
for (t = s; t < mpos; METAINC(t)) {
|
for (t = s; t < s + mlen; METAINC(t)) {
|
||||||
sav = *t;
|
set_pat_end(p, *t);
|
||||||
set_pat_end(p, sav);
|
if (pattrylen(p, s, t - s, 0)) {
|
||||||
*t = '\0';
|
mlen = patmatchlen();
|
||||||
if (pattry(p, s)) {
|
|
||||||
mpos = patinput;
|
|
||||||
*t = sav;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
*t = sav;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*sp = get_match_ret(*sp, 0, mpos-s, fl, replstr);
|
*sp = get_match_ret(*sp, 0, mlen, fl, replstr);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -2250,35 +2252,30 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
|
||||||
/* Smallest possible match at tail of string: *
|
/* Smallest possible match at tail of string: *
|
||||||
* move back down string until we get a match. *
|
* move back down string until we get a match. *
|
||||||
* There's no optimization here. */
|
* There's no optimization here. */
|
||||||
patoffset = ml;
|
for (ioff = ml, t = s + l; t >= s; t--, ioff--) {
|
||||||
for (t = s + l; t >= s; t--, patoffset--) {
|
|
||||||
set_pat_start(p, t-s);
|
set_pat_start(p, t-s);
|
||||||
if (pattry(p, t)) {
|
if (pattrylen(p, t, -1, ioff)) {
|
||||||
*sp = get_match_ret(*sp, t - s, l, fl, replstr);
|
*sp = get_match_ret(*sp, t - s, l, fl, replstr);
|
||||||
patoffset = 0;
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
if (t > s+1 && t[-2] == Meta)
|
if (t > s+1 && t[-2] == Meta)
|
||||||
t--;
|
t--;
|
||||||
}
|
}
|
||||||
patoffset = 0;
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case (SUB_END|SUB_LONG):
|
case (SUB_END|SUB_LONG):
|
||||||
/* Largest possible match at tail of string: *
|
/* Largest possible match at tail of string: *
|
||||||
* move forward along string until we get a match. *
|
* move forward along string until we get a match. *
|
||||||
* Again there's no optimisation. */
|
* Again there's no optimisation. */
|
||||||
for (i = 0, t = s; i < l; i++, t++, patoffset++) {
|
for (ioff = 0, t = s; t < s + l; ioff++, t++) {
|
||||||
set_pat_start(p, t-s);
|
set_pat_start(p, t-s);
|
||||||
if (pattry(p, t)) {
|
if (pattrylen(p, t, -1, ioff)) {
|
||||||
*sp = get_match_ret(*sp, i, l, fl, replstr);
|
*sp = get_match_ret(*sp, t-s, l, fl, replstr);
|
||||||
patoffset = 0;
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
if (*t == Meta)
|
if (*t == Meta)
|
||||||
i++, t++;
|
t++;
|
||||||
}
|
}
|
||||||
patoffset = 0;
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SUB_SUBSTR:
|
case SUB_SUBSTR:
|
||||||
|
@ -2293,26 +2290,23 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
|
||||||
t = s;
|
t = s;
|
||||||
if (fl & SUB_GLOBAL)
|
if (fl & SUB_GLOBAL)
|
||||||
repllist = newlinklist();
|
repllist = newlinklist();
|
||||||
|
ioff = 0; /* offset into string */
|
||||||
do {
|
do {
|
||||||
/* loop over all matches for global substitution */
|
/* loop over all matches for global substitution */
|
||||||
matched = 0;
|
matched = 0;
|
||||||
for (; t < s + l; t++, patoffset++) {
|
for (; t < s + l; t++, ioff++) {
|
||||||
/* Find the longest match from this position. */
|
/* Find the longest match from this position. */
|
||||||
set_pat_start(p, t-s);
|
set_pat_start(p, t-s);
|
||||||
if (pattry(p, t)) {
|
if (pattrylen(p, t, -1, ioff)) {
|
||||||
char *mpos = patinput;
|
char *mpos = t + patmatchlen();
|
||||||
if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
|
if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
|
||||||
char *ptr;
|
char *ptr;
|
||||||
for (ptr = t; ptr < mpos; METAINC(ptr)) {
|
for (ptr = t; ptr < mpos; METAINC(ptr)) {
|
||||||
sav = *ptr;
|
set_pat_end(p, *ptr);
|
||||||
set_pat_end(p, sav);
|
if (pattrylen(p, t, ptr - t, ioff)) {
|
||||||
*ptr = '\0';
|
mpos = t + patmatchlen();
|
||||||
if (pattry(p, t)) {
|
|
||||||
mpos = patinput;
|
|
||||||
*ptr = sav;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
*ptr = sav;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!--n || (n <= 0 && (fl & SUB_GLOBAL))) {
|
if (!--n || (n <= 0 && (fl & SUB_GLOBAL))) {
|
||||||
|
@ -2330,7 +2324,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
|
||||||
*/
|
*/
|
||||||
continue;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
patoffset = 0;
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2339,7 +2332,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
|
||||||
* which is already marked for replacement.
|
* which is already marked for replacement.
|
||||||
*/
|
*/
|
||||||
matched = 1;
|
matched = 1;
|
||||||
for ( ; t < mpos; t++, patoffset++)
|
for ( ; t < mpos; t++, ioff++)
|
||||||
if (*t == Meta)
|
if (*t == Meta)
|
||||||
t++;
|
t++;
|
||||||
break;
|
break;
|
||||||
|
@ -2348,7 +2341,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
|
||||||
t++;
|
t++;
|
||||||
}
|
}
|
||||||
} while (matched);
|
} while (matched);
|
||||||
patoffset = 0;
|
|
||||||
/*
|
/*
|
||||||
* check if we can match a blank string, if so do it
|
* check if we can match a blank string, if so do it
|
||||||
* at the start. Goodness knows if this is a good idea
|
* at the start. Goodness knows if this is a good idea
|
||||||
|
@ -2365,50 +2357,39 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
|
||||||
case (SUB_END|SUB_SUBSTR):
|
case (SUB_END|SUB_SUBSTR):
|
||||||
case (SUB_END|SUB_LONG|SUB_SUBSTR):
|
case (SUB_END|SUB_LONG|SUB_SUBSTR):
|
||||||
/* Longest/shortest at end, matching substrings. */
|
/* Longest/shortest at end, matching substrings. */
|
||||||
patoffset = ml;
|
|
||||||
if (!(fl & SUB_LONG)) {
|
if (!(fl & SUB_LONG)) {
|
||||||
set_pat_start(p, l);
|
set_pat_start(p, l);
|
||||||
if (pattry(p, s + l) && !--n) {
|
if (pattrylen(p, s + l, -1, ml) && !--n) {
|
||||||
*sp = get_match_ret(*sp, l, l, fl, replstr);
|
*sp = get_match_ret(*sp, l, l, fl, replstr);
|
||||||
patoffset = 0;
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
patoffset--;
|
for (ioff = ml - 1, t = s + l - 1; t >= s; t--, ioff--) {
|
||||||
for (t = s + l - 1; t >= s; t--, patoffset--) {
|
|
||||||
if (t > s && t[-1] == Meta)
|
if (t > s && t[-1] == Meta)
|
||||||
t--;
|
t--;
|
||||||
set_pat_start(p, t-s);
|
set_pat_start(p, t-s);
|
||||||
if (pattry(p, t) && !--n) {
|
if (pattrylen(p, t, -1, ioff) && !--n) {
|
||||||
/* Found the longest match */
|
/* Found the longest match */
|
||||||
char *mpos = patinput;
|
char *mpos = t + patmatchlen();
|
||||||
if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
|
if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
|
||||||
char *ptr;
|
char *ptr;
|
||||||
for (ptr = t; ptr < mpos; METAINC(ptr)) {
|
for (ptr = t; ptr < mpos; METAINC(ptr)) {
|
||||||
sav = *ptr;
|
set_pat_end(p, *ptr);
|
||||||
set_pat_end(p, sav);
|
if (pattrylen(p, t, ptr - t, ioff)) {
|
||||||
*ptr = '\0';
|
mpos = t + patmatchlen();
|
||||||
if (pattry(p, t)) {
|
|
||||||
mpos = patinput;
|
|
||||||
*ptr = sav;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
*ptr = sav;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*sp = get_match_ret(*sp, t-s, mpos-s, fl, replstr);
|
*sp = get_match_ret(*sp, t-s, mpos-s, fl, replstr);
|
||||||
patoffset = 0;
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
patoffset = ml;
|
|
||||||
set_pat_start(p, l);
|
set_pat_start(p, l);
|
||||||
if ((fl & SUB_LONG) && pattry(p, s + l) && !--n) {
|
if ((fl & SUB_LONG) && pattrylen(p, s + l, -1, ml) && !--n) {
|
||||||
*sp = get_match_ret(*sp, l, l, fl, replstr);
|
*sp = get_match_ret(*sp, l, l, fl, replstr);
|
||||||
patoffset = 0;
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
patoffset = 0;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2419,6 +2400,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
|
||||||
Repldata rd;
|
Repldata rd;
|
||||||
int lleft = 0; /* size of returned string */
|
int lleft = 0; /* size of returned string */
|
||||||
char *ptr, *start;
|
char *ptr, *start;
|
||||||
|
int i;
|
||||||
|
|
||||||
i = 0; /* start of last chunk we got from *sp */
|
i = 0; /* start of last chunk we got from *sp */
|
||||||
for (nd = firstnode(repllist); nd; incnode(nd)) {
|
for (nd = firstnode(repllist); nd; incnode(nd)) {
|
||||||
|
|
675
Src/pattern.c
675
Src/pattern.c
|
@ -47,6 +47,26 @@
|
||||||
*
|
*
|
||||||
* Eagle-eyed readers will notice this is an altered version. Incredibly
|
* Eagle-eyed readers will notice this is an altered version. Incredibly
|
||||||
* sharp-eyed readers might even find bits that weren't altered.
|
* sharp-eyed readers might even find bits that weren't altered.
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* And I experienced a sense that, like certain regular
|
||||||
|
* expressions, seemed to match the day from beginning to end, so
|
||||||
|
* that I did not need to identify the parenthesised subexpression
|
||||||
|
* that told of dawn, nor the group of characters that indicated
|
||||||
|
* the moment when my grandfather returned home with news of
|
||||||
|
* Swann's departure for Paris; and the whole length of the month
|
||||||
|
* of May, as if matched by a closure, fitted into the buffer of my
|
||||||
|
* life with no sign of overflowing, turning the days, like a
|
||||||
|
* procession of insects that could consist of this or that
|
||||||
|
* species, into a random and unstructured repetition of different
|
||||||
|
* sequences, anchored from the first day of the month to the last
|
||||||
|
* in the same fashion as the weeks when I knew I would not see
|
||||||
|
* Gilberte and would search in vain for any occurrences of the
|
||||||
|
* string in the avenue of hawthorns by Tansonville, without my
|
||||||
|
* having to delimit explicitly the start or finish of the pattern.
|
||||||
|
*
|
||||||
|
* M. Proust, "In Search of Lost Files",
|
||||||
|
* bk I, "The Walk by Bourne's Place".
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "zsh.mdh"
|
#include "zsh.mdh"
|
||||||
|
@ -78,7 +98,7 @@ typedef union upat *Upat;
|
||||||
#define P_EXCSYNC 0x01 /* no Test if following exclude already failed */
|
#define P_EXCSYNC 0x01 /* no Test if following exclude already failed */
|
||||||
#define P_EXCEND 0x02 /* no Test if exclude matched orig branch */
|
#define P_EXCEND 0x02 /* no Test if exclude matched orig branch */
|
||||||
#define P_BACK 0x03 /* no Match "", "next" ptr points backward. */
|
#define P_BACK 0x03 /* no Match "", "next" ptr points backward. */
|
||||||
#define P_EXACTLY 0x04 /* str Match this string. */
|
#define P_EXACTLY 0x04 /* lstr Match this string. */
|
||||||
#define P_NOTHING 0x05 /* no Match empty string. */
|
#define P_NOTHING 0x05 /* no Match empty string. */
|
||||||
#define P_ONEHASH 0x06 /* node Match this (simple) thing 0 or more times. */
|
#define P_ONEHASH 0x06 /* node Match this (simple) thing 0 or more times. */
|
||||||
#define P_TWOHASH 0x07 /* node Match this (simple) thing 1 or more times. */
|
#define P_TWOHASH 0x07 /* node Match this (simple) thing 1 or more times. */
|
||||||
|
@ -103,10 +123,14 @@ typedef union upat *Upat;
|
||||||
/* spaces left for P_OPEN+n,... for backreferences */
|
/* spaces left for P_OPEN+n,... for backreferences */
|
||||||
#define P_OPEN 0x80 /* no Mark this point in input as start of n. */
|
#define P_OPEN 0x80 /* no Mark this point in input as start of n. */
|
||||||
#define P_CLOSE 0x90 /* no Analogous to OPEN. */
|
#define P_CLOSE 0x90 /* no Analogous to OPEN. */
|
||||||
/* zl is the range type zrange_t: may be zlong or unsigned long
|
/*
|
||||||
* char is a single char
|
* no no argument
|
||||||
* uc* is a pointer to unsigned char, used at run time and initialised
|
* zr the range type zrange_t: may be zlong or unsigned long
|
||||||
|
* char a single char
|
||||||
|
* uc* a pointer to unsigned char, used at run time and initialised
|
||||||
* to NULL.
|
* to NULL.
|
||||||
|
* str null-terminated, metafied string
|
||||||
|
* lstr length as long then string, not null-terminated, unmetafied.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -179,16 +203,24 @@ typedef union upat *Upat;
|
||||||
#define P_ISEXCLUDE(p) (((p)->l & 0x30) == 0x30)
|
#define P_ISEXCLUDE(p) (((p)->l & 0x30) == 0x30)
|
||||||
#define P_NOTDOT(p) ((p)->l & 0x40)
|
#define P_NOTDOT(p) ((p)->l & 0x40)
|
||||||
|
|
||||||
|
/* Specific to lstr type, i.e. P_EXACTLY. */
|
||||||
|
#define P_LS_LEN(p) ((p)[1].l) /* can be used as lvalue */
|
||||||
|
#define P_LS_STR(p) ((char *)((p) + 2))
|
||||||
|
|
||||||
/* Flags needed when pattern is executed */
|
/* Flags needed when pattern is executed */
|
||||||
#define P_SIMPLE 0x01 /* Simple enough to be #/## operand. */
|
#define P_SIMPLE 0x01 /* Simple enough to be #/## operand. */
|
||||||
#define P_HSTART 0x02 /* Starts with # or ##'d pattern. */
|
#define P_HSTART 0x02 /* Starts with # or ##'d pattern. */
|
||||||
#define P_PURESTR 0x04 /* Can be matched with a strcmp */
|
#define P_PURESTR 0x04 /* Can be matched with a strcmp */
|
||||||
|
|
||||||
/* Next character after one which may be a Meta (x is any char *) */
|
|
||||||
#define METANEXT(x) (*(x) == Meta ? (x)+2 : (x)+1)
|
|
||||||
/*
|
/*
|
||||||
* Increment pointer which may be on a Meta (x is a pointer variable),
|
* Increment pointer which may be on a Meta (x is a pointer variable),
|
||||||
* returning the incremented value (i.e. like pre-increment).
|
* returning the incremented value (i.e. like pre-increment).
|
||||||
|
*
|
||||||
|
* In future the following will need to refer to metafied multibyte
|
||||||
|
* characters. References to invidual characters are not turned
|
||||||
|
* into a macro when the characters is metafied (c.f. CHARREF()
|
||||||
|
* below then the character is not metafied) and will need tracking
|
||||||
|
* down.
|
||||||
*/
|
*/
|
||||||
#define METAINC(x) ((x) += (*(x) == Meta) ? 2 : 1)
|
#define METAINC(x) ((x) += (*(x) == Meta) ? 2 : 1)
|
||||||
/*
|
/*
|
||||||
|
@ -254,13 +286,18 @@ static int patglobflags; /* globbing flags & approx */
|
||||||
|
|
||||||
/* Add n more characters, ensuring there is enough space. */
|
/* Add n more characters, ensuring there is enough space. */
|
||||||
|
|
||||||
|
enum {
|
||||||
|
PA_NOALIGN = 1,
|
||||||
|
PA_UNMETA = 2
|
||||||
|
};
|
||||||
|
|
||||||
/**/
|
/**/
|
||||||
static void
|
static void
|
||||||
patadd(char *add, int ch, long n, int noalgn)
|
patadd(char *add, int ch, long n, int paflags)
|
||||||
{
|
{
|
||||||
/* Make sure everything gets aligned unless we get noalgn. */
|
/* Make sure everything gets aligned unless we get PA_NOALIGN. */
|
||||||
long newpatsize = patsize + n;
|
long newpatsize = patsize + n;
|
||||||
if (!noalgn)
|
if (!(paflags & PA_NOALIGN))
|
||||||
newpatsize = (newpatsize + sizeof(union upat) - 1) &
|
newpatsize = (newpatsize + sizeof(union upat) - 1) &
|
||||||
~(sizeof(union upat) - 1);
|
~(sizeof(union upat) - 1);
|
||||||
if (patalloc < newpatsize) {
|
if (patalloc < newpatsize) {
|
||||||
|
@ -272,8 +309,25 @@ patadd(char *add, int ch, long n, int noalgn)
|
||||||
}
|
}
|
||||||
patsize = newpatsize;
|
patsize = newpatsize;
|
||||||
if (add) {
|
if (add) {
|
||||||
while (n--)
|
if (paflags & PA_UNMETA) {
|
||||||
*patcode++ = *add++;
|
/*
|
||||||
|
* Unmetafy and untokenize the string as we go.
|
||||||
|
* The Meta characters in add aren't counted in n.
|
||||||
|
*/
|
||||||
|
while (n--) {
|
||||||
|
if (itok(*add))
|
||||||
|
*patcode++ = ztokens[*add++ - Pound];
|
||||||
|
else if (*add == Meta) {
|
||||||
|
add++;
|
||||||
|
*patcode++ = *add++ ^ 32;
|
||||||
|
} else {
|
||||||
|
*patcode++ = *add++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
while (n--)
|
||||||
|
*patcode++ = *add++;
|
||||||
|
}
|
||||||
} else
|
} else
|
||||||
*patcode++ = ch;
|
*patcode++ = ch;
|
||||||
patcode = patout + patsize;
|
patcode = patout + patsize;
|
||||||
|
@ -297,13 +351,22 @@ patcompstart(void)
|
||||||
patglobflags = GF_IGNCASE;
|
patglobflags = GF_IGNCASE;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Top level pattern compilation subroutine */
|
/*
|
||||||
|
* Top level pattern compilation subroutine
|
||||||
|
* exp is a null-terminated, metafied string.
|
||||||
|
* inflags is an or of some PAT_* flags.
|
||||||
|
* endexp, if non-null, is set to a pointer to the end of the
|
||||||
|
* part of exp which was compiled. This is used when
|
||||||
|
* compiling patterns for directories which must be
|
||||||
|
* matched recursively.
|
||||||
|
*/
|
||||||
|
|
||||||
/**/
|
/**/
|
||||||
mod_export Patprog
|
mod_export Patprog
|
||||||
patcompile(char *exp, int inflags, char **endexp)
|
patcompile(char *exp, int inflags, char **endexp)
|
||||||
{
|
{
|
||||||
int flags = 0, len = 0;
|
int flags = 0;
|
||||||
|
long len = 0;
|
||||||
long startoff;
|
long startoff;
|
||||||
Upat pscan;
|
Upat pscan;
|
||||||
char *lng, *strp = NULL;
|
char *lng, *strp = NULL;
|
||||||
|
@ -324,7 +387,7 @@ patcompile(char *exp, int inflags, char **endexp)
|
||||||
* in struct is actual count of parentheses.
|
* in struct is actual count of parentheses.
|
||||||
*/
|
*/
|
||||||
patnpar = 1;
|
patnpar = 1;
|
||||||
patflags = inflags & ~PAT_PURES;
|
patflags = inflags & ~(PAT_PURES|PAT_HAS_EXCLUDP);
|
||||||
|
|
||||||
patendseg = endseg;
|
patendseg = endseg;
|
||||||
patendseglen = isset(EXTENDEDGLOB) ? PATENDSEGLEN_EXT : PATENDSEGLEN_NORM;
|
patendseglen = isset(EXTENDEDGLOB) ? PATENDSEGLEN_EXT : PATENDSEGLEN_NORM;
|
||||||
|
@ -366,7 +429,12 @@ patcompile(char *exp, int inflags, char **endexp)
|
||||||
if (patcompswitch(0, &flags) == 0)
|
if (patcompswitch(0, &flags) == 0)
|
||||||
return NULL;
|
return NULL;
|
||||||
} else {
|
} else {
|
||||||
/* Yes, copy the string and skip compilation altogether */
|
/*
|
||||||
|
* Yes, copy the string, and skip compilation altogether.
|
||||||
|
* Null terminate for the benefit of globbing.
|
||||||
|
* Leave metafied both for globbing and for our own
|
||||||
|
* efficiency.
|
||||||
|
*/
|
||||||
patparse = strp;
|
patparse = strp;
|
||||||
len = strp - exp;
|
len = strp - exp;
|
||||||
patadd(exp, 0, len + 1, 0);
|
patadd(exp, 0, len + 1, 0);
|
||||||
|
@ -404,19 +472,52 @@ patcompile(char *exp, int inflags, char **endexp)
|
||||||
for (; pscan; pscan = next) {
|
for (; pscan; pscan = next) {
|
||||||
next = PATNEXT(pscan);
|
next = PATNEXT(pscan);
|
||||||
if (P_OP(pscan) == P_EXACTLY) {
|
if (P_OP(pscan) == P_EXACTLY) {
|
||||||
char *opnd = (char *)P_OPERAND(pscan);
|
char *opnd = P_LS_STR(pscan), *mtest;
|
||||||
while ((*dst = *opnd++))
|
long oplen = P_LS_LEN(pscan), ilen;
|
||||||
dst++;
|
int nmeta = 0;
|
||||||
|
/*
|
||||||
|
* Unfortunately we unmetafied the string
|
||||||
|
* and we need to put any metacharacters
|
||||||
|
* back now we know it's a pure string.
|
||||||
|
* This shouldn't happen too often, it's
|
||||||
|
* just that there are some cases such
|
||||||
|
* as . and .. in files where we really
|
||||||
|
* need a pure string even if there are
|
||||||
|
* pattern characters flying around.
|
||||||
|
*/
|
||||||
|
for (mtest = opnd, ilen = oplen; ilen;
|
||||||
|
mtest++, ilen--)
|
||||||
|
if (imeta(*mtest))
|
||||||
|
nmeta++;
|
||||||
|
if (nmeta) {
|
||||||
|
char *oldpatout = patout;
|
||||||
|
patadd(NULL, 0, nmeta, 0);
|
||||||
|
/*
|
||||||
|
* Yuk.
|
||||||
|
*/
|
||||||
|
p = (Patprog)patout;
|
||||||
|
opnd = patout + (opnd - oldpatout);
|
||||||
|
dst = patout + startoff;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (oplen--) {
|
||||||
|
if (imeta(*opnd)) {
|
||||||
|
*dst++ = Meta;
|
||||||
|
*dst++ = *opnd ^ 32;
|
||||||
|
} else {
|
||||||
|
*dst++ = *opnd++;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*dst++ = '\0';
|
|
||||||
p->size = dst - patout;
|
p->size = dst - patout;
|
||||||
/* patmlen is really strlen, don't include null byte */
|
/* patmlen is really strlen. We don't need a null. */
|
||||||
p->patmlen = p->size - startoff - 1;
|
p->patmlen = p->size - startoff;
|
||||||
} else {
|
} else {
|
||||||
/* starting point info */
|
/* starting point info */
|
||||||
if (P_OP(pscan) == P_EXACTLY && !p->globflags)
|
if (P_OP(pscan) == P_EXACTLY && !p->globflags &&
|
||||||
p->patstartch = *(char *)P_OPERAND(pscan);
|
P_LS_LEN(pscan))
|
||||||
|
p->patstartch = *P_LS_STR(pscan);
|
||||||
/*
|
/*
|
||||||
* Find the longest literal string in something expensive.
|
* Find the longest literal string in something expensive.
|
||||||
* This is itself not all that cheap if we have
|
* This is itself not all that cheap if we have
|
||||||
|
@ -427,9 +528,9 @@ patcompile(char *exp, int inflags, char **endexp)
|
||||||
len = 0;
|
len = 0;
|
||||||
for (; pscan; pscan = PATNEXT(pscan))
|
for (; pscan; pscan = PATNEXT(pscan))
|
||||||
if (P_OP(pscan) == P_EXACTLY &&
|
if (P_OP(pscan) == P_EXACTLY &&
|
||||||
(int)strlen((char *)P_OPERAND(pscan)) >= len) {
|
P_LS_LEN(pscan) >= len) {
|
||||||
lng = (char *)P_OPERAND(pscan);
|
lng = P_LS_STR(pscan);
|
||||||
len = strlen(lng);
|
len = P_LS_LEN(pscan);
|
||||||
}
|
}
|
||||||
if (lng) {
|
if (lng) {
|
||||||
p->mustoff = lng - patout;
|
p->mustoff = lng - patout;
|
||||||
|
@ -844,9 +945,9 @@ static long
|
||||||
patcomppiece(int *flagp)
|
patcomppiece(int *flagp)
|
||||||
{
|
{
|
||||||
long starter = 0, next, pound, op;
|
long starter = 0, next, pound, op;
|
||||||
int flags, flags2, kshchar, len, ch, patch;
|
int flags, flags2, kshchar, len, ch, patch, nmeta;
|
||||||
union upat up;
|
union upat up;
|
||||||
char *nptr, *str0, cbuf[2];
|
char *nptr, *str0, *ptr, cbuf[2];
|
||||||
zrange_t from, to;
|
zrange_t from, to;
|
||||||
|
|
||||||
flags = 0;
|
flags = 0;
|
||||||
|
@ -881,6 +982,9 @@ patcomppiece(int *flagp)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (patparse > str0) {
|
if (patparse > str0) {
|
||||||
|
long slen = patparse - str0;
|
||||||
|
int morelen;
|
||||||
|
|
||||||
/* Ordinary string: cancel kshchar lookahead */
|
/* Ordinary string: cancel kshchar lookahead */
|
||||||
kshchar = '\0';
|
kshchar = '\0';
|
||||||
/*
|
/*
|
||||||
|
@ -889,25 +993,40 @@ patcomppiece(int *flagp)
|
||||||
flags |= P_PURESTR;
|
flags |= P_PURESTR;
|
||||||
DPUTS(patparse == str0, "BUG: matched nothing in patcomppiece.");
|
DPUTS(patparse == str0, "BUG: matched nothing in patcomppiece.");
|
||||||
/* more than one character matched? */
|
/* more than one character matched? */
|
||||||
len = str0 + (*str0 == Meta ? 2 : 1) < patparse;
|
morelen = str0 + (*str0 == Meta ? 2 : 1) < patparse;
|
||||||
/*
|
/*
|
||||||
* If we have more than one character, a following hash only
|
* If we have more than one character, a following hash only
|
||||||
* applies to the last, so decrement.
|
* applies to the last, so decrement.
|
||||||
*/
|
*/
|
||||||
if (isset(EXTENDEDGLOB) && *patparse == Pound && len)
|
if (isset(EXTENDEDGLOB) && *patparse == Pound && morelen)
|
||||||
patparse -= (patparse > str0 + 1 && patparse[-2] == Meta) ? 2 : 1;
|
patparse -= (patparse > str0 + 1 && patparse[-2] == Meta) ? 2 : 1;
|
||||||
/*
|
/*
|
||||||
* If len is 1, we can't have an active # following, so doesn't
|
* If len is 1, we can't have an active # following, so doesn't
|
||||||
* matter that we don't make X in `XX#' simple.
|
* matter that we don't make X in `XX#' simple.
|
||||||
*/
|
*/
|
||||||
if (!len)
|
if (!morelen)
|
||||||
flags |= P_SIMPLE;
|
flags |= P_SIMPLE;
|
||||||
starter = patnode(P_EXACTLY);
|
starter = patnode(P_EXACTLY);
|
||||||
/* add enough space including null byte */
|
|
||||||
len = patparse - str0;
|
/* Get length of string without metafication. */
|
||||||
patadd(str0, 0, len + 1, 0);
|
nmeta = 0;
|
||||||
nptr = (char *)P_OPERAND((Upat)patout + starter);
|
for (ptr = str0; ptr < patparse; ptr++) {
|
||||||
nptr[len] = '\0';
|
if (*ptr == Meta) {
|
||||||
|
nmeta++;
|
||||||
|
ptr++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
slen = (patparse - str0) - nmeta;
|
||||||
|
/* First add length, which is a long */
|
||||||
|
patadd((char *)&slen, 0, sizeof(long), 0);
|
||||||
|
/*
|
||||||
|
* Then the string, not null terminated.
|
||||||
|
* Unmetafy and untokenize; pass the final length,
|
||||||
|
* which is what we need to allocate, i.e. not including
|
||||||
|
* a count for each Meta in the string.
|
||||||
|
*/
|
||||||
|
patadd(str0, 0, slen, PA_UNMETA);
|
||||||
|
nptr = P_LS_STR((Upat)patout + starter);
|
||||||
/*
|
/*
|
||||||
* It's much simpler to turn off pure string mode for
|
* It's much simpler to turn off pure string mode for
|
||||||
* any case-insensitive or approximate matching; usually,
|
* any case-insensitive or approximate matching; usually,
|
||||||
|
@ -918,13 +1037,13 @@ patcomppiece(int *flagp)
|
||||||
* ..(#a1).. (i.e. the (#a1) has no effect), but if you're
|
* ..(#a1).. (i.e. the (#a1) has no effect), but if you're
|
||||||
* going to write funny patterns, you get no sympathy from me.
|
* going to write funny patterns, you get no sympathy from me.
|
||||||
*/
|
*/
|
||||||
if (patglobflags &&
|
if (patglobflags) {
|
||||||
(!(patflags & PAT_FILE) || (strcmp(nptr, ".") &&
|
if (!(patflags & PAT_FILE))
|
||||||
strcmp(nptr, ".."))))
|
flags &= ~P_PURESTR;
|
||||||
flags &= ~P_PURESTR;
|
else if (!(nptr[0] == '.' &&
|
||||||
for (; *nptr; METAINC(nptr))
|
(slen == 1 || (nptr[1] == '.' && slen == 2))))
|
||||||
if (itok(*nptr))
|
flags &= ~P_PURESTR;
|
||||||
*nptr = ztokens[*nptr - Pound];
|
}
|
||||||
} else {
|
} else {
|
||||||
if (kshchar)
|
if (kshchar)
|
||||||
patparse++;
|
patparse++;
|
||||||
|
@ -950,7 +1069,7 @@ patcomppiece(int *flagp)
|
||||||
starter = patnode(P_ANYOF);
|
starter = patnode(P_ANYOF);
|
||||||
if (*patparse == Outbrack) {
|
if (*patparse == Outbrack) {
|
||||||
patparse++;
|
patparse++;
|
||||||
patadd(NULL, ']', 1, 1);
|
patadd(NULL, ']', 1, PA_NOALIGN);
|
||||||
}
|
}
|
||||||
while (*patparse && *patparse != Outbrack) {
|
while (*patparse && *patparse != Outbrack) {
|
||||||
/* Meta is not a token */
|
/* Meta is not a token */
|
||||||
|
@ -990,7 +1109,7 @@ patcomppiece(int *flagp)
|
||||||
ch = PP_UNKWN;
|
ch = PP_UNKWN;
|
||||||
patparse = nptr + 2;
|
patparse = nptr + 2;
|
||||||
if (ch != PP_UNKWN)
|
if (ch != PP_UNKWN)
|
||||||
patadd(NULL, STOUC(Meta+ch), 1, 1);
|
patadd(NULL, STOUC(Meta+ch), 1, PA_NOALIGN);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (itok(*patparse)) {
|
if (itok(*patparse)) {
|
||||||
|
@ -1003,15 +1122,17 @@ patcomppiece(int *flagp)
|
||||||
patparse++;
|
patparse++;
|
||||||
|
|
||||||
if (*patparse == '-' && patparse[1] != Outbrack) {
|
if (*patparse == '-' && patparse[1] != Outbrack) {
|
||||||
patadd(NULL, STOUC(Meta+PP_RANGE), 1, 1);
|
patadd(NULL, STOUC(Meta+PP_RANGE), 1, PA_NOALIGN);
|
||||||
patadd(cbuf, 0, (cbuf[0] == Meta) ? 2 : 1, 1);
|
patadd(cbuf, 0, (cbuf[0] == Meta) ? 2 : 1, PA_NOALIGN);
|
||||||
if (itok(*++patparse)) {
|
if (itok(*++patparse)) {
|
||||||
patadd(0, STOUC(ztokens[*patparse - Pound]), 1, 1);
|
patadd(0, STOUC(ztokens[*patparse - Pound]), 1,
|
||||||
|
PA_NOALIGN);
|
||||||
} else
|
} else
|
||||||
patadd(patparse, 0, (*patparse == Meta) ? 2 : 1, 1);
|
patadd(patparse, 0, (*patparse == Meta) ? 2 : 1,
|
||||||
|
PA_NOALIGN);
|
||||||
METAINC(patparse);
|
METAINC(patparse);
|
||||||
} else
|
} else
|
||||||
patadd(cbuf, 0, (cbuf[0] == Meta) ? 2 : 1, 1);
|
patadd(cbuf, 0, (cbuf[0] == Meta) ? 2 : 1, PA_NOALIGN);
|
||||||
}
|
}
|
||||||
if (*patparse != Outbrack)
|
if (*patparse != Outbrack)
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -1309,19 +1430,11 @@ static void patoptail(long p, long val)
|
||||||
*/
|
*/
|
||||||
static char *patinstart; /* Start of input string */
|
static char *patinstart; /* Start of input string */
|
||||||
static char *patinend; /* End of input string */
|
static char *patinend; /* End of input string */
|
||||||
|
static char *patinput; /* String input pointer */
|
||||||
static char *patinpath; /* Full path for use with ~ exclusions */
|
static char *patinpath; /* Full path for use with ~ exclusions */
|
||||||
|
static int patinlen; /* Length of last successful match.
|
||||||
/**/
|
* Includes count of Meta characters.
|
||||||
char *patinput; /* String input pointer */
|
*/
|
||||||
|
|
||||||
/*
|
|
||||||
* Offset of string at which we are trying to match.
|
|
||||||
* This is added in to the positions recorded in patbeginp and patendp
|
|
||||||
* when we are looking for substrings. Currently this only happens
|
|
||||||
* in the parameter substitution code.
|
|
||||||
*/
|
|
||||||
/**/
|
|
||||||
int patoffset;
|
|
||||||
|
|
||||||
static char *patbeginp[NSUBEXP]; /* Pointer to backref beginnings */
|
static char *patbeginp[NSUBEXP]; /* Pointer to backref beginnings */
|
||||||
static char *patendp[NSUBEXP]; /* Pointer to backref ends */
|
static char *patendp[NSUBEXP]; /* Pointer to backref ends */
|
||||||
|
@ -1329,9 +1442,24 @@ static int parsfound; /* parentheses (with backrefs) found */
|
||||||
|
|
||||||
static int globdots; /* Glob initial dots? */
|
static int globdots; /* Glob initial dots? */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Macros which are currently trivial but are likely to be less
|
||||||
|
* so when we handle multibyte characters. They operate on
|
||||||
|
* umetafied strings.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Get a character from the start point in a string */
|
||||||
|
#define CHARREF(x) (STOUC(*x))
|
||||||
|
/* Get a pointer to the next character */
|
||||||
|
#define CHARNEXT(x) (x+1)
|
||||||
|
/* Increment a pointer past the current character. */
|
||||||
|
#define CHARINC(x) (x++)
|
||||||
|
/* Counter the number of characters between two pointers, largest first */
|
||||||
|
#define CHARSUB(x,y) (x-y)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The following need to be accessed in the globbing scanner for
|
* The following need to be accessed in the globbing scanner for
|
||||||
* a multi-component file path. See horror story there.
|
* a multi-component file path. See horror story in glob.c.
|
||||||
*/
|
*/
|
||||||
/**/
|
/**/
|
||||||
int errsfound; /* Total error count so far */
|
int errsfound; /* Total error count so far */
|
||||||
|
@ -1347,23 +1475,59 @@ pattrystart(void)
|
||||||
errsfound = 0;
|
errsfound = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test prog against null-terminated, metafied string.
|
||||||
|
*/
|
||||||
|
|
||||||
/**/
|
/**/
|
||||||
mod_export int
|
mod_export int
|
||||||
pattry(Patprog prog, char *string)
|
pattry(Patprog prog, char *string)
|
||||||
{
|
{
|
||||||
return pattryrefs(prog, string, NULL, NULL, NULL);
|
return pattryrefs(prog, string, -1, 0, NULL, NULL, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* The last three arguments are used to report the positions for the
|
/*
|
||||||
* backreferences. On entry, *nump should contain the maximum number
|
* Test prog against string of given length, no null termination
|
||||||
* positions to report. */
|
* but still metafied at this point. offset gives an offset
|
||||||
|
* to include in reported match indices
|
||||||
|
*/
|
||||||
|
|
||||||
/**/
|
/**/
|
||||||
mod_export int
|
mod_export int
|
||||||
pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp)
|
pattrylen(Patprog prog, char *string, int len, int offset)
|
||||||
{
|
{
|
||||||
int i, maxnpos = 0, ret;
|
return pattryrefs(prog, string, len, offset, NULL, NULL, NULL);
|
||||||
char **sp, **ep;
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test prog against string with given length stringlen, which
|
||||||
|
* may be -1 to indicate a null-terminated string. The input
|
||||||
|
* string is metafied; the length is the raw string length, not the
|
||||||
|
* number of possibly metafied characters.
|
||||||
|
*
|
||||||
|
* offset is the position in the original string (not seen by
|
||||||
|
* the patter module) at which we are trying to match.
|
||||||
|
* This is added in to the positions recorded in patbeginp and patendp
|
||||||
|
* when we are looking for substrings. Currently this only happens
|
||||||
|
* in the parameter substitution code.
|
||||||
|
*
|
||||||
|
* Note this is a character offset, i.e. a metafied character
|
||||||
|
* counts as 1.
|
||||||
|
*
|
||||||
|
* The last three arguments are used to report the positions for the
|
||||||
|
* backreferences. On entry, *nump should contain the maximum number
|
||||||
|
* of positions to report. In this case the match, mbegin, mend
|
||||||
|
* arrays are not altered.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**/
|
||||||
|
mod_export int
|
||||||
|
pattryrefs(Patprog prog, char *string, int stringlen, int patoffset,
|
||||||
|
int *nump, int *begp, int *endp)
|
||||||
|
{
|
||||||
|
int i, maxnpos = 0, ret, needfullpath, unmetalen, unmetalenp;
|
||||||
|
int origlen;
|
||||||
|
char **sp, **ep, *tryalloced, *ptr;
|
||||||
char *progstr = (char *)prog + prog->startoff;
|
char *progstr = (char *)prog + prog->startoff;
|
||||||
|
|
||||||
if (nump) {
|
if (nump) {
|
||||||
|
@ -1374,12 +1538,87 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp)
|
||||||
if (*string == Nularg)
|
if (*string == Nularg)
|
||||||
string++;
|
string++;
|
||||||
|
|
||||||
patinstart = patinput = string;
|
if (stringlen < 0)
|
||||||
patinend = patinstart + strlen(patinstart);
|
stringlen = strlen(string);
|
||||||
|
origlen = stringlen;
|
||||||
|
|
||||||
|
patflags = prog->flags;
|
||||||
|
/*
|
||||||
|
* For a top-level ~-exclusion, we will need the full
|
||||||
|
* path to exclude, so copy the path so far and append the
|
||||||
|
* current test string.
|
||||||
|
*/
|
||||||
|
needfullpath = (patflags & PAT_HAS_EXCLUDP) && pathpos;
|
||||||
|
|
||||||
|
/* Get the length of the full string when unmetafied. */
|
||||||
|
unmetalen = ztrsub(string + stringlen, string);
|
||||||
|
if (needfullpath)
|
||||||
|
unmetalenp = ztrsub(pathbuf + pathpos, pathbuf);
|
||||||
|
else
|
||||||
|
unmetalenp = 0;
|
||||||
|
|
||||||
|
DPUTS(needfullpath && (patflags & (PAT_PURES|PAT_ANY)),
|
||||||
|
"rum sort of file exclusion");
|
||||||
|
/*
|
||||||
|
* Partly for efficiency, and partly for the convenience of
|
||||||
|
* globbing, we don't unmetafy pure string patterns, and
|
||||||
|
* there's no reason to if the pattern is just a *.
|
||||||
|
*/
|
||||||
|
if (!(patflags & (PAT_PURES|PAT_ANY))
|
||||||
|
&& (needfullpath || unmetalen != stringlen)) {
|
||||||
|
/*
|
||||||
|
* We need to copy if we need to prepend the path so far
|
||||||
|
* (in which case we copy both chunks), or if we have
|
||||||
|
* Meta characters.
|
||||||
|
*/
|
||||||
|
char *dst;
|
||||||
|
int icopy, ncopy;
|
||||||
|
|
||||||
|
dst = tryalloced = zalloc(unmetalen + unmetalenp);
|
||||||
|
|
||||||
|
if (needfullpath) {
|
||||||
|
/* loop twice, copy path buffer first time */
|
||||||
|
ptr = pathbuf;
|
||||||
|
ncopy = unmetalenp;
|
||||||
|
} else {
|
||||||
|
/* just loop once, copy string with unmetafication */
|
||||||
|
ptr = string;
|
||||||
|
ncopy = unmetalen;
|
||||||
|
}
|
||||||
|
for (icopy = 0; icopy < 2; icopy++) {
|
||||||
|
for (i = 0; i < ncopy; i++) {
|
||||||
|
if (*ptr == Meta) {
|
||||||
|
ptr++;
|
||||||
|
*dst++ = *ptr++ ^ 32;
|
||||||
|
} else {
|
||||||
|
*dst++ = *ptr++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!needfullpath)
|
||||||
|
break;
|
||||||
|
/* next time append test string to path so far */
|
||||||
|
ptr = string;
|
||||||
|
ncopy = unmetalen;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (needfullpath) {
|
||||||
|
patinstart = tryalloced + unmetalenp;
|
||||||
|
patinpath = tryalloced;
|
||||||
|
} else {
|
||||||
|
patinstart = tryalloced;
|
||||||
|
patinpath = NULL;
|
||||||
|
}
|
||||||
|
stringlen = unmetalen;
|
||||||
|
} else {
|
||||||
|
patinstart = string;
|
||||||
|
tryalloced = patinpath = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
patinend = patinstart + stringlen;
|
||||||
/*
|
/*
|
||||||
* From now on we do not require NULL termination of
|
* From now on we do not require NULL termination of
|
||||||
* the test string. It is still metafied, as is string
|
* the test string. There should also be no more references
|
||||||
* data in the prog.
|
* to the variable string.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if (prog->flags & (PAT_PURES|PAT_ANY)) {
|
if (prog->flags & (PAT_PURES|PAT_ANY)) {
|
||||||
|
@ -1399,11 +1638,11 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp)
|
||||||
* Testing a pure string. See if initial
|
* Testing a pure string. See if initial
|
||||||
* components match.
|
* components match.
|
||||||
*/
|
*/
|
||||||
int lendiff = (patinend - patinstart) - prog->patmlen;
|
int lendiff = stringlen - prog->patmlen;
|
||||||
if (lendiff < 0) {
|
if (lendiff < 0) {
|
||||||
/* No, the pattern string is too long. */
|
/* No, the pattern string is too long. */
|
||||||
ret = 0;
|
ret = 0;
|
||||||
} else if (!memcmp(progstr, string, prog->patmlen)) {
|
} else if (!memcmp(progstr, patinstart, prog->patmlen)) {
|
||||||
/*
|
/*
|
||||||
* Initial component matches. Matches either
|
* Initial component matches. Matches either
|
||||||
* if lengths are the same or we are not anchored
|
* if lengths are the same or we are not anchored
|
||||||
|
@ -1420,47 +1659,61 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp)
|
||||||
* For files, we won't match initial "."s unless
|
* For files, we won't match initial "."s unless
|
||||||
* glob_dots is set.
|
* glob_dots is set.
|
||||||
*/
|
*/
|
||||||
if ((prog->flags & PAT_NOGLD) && *string == '.')
|
if ((prog->flags & PAT_NOGLD) && *patinstart == '.') {
|
||||||
return 0;
|
ret = 0;
|
||||||
/* in case used for ${..#..} etc. */
|
} else {
|
||||||
patinput = string + prog->patmlen;
|
/*
|
||||||
/* if matching files, must update globbing flags */
|
* Remember the length in case used for ${..#..} etc.
|
||||||
patglobflags = prog->globend;
|
* In this case, we didn't unmetafy the string.
|
||||||
return 1;
|
*/
|
||||||
} else
|
patinlen = (int)prog->patmlen;
|
||||||
return 0;
|
/* if matching files, must update globbing flags */
|
||||||
|
patglobflags = prog->globend;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tryalloced)
|
||||||
|
zfree(tryalloced, unmetalen + unmetalenp);
|
||||||
|
|
||||||
|
return ret;
|
||||||
} else {
|
} else {
|
||||||
/*
|
/*
|
||||||
* Test for a `must match' string, unless we're scanning for a match
|
* Test for a `must match' string, unless we're scanning for a match
|
||||||
* in which case we don't need to do this each time.
|
* in which case we don't need to do this each time.
|
||||||
*/
|
*/
|
||||||
|
ret = 1;
|
||||||
if (!(prog->flags & PAT_SCAN) && prog->mustoff)
|
if (!(prog->flags & PAT_SCAN) && prog->mustoff)
|
||||||
{
|
{
|
||||||
char *testptr; /* start pointer into test string */
|
char *testptr; /* start pointer into test string */
|
||||||
char *teststop; /* last point from which we can match */
|
char *teststop; /* last point from which we can match */
|
||||||
char *patptr = (char *)prog + prog->mustoff;
|
char *patptr = (char *)prog + prog->mustoff;
|
||||||
int patlen = strlen(patptr);
|
int patlen = prog->patmlen;
|
||||||
int found = 0;
|
int found = 0;
|
||||||
|
|
||||||
if (patlen > patinend - patinstart) {
|
if (patlen > stringlen) {
|
||||||
/* Too long, can't match. */
|
/* Too long, can't match. */
|
||||||
return 0;
|
ret = 0;
|
||||||
}
|
} else {
|
||||||
teststop = patinend - patlen;
|
teststop = patinend - patlen;
|
||||||
|
|
||||||
for (testptr = patinstart; testptr <= teststop; testptr++)
|
for (testptr = patinstart; testptr <= teststop; testptr++)
|
||||||
{
|
{
|
||||||
if (!memcmp(testptr, patptr, patlen)) {
|
if (!memcmp(testptr, patptr, patlen)) {
|
||||||
found = 1;
|
found = 1;
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (!found)
|
if (!found)
|
||||||
return 0;
|
ret = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!ret) {
|
||||||
|
if (tryalloced)
|
||||||
|
zfree(tryalloced, unmetalen + unmetalenp);
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
patflags = prog->flags;
|
|
||||||
patglobflags = prog->globflags;
|
patglobflags = prog->globflags;
|
||||||
if (!(patflags & PAT_FILE)) {
|
if (!(patflags & PAT_FILE)) {
|
||||||
forceerrs = -1;
|
forceerrs = -1;
|
||||||
|
@ -1469,26 +1722,7 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp)
|
||||||
globdots = !(patflags & PAT_NOGLD);
|
globdots = !(patflags & PAT_NOGLD);
|
||||||
parsfound = 0;
|
parsfound = 0;
|
||||||
|
|
||||||
if ((patflags & PAT_HAS_EXCLUDP) && pathpos) {
|
patinput = patinstart;
|
||||||
/*
|
|
||||||
* For a top-level ~-exclusion, we will need the full
|
|
||||||
* path to exclude, so copy the path so far and append the
|
|
||||||
* current test string.
|
|
||||||
*
|
|
||||||
* There are some advantages in making patinstart etc.
|
|
||||||
* point into this new string; however, that gets confusing
|
|
||||||
* if we need patinput outside this file. That's
|
|
||||||
* not likely for files but I don't think it's worth
|
|
||||||
* the risk.
|
|
||||||
*/
|
|
||||||
int len = patinend - patinstart;
|
|
||||||
|
|
||||||
patinpath = (char *)zalloc(pathpos + len);
|
|
||||||
memcpy(patinpath, pathbuf, pathpos);
|
|
||||||
memcpy(patinpath + pathpos, patinstart, len);
|
|
||||||
} else {
|
|
||||||
patinpath = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (patmatch((Upat)progstr)) {
|
if (patmatch((Upat)progstr)) {
|
||||||
/*
|
/*
|
||||||
|
@ -1496,6 +1730,23 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp)
|
||||||
* failed, so set it now
|
* failed, so set it now
|
||||||
*/
|
*/
|
||||||
patglobflags = prog->globend;
|
patglobflags = prog->globend;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Record length of successful match, including Meta
|
||||||
|
* characters. Do it here so that patmatchlen() can return
|
||||||
|
* it even if we delete the pattern strings.
|
||||||
|
*/
|
||||||
|
patinlen = patinput - patinstart;
|
||||||
|
/*
|
||||||
|
* Optimization: if we didn't find any Meta characters
|
||||||
|
* to begin with, we don't need to look for them now.
|
||||||
|
*/
|
||||||
|
if (unmetalen != origlen) {
|
||||||
|
for (ptr = patinstart; ptr < patinput; ptr++)
|
||||||
|
if (imeta(*ptr))
|
||||||
|
patinlen++;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Should we clear backreferences and matches on a failed
|
* Should we clear backreferences and matches on a failed
|
||||||
* match?
|
* match?
|
||||||
|
@ -1504,15 +1755,18 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp)
|
||||||
/*
|
/*
|
||||||
* m flag: for global match. This carries no overhead
|
* m flag: for global match. This carries no overhead
|
||||||
* in the pattern matching part.
|
* in the pattern matching part.
|
||||||
|
*
|
||||||
|
* Remember the test pattern is already unmetafied.
|
||||||
*/
|
*/
|
||||||
char *str;
|
char *str;
|
||||||
int mlen = ztrsub(patinput, patinstart);
|
int mlen = CHARSUB(patinput, patinstart);
|
||||||
|
|
||||||
str = ztrduppfx(patinstart, patinput - patinstart);
|
str = metafy(patinstart, patinput - patinstart, META_DUP);
|
||||||
setsparam("MATCH", str);
|
setsparam("MATCH", str);
|
||||||
setiparam("MBEGIN", (zlong)(patoffset + !isset(KSHARRAYS)));
|
setiparam("MBEGIN", (zlong)(patoffset + !isset(KSHARRAYS)));
|
||||||
setiparam("MEND",
|
setiparam("MEND",
|
||||||
(zlong)(mlen + patoffset + !isset(KSHARRAYS) - 1));
|
(zlong)(mlen + patoffset +
|
||||||
|
!isset(KSHARRAYS) - 1));
|
||||||
}
|
}
|
||||||
if (prog->patnpar && nump) {
|
if (prog->patnpar && nump) {
|
||||||
/*
|
/*
|
||||||
|
@ -1527,9 +1781,10 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp)
|
||||||
for (i = 0; i < prog->patnpar && i < maxnpos; i++) {
|
for (i = 0; i < prog->patnpar && i < maxnpos; i++) {
|
||||||
if (parsfound & (1 << i)) {
|
if (parsfound & (1 << i)) {
|
||||||
if (begp)
|
if (begp)
|
||||||
*begp++ = ztrsub(*sp, patinstart) + patoffset;
|
*begp++ = CHARSUB(*sp, patinstart) + patoffset;
|
||||||
if (endp)
|
if (endp)
|
||||||
*endp++ = ztrsub(*ep, patinstart) + patoffset - 1;
|
*endp++ = CHARSUB(*ep, patinstart) + patoffset
|
||||||
|
- 1;
|
||||||
} else {
|
} else {
|
||||||
if (begp)
|
if (begp)
|
||||||
*begp++ = -1;
|
*begp++ = -1;
|
||||||
|
@ -1557,7 +1812,7 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp)
|
||||||
|
|
||||||
for (i = 0; i < prog->patnpar; i++) {
|
for (i = 0; i < prog->patnpar; i++) {
|
||||||
if (parsfound & (1 << i)) {
|
if (parsfound & (1 << i)) {
|
||||||
matcharr[i] = ztrduppfx(*sp, *ep - *sp);
|
matcharr[i] = metafy(*sp, *ep - *sp, META_DUP);
|
||||||
/*
|
/*
|
||||||
* mbegin and mend give indexes into the string
|
* mbegin and mend give indexes into the string
|
||||||
* in the standard notation, i.e. respecting
|
* in the standard notation, i.e. respecting
|
||||||
|
@ -1568,12 +1823,12 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp)
|
||||||
* corresponds to indexing as ${foo[1,1]}.
|
* corresponds to indexing as ${foo[1,1]}.
|
||||||
*/
|
*/
|
||||||
sprintf(numbuf, "%ld",
|
sprintf(numbuf, "%ld",
|
||||||
(long)(ztrsub(*sp, patinstart) +
|
(long)(CHARSUB(*sp, patinstart) +
|
||||||
patoffset +
|
patoffset +
|
||||||
!isset(KSHARRAYS)));
|
!isset(KSHARRAYS)));
|
||||||
mbeginarr[i] = ztrdup(numbuf);
|
mbeginarr[i] = ztrdup(numbuf);
|
||||||
sprintf(numbuf, "%ld",
|
sprintf(numbuf, "%ld",
|
||||||
(long)(ztrsub(*ep, patinstart) +
|
(long)(CHARSUB(*ep, patinstart) +
|
||||||
patoffset +
|
patoffset +
|
||||||
!isset(KSHARRAYS) - 1));
|
!isset(KSHARRAYS) - 1));
|
||||||
mendarr[i] = ztrdup(numbuf);
|
mendarr[i] = ztrdup(numbuf);
|
||||||
|
@ -1593,19 +1848,31 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp)
|
||||||
setaparam("mbegin", mbeginarr);
|
setaparam("mbegin", mbeginarr);
|
||||||
setaparam("mend", mendarr);
|
setaparam("mend", mendarr);
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = 1;
|
ret = 1;
|
||||||
} else
|
} else
|
||||||
ret = 0;
|
ret = 0;
|
||||||
|
|
||||||
if (patinpath) {
|
if (tryalloced)
|
||||||
zfree(patinpath, pathpos + (patinend - patinstart));
|
zfree(tryalloced, unmetalen + unmetalenp);
|
||||||
patinpath = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Return length of previous succesful match. This is
|
||||||
|
* in metafied bytes, i.e. includes a count of Meta characters.
|
||||||
|
* Unusual and futile attempt at modular encapsulation.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**/
|
||||||
|
int
|
||||||
|
patmatchlen(void)
|
||||||
|
{
|
||||||
|
return patinlen;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Match literal characters with case insensitivity test: the first
|
* Match literal characters with case insensitivity test: the first
|
||||||
* comes from the input string, the second the current pattern.
|
* comes from the input string, the second the current pattern.
|
||||||
|
@ -1627,8 +1894,11 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp)
|
||||||
* exactpos is used to remember how far down an exact string we have
|
* exactpos is used to remember how far down an exact string we have
|
||||||
* matched, if we are doing approximation and can therefore redo from
|
* matched, if we are doing approximation and can therefore redo from
|
||||||
* the same point; we never need to otherwise.
|
* the same point; we never need to otherwise.
|
||||||
|
*
|
||||||
|
* exactend is a pointer to the end of the string, which isn't
|
||||||
|
* null-terminated.
|
||||||
*/
|
*/
|
||||||
static char *exactpos;
|
static char *exactpos, *exactend;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Main matching routine.
|
* Main matching routine.
|
||||||
|
@ -1643,7 +1913,7 @@ patmatch(Upat prog)
|
||||||
{
|
{
|
||||||
/* Current and next nodes */
|
/* Current and next nodes */
|
||||||
Upat scan = prog, next, opnd;
|
Upat scan = prog, next, opnd;
|
||||||
char *start, *save, *chrop, *compend;
|
char *start, *save, *chrop, *chrend, *compend;
|
||||||
int savglobflags, op, no, min, nextch, fail = 0, saverrsfound;
|
int savglobflags, op, no, min, nextch, fail = 0, saverrsfound;
|
||||||
zrange_t from, to, comp;
|
zrange_t from, to, comp;
|
||||||
|
|
||||||
|
@ -1659,45 +1929,52 @@ patmatch(Upat prog)
|
||||||
if (patinput == patinend)
|
if (patinput == patinend)
|
||||||
fail = 1;
|
fail = 1;
|
||||||
else
|
else
|
||||||
METAINC(patinput);
|
CHARINC(patinput);
|
||||||
break;
|
break;
|
||||||
case P_EXACTLY:
|
case P_EXACTLY:
|
||||||
/*
|
/*
|
||||||
* acts as nothing if *chrop is null: this is used by
|
* acts as nothing if *chrop is null: this is used by
|
||||||
* approx code.
|
* approx code.
|
||||||
*/
|
*/
|
||||||
chrop = exactpos ? exactpos : (char *)P_OPERAND(scan);
|
if (exactpos) {
|
||||||
|
chrop = exactpos;
|
||||||
|
chrend = exactend;
|
||||||
|
} else {
|
||||||
|
chrop = P_LS_STR(scan);
|
||||||
|
chrend = chrop + P_LS_LEN(scan);
|
||||||
|
}
|
||||||
exactpos = NULL;
|
exactpos = NULL;
|
||||||
while (*chrop && patinput < patinend) {
|
while (chrop < chrend && patinput < patinend) {
|
||||||
int chin = STOUC(UNMETA(patinput));
|
int chin = CHARREF(patinput);
|
||||||
int chpa = STOUC(UNMETA(chrop));
|
int chpa = CHARREF(chrop);
|
||||||
if (!CHARMATCH(chin, chpa)) {
|
if (!CHARMATCH(chin, chpa)) {
|
||||||
fail = 1;
|
fail = 1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
METAINC(chrop);
|
CHARINC(chrop);
|
||||||
METAINC(patinput);
|
CHARINC(patinput);
|
||||||
}
|
}
|
||||||
if (*chrop) {
|
if (chrop < chrend) {
|
||||||
exactpos = chrop;
|
exactpos = chrop;
|
||||||
|
exactend = chrend;
|
||||||
fail = 1;
|
fail = 1;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case P_ANYOF:
|
case P_ANYOF:
|
||||||
if (patinput == patinend ||
|
if (patinput == patinend ||
|
||||||
!patmatchrange((char *)P_OPERAND(scan),
|
!patmatchrange((char *)P_OPERAND(scan),
|
||||||
STOUC(UNMETA(patinput))))
|
CHARREF(patinput)))
|
||||||
fail = 1;
|
fail = 1;
|
||||||
else
|
else
|
||||||
METAINC(patinput);
|
CHARINC(patinput);
|
||||||
break;
|
break;
|
||||||
case P_ANYBUT:
|
case P_ANYBUT:
|
||||||
if (patinput == patinend ||
|
if (patinput == patinend ||
|
||||||
patmatchrange((char *)P_OPERAND(scan),
|
patmatchrange((char *)P_OPERAND(scan),
|
||||||
STOUC(UNMETA(patinput))))
|
CHARREF(patinput)))
|
||||||
fail = 1;
|
fail = 1;
|
||||||
else
|
else
|
||||||
METAINC(patinput);
|
CHARINC(patinput);
|
||||||
break;
|
break;
|
||||||
case P_NUMRNG:
|
case P_NUMRNG:
|
||||||
case P_NUMFROM:
|
case P_NUMFROM:
|
||||||
|
@ -1771,7 +2048,8 @@ patmatch(Upat prog)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
if (!no && P_OP(next) == P_EXACTLY &&
|
if (!no && P_OP(next) == P_EXACTLY &&
|
||||||
!idigit(STOUC(*(char *)P_OPERAND(next))) &&
|
(!P_LS_LEN(next) ||
|
||||||
|
!idigit(STOUC(*P_LS_STR(next)))) &&
|
||||||
!(patglobflags & 0xff))
|
!(patglobflags & 0xff))
|
||||||
return 0;
|
return 0;
|
||||||
patinput = --save;
|
patinput = --save;
|
||||||
|
@ -1791,7 +2069,7 @@ patmatch(Upat prog)
|
||||||
case P_NUMANY:
|
case P_NUMANY:
|
||||||
/* This is <->: any old set of digits, don't bother comparing */
|
/* This is <->: any old set of digits, don't bother comparing */
|
||||||
start = patinput;
|
start = patinput;
|
||||||
while (patinput < patinend && idigit(STOUC(*patinput)))
|
while (patinput < patinend && idigit(CHARREF(patinput)))
|
||||||
patinput++;
|
patinput++;
|
||||||
save = patinput;
|
save = patinput;
|
||||||
no = 0;
|
no = 0;
|
||||||
|
@ -1799,7 +2077,8 @@ patmatch(Upat prog)
|
||||||
if (patmatch(next))
|
if (patmatch(next))
|
||||||
return 1;
|
return 1;
|
||||||
if (!no && P_OP(next) == P_EXACTLY &&
|
if (!no && P_OP(next) == P_EXACTLY &&
|
||||||
!idigit(STOUC(*(char *)P_OPERAND(next))) &&
|
(!P_LS_LEN(next) ||
|
||||||
|
!idigit(CHARREF(P_LS_STR(next)))) &&
|
||||||
!(patglobflags & 0xff))
|
!(patglobflags & 0xff))
|
||||||
return 0;
|
return 0;
|
||||||
patinput = --save;
|
patinput = --save;
|
||||||
|
@ -1963,7 +2242,7 @@ patmatch(Upat prog)
|
||||||
origpatinend = patinend;
|
origpatinend = patinend;
|
||||||
while ((ret = patmatch(P_OPERAND(scan)))) {
|
while ((ret = patmatch(P_OPERAND(scan)))) {
|
||||||
unsigned char *syncpt;
|
unsigned char *syncpt;
|
||||||
char *savpatinstart, *savpatinend;
|
char *savpatinstart;
|
||||||
int savforce = forceerrs;
|
int savforce = forceerrs;
|
||||||
int savpatflags = patflags, synclen;
|
int savpatflags = patflags, synclen;
|
||||||
forceerrs = -1;
|
forceerrs = -1;
|
||||||
|
@ -1998,7 +2277,6 @@ patmatch(Upat prog)
|
||||||
patflags |= PAT_NOTEND;
|
patflags |= PAT_NOTEND;
|
||||||
}
|
}
|
||||||
savpatinstart = patinstart;
|
savpatinstart = patinstart;
|
||||||
savpatinend = patinend;
|
|
||||||
next = PATNEXT(scan);
|
next = PATNEXT(scan);
|
||||||
while (next && P_ISEXCLUDE(next)) {
|
while (next && P_ISEXCLUDE(next)) {
|
||||||
patinput = save;
|
patinput = save;
|
||||||
|
@ -2013,14 +2291,15 @@ patmatch(Upat prog)
|
||||||
opnd = P_OPERAND(next) + 1;
|
opnd = P_OPERAND(next) + 1;
|
||||||
if (P_OP(next) == P_EXCLUDP && patinpath) {
|
if (P_OP(next) == P_EXCLUDP && patinpath) {
|
||||||
/*
|
/*
|
||||||
* top level exclusion with a file,
|
* Top level exclusion with a file,
|
||||||
* applies to whole path so add the
|
* applies to whole path so add the
|
||||||
* segments already matched
|
* segments already matched.
|
||||||
|
* We copied these in front of the
|
||||||
|
* test pattern, so patinend doesn't
|
||||||
|
* need moving.
|
||||||
*/
|
*/
|
||||||
DPUTS(patinput != patinstart,
|
DPUTS(patinput != patinstart,
|
||||||
"BUG: not at start excluding path");
|
"BUG: not at start excluding path");
|
||||||
patinend = patinpath + pathpos +
|
|
||||||
(patinend - patinstart);
|
|
||||||
patinput = patinstart = patinpath;
|
patinput = patinstart = patinpath;
|
||||||
}
|
}
|
||||||
if (patmatch(opnd)) {
|
if (patmatch(opnd)) {
|
||||||
|
@ -2036,7 +2315,6 @@ patmatch(Upat prog)
|
||||||
patinput = savpatinstart +
|
patinput = savpatinstart +
|
||||||
(patinput - patinstart);
|
(patinput - patinstart);
|
||||||
patinstart = savpatinstart;
|
patinstart = savpatinstart;
|
||||||
patinend = savpatinend;
|
|
||||||
}
|
}
|
||||||
if (!ret)
|
if (!ret)
|
||||||
break;
|
break;
|
||||||
|
@ -2146,7 +2424,7 @@ patmatch(Upat prog)
|
||||||
/* Note that no counts possibly metafied characters */
|
/* Note that no counts possibly metafied characters */
|
||||||
start = patinput;
|
start = patinput;
|
||||||
if (op == P_STAR) {
|
if (op == P_STAR) {
|
||||||
for (no = 0; patinput < patinend; METAINC(patinput))
|
for (no = 0; patinput < patinend; CHARINC(patinput))
|
||||||
no++;
|
no++;
|
||||||
/* simple optimization for reasonably common case */
|
/* simple optimization for reasonably common case */
|
||||||
if (P_OP(next) == P_END)
|
if (P_OP(next) == P_END)
|
||||||
|
@ -2156,7 +2434,7 @@ patmatch(Upat prog)
|
||||||
"BUG: wrong backtracking with approximation.");
|
"BUG: wrong backtracking with approximation.");
|
||||||
if (!globdots && P_NOTDOT(P_OPERAND(scan)) &&
|
if (!globdots && P_NOTDOT(P_OPERAND(scan)) &&
|
||||||
patinput == patinstart && patinput < patinend &&
|
patinput == patinstart && patinput < patinend &&
|
||||||
*patinput == '.')
|
CHARREF(patinput) == '.')
|
||||||
return 0;
|
return 0;
|
||||||
no = patrepeat(P_OPERAND(scan));
|
no = patrepeat(P_OPERAND(scan));
|
||||||
}
|
}
|
||||||
|
@ -2165,8 +2443,9 @@ patmatch(Upat prog)
|
||||||
* Lookahead to avoid useless matches. This is not possible
|
* Lookahead to avoid useless matches. This is not possible
|
||||||
* with approximation.
|
* with approximation.
|
||||||
*/
|
*/
|
||||||
if (P_OP(next) == P_EXACTLY && !(patglobflags & 0xff)) {
|
if (P_OP(next) == P_EXACTLY && P_LS_LEN(next) &&
|
||||||
char *nextop = (char *)P_OPERAND(next);
|
!(patglobflags & 0xff)) {
|
||||||
|
char *nextop = P_LS_STR(next);
|
||||||
/*
|
/*
|
||||||
* If that P_EXACTLY is last (common in simple patterns,
|
* If that P_EXACTLY is last (common in simple patterns,
|
||||||
* such as *.c), then it can be only be matched at one
|
* such as *.c), then it can be only be matched at one
|
||||||
|
@ -2175,21 +2454,20 @@ patmatch(Upat prog)
|
||||||
if (P_OP(PATNEXT(next)) == P_END &&
|
if (P_OP(PATNEXT(next)) == P_END &&
|
||||||
!(patflags & PAT_NOANCH)) {
|
!(patflags & PAT_NOANCH)) {
|
||||||
int ptlen = patinend - patinput;
|
int ptlen = patinend - patinput;
|
||||||
int oplen = strlen(nextop);
|
int lenmatch = patinend - (min ? CHARNEXT(start) : start);
|
||||||
int lenmatch = patinend - (min ? METANEXT(start) : start);
|
|
||||||
/* Are we in the right range? */
|
/* Are we in the right range? */
|
||||||
if (oplen > lenmatch || oplen < ptlen)
|
if (P_LS_LEN(next) > lenmatch || P_LS_LEN(next) < ptlen)
|
||||||
return 0;
|
return 0;
|
||||||
/* Yes, just position appropriately and test. */
|
/* Yes, just position appropriately and test. */
|
||||||
patinput += ptlen - oplen;
|
patinput += ptlen - P_LS_LEN(next);
|
||||||
if (patinput > start && patinput[-1] == Meta) {
|
/*
|
||||||
/* doesn't align properly, no go */
|
* Here we will need to be careful that patinput is not
|
||||||
return 0;
|
* in the middle of a multibyte character.
|
||||||
}
|
*/
|
||||||
/* Continue loop with P_EXACTLY test. */
|
/* Continue loop with P_EXACTLY test. */
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
nextch = STOUC(UNMETA(nextop));
|
nextch = CHARREF(nextop);
|
||||||
} else
|
} else
|
||||||
nextch = -1;
|
nextch = -1;
|
||||||
save = patinput;
|
save = patinput;
|
||||||
|
@ -2199,14 +2477,17 @@ patmatch(Upat prog)
|
||||||
int charmatch_cache;
|
int charmatch_cache;
|
||||||
if (nextch < 0 ||
|
if (nextch < 0 ||
|
||||||
(patinput < patinend &&
|
(patinput < patinend &&
|
||||||
CHARMATCH_EXPR(STOUC(UNMETA(patinput)), nextch))) {
|
CHARMATCH_EXPR(CHARREF(patinput), nextch))) {
|
||||||
if (patmatch(next))
|
if (patmatch(next))
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
no--;
|
no--;
|
||||||
save--;
|
save--;
|
||||||
if (save > start && save[-1] == Meta)
|
/*
|
||||||
save--;
|
* Here we will need to make sure save is
|
||||||
|
* decremented properly to the start of
|
||||||
|
* the preceeding multibyte character.
|
||||||
|
*/
|
||||||
patinput = save;
|
patinput = save;
|
||||||
patglobflags = savglobflags;
|
patglobflags = savglobflags;
|
||||||
errsfound = saverrsfound;
|
errsfound = saverrsfound;
|
||||||
|
@ -2270,7 +2551,7 @@ patmatch(Upat prog)
|
||||||
|
|
||||||
/* Try omitting a character from the input string */
|
/* Try omitting a character from the input string */
|
||||||
if (patinput < patinend) {
|
if (patinput < patinend) {
|
||||||
METAINC(patinput);
|
CHARINC(patinput);
|
||||||
/* If we are not on an exact match, then this is
|
/* If we are not on an exact match, then this is
|
||||||
* our last gasp effort, so we can optimize out
|
* our last gasp effort, so we can optimize out
|
||||||
* the recursive call.
|
* the recursive call.
|
||||||
|
@ -2285,11 +2566,11 @@ patmatch(Upat prog)
|
||||||
char *nextexact = savexact;
|
char *nextexact = savexact;
|
||||||
DPUTS(!savexact || !*savexact,
|
DPUTS(!savexact || !*savexact,
|
||||||
"BUG: exact match has not set exactpos");
|
"BUG: exact match has not set exactpos");
|
||||||
METAINC(nextexact);
|
CHARINC(nextexact);
|
||||||
|
|
||||||
if (save < patinend) {
|
if (save < patinend) {
|
||||||
char *nextin = save;
|
char *nextin = save;
|
||||||
METAINC(nextin);
|
CHARINC(nextin);
|
||||||
patglobflags = savglobflags;
|
patglobflags = savglobflags;
|
||||||
errsfound = saverrsfound;
|
errsfound = saverrsfound;
|
||||||
exactpos = savexact;
|
exactpos = savexact;
|
||||||
|
@ -2299,18 +2580,18 @@ patmatch(Upat prog)
|
||||||
* exactpos
|
* exactpos
|
||||||
*/
|
*/
|
||||||
if (save < patinend && nextin < patinend &&
|
if (save < patinend && nextin < patinend &&
|
||||||
*nextexact) {
|
nextexact < exactend) {
|
||||||
int cin0 = UNMETA(save);
|
int cin0 = CHARREF(save);
|
||||||
int cpa0 = UNMETA(exactpos);
|
int cpa0 = CHARREF(exactpos);
|
||||||
int cin1 = UNMETA(nextin);
|
int cin1 = CHARREF(nextin);
|
||||||
int cpa1 = UNMETA(nextexact);
|
int cpa1 = CHARREF(nextexact);
|
||||||
|
|
||||||
if (CHARMATCH(cin0, cpa1) &&
|
if (CHARMATCH(cin0, cpa1) &&
|
||||||
CHARMATCH(cin1, cpa0)) {
|
CHARMATCH(cin1, cpa0)) {
|
||||||
patinput = nextin;
|
patinput = nextin;
|
||||||
METAINC(patinput);
|
CHARINC(patinput);
|
||||||
exactpos = nextexact;
|
exactpos = nextexact;
|
||||||
METAINC(exactpos);
|
CHARINC(exactpos);
|
||||||
if (patmatch(scan))
|
if (patmatch(scan))
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
|
@ -2333,12 +2614,13 @@ patmatch(Upat prog)
|
||||||
exactpos = savexact;
|
exactpos = savexact;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DPUTS(exactpos == exactend, "approximating too far");
|
||||||
/*
|
/*
|
||||||
* Try moving up the exact match pattern.
|
* Try moving up the exact match pattern.
|
||||||
* This must be the last attempt, so just loop
|
* This must be the last attempt, so just loop
|
||||||
* instead of calling recursively.
|
* instead of calling recursively.
|
||||||
*/
|
*/
|
||||||
METAINC(exactpos);
|
CHARINC(exactpos);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2358,6 +2640,11 @@ patmatchrange(char *range, int ch)
|
||||||
{
|
{
|
||||||
int r1, r2;
|
int r1, r2;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Careful here: unlike other strings, range is a NULL-terminated,
|
||||||
|
* metafied string, because we need to treat the Posix and hyphenated
|
||||||
|
* ranges specially.
|
||||||
|
*/
|
||||||
for (; *range; range++) {
|
for (; *range; range++) {
|
||||||
if (imeta(STOUC(*range))) {
|
if (imeta(STOUC(*range))) {
|
||||||
switch (STOUC(*range)-STOUC(Meta)) {
|
switch (STOUC(*range)-STOUC(Meta)) {
|
||||||
|
@ -2459,23 +2746,24 @@ static int patrepeat(Upat p)
|
||||||
break;
|
break;
|
||||||
#endif
|
#endif
|
||||||
case P_EXACTLY:
|
case P_EXACTLY:
|
||||||
tch = STOUC(UNMETA(opnd));
|
DPUTS(P_LS_LEN(p) != 1, "closure following more than one character");
|
||||||
|
tch = CHARREF(P_LS_STR(p));
|
||||||
while (scan < patinend &&
|
while (scan < patinend &&
|
||||||
CHARMATCH_EXPR(STOUC(UNMETA(scan)), tch)) {
|
CHARMATCH_EXPR(CHARREF(scan), tch)) {
|
||||||
count++;
|
count++;
|
||||||
METAINC(scan);
|
CHARINC(scan);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case P_ANYOF:
|
case P_ANYOF:
|
||||||
while (scan < patinend && patmatchrange(opnd, STOUC(UNMETA(scan)))) {
|
while (scan < patinend && patmatchrange(opnd, CHARREF(scan))) {
|
||||||
count++;
|
count++;
|
||||||
METAINC(scan);
|
CHARINC(scan);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case P_ANYBUT:
|
case P_ANYBUT:
|
||||||
while (scan < patinend && !patmatchrange(opnd, STOUC(UNMETA(scan)))) {
|
while (scan < patinend && !patmatchrange(opnd, CHARREF(scan))) {
|
||||||
count++;
|
count++;
|
||||||
METAINC(scan);
|
CHARINC(scan);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
|
@ -2528,7 +2816,14 @@ patdump(Patprog r)
|
||||||
next = PATNEXT(up);
|
next = PATNEXT(up);
|
||||||
printf("(%d)", next ? next-codestart : 0);
|
printf("(%d)", next ? next-codestart : 0);
|
||||||
s += sizeof(union upat);
|
s += sizeof(union upat);
|
||||||
if (op == P_ANYOF || op == P_ANYBUT || op == P_EXACTLY) {
|
if (op == P_EXACTLY) {
|
||||||
|
long llen = *(long *)s;
|
||||||
|
s += sizeof(long);
|
||||||
|
while (llen--) {
|
||||||
|
putchar(CHARREF(s));
|
||||||
|
CHARINC(s);
|
||||||
|
}
|
||||||
|
} else if (op == P_ANYOF || op == P_ANYBUT) {
|
||||||
while (*s != '\0') {
|
while (*s != '\0') {
|
||||||
if (itok(*s)) {
|
if (itok(*s)) {
|
||||||
if (*s == Meta + PP_RANGE) {
|
if (*s == Meta + PP_RANGE) {
|
||||||
|
|
|
@ -1088,10 +1088,10 @@ struct patprog {
|
||||||
long startoff; /* length before start of programme */
|
long startoff; /* length before start of programme */
|
||||||
long size; /* total size from start of struct */
|
long size; /* total size from start of struct */
|
||||||
long mustoff; /* offset to string that must be present */
|
long mustoff; /* offset to string that must be present */
|
||||||
|
long patmlen; /* length of pure string or longest match */
|
||||||
int globflags; /* globbing flags to set at start */
|
int globflags; /* globbing flags to set at start */
|
||||||
int globend; /* globbing flags set after finish */
|
int globend; /* globbing flags set after finish */
|
||||||
int flags; /* PAT_* flags */
|
int flags; /* PAT_* flags */
|
||||||
int patmlen; /* length of pure string or longest match */
|
|
||||||
int patnpar; /* number of active parentheses */
|
int patnpar; /* number of active parentheses */
|
||||||
char patstartch;
|
char patstartch;
|
||||||
};
|
};
|
||||||
|
|
|
@ -132,6 +132,7 @@
|
||||||
>0: [[ 633 = <-1000>33 ]]
|
>0: [[ 633 = <-1000>33 ]]
|
||||||
>0: [[ 633 = <1->33 ]]
|
>0: [[ 633 = <1->33 ]]
|
||||||
>0: [[ 633 = <->33 ]]
|
>0: [[ 633 = <->33 ]]
|
||||||
|
>0: [[ 12345678901234567890123456789012345678901234567890123456789012345678901234567890foo = <42->foo ]]
|
||||||
>0: [[ READ.ME = (#ia1)readme ]]
|
>0: [[ READ.ME = (#ia1)readme ]]
|
||||||
>1: [[ READ..ME = (#ia1)readme ]]
|
>1: [[ READ..ME = (#ia1)readme ]]
|
||||||
>0: [[ README = (#ia1)readm ]]
|
>0: [[ README = (#ia1)readm ]]
|
||||||
|
|
Loading…
Reference in New Issue