mirror of
git://git.code.sf.net/p/zsh/code
synced 2024-05-17 21:46:04 +02:00
20500: Unmetafy patterns where possible and other minor pattern fixes
This commit is contained in:
parent
bc704ef05c
commit
b115ca307a
|
@ -1,3 +1,11 @@
|
|||
2004-10-18 Peter Stephenson <pws@csr.com>
|
||||
|
||||
* 20500: Misc/globtests, Src/glob.c, Src/pattern.c, Src/zsh.h,
|
||||
Src/Zle/complist.c, Test/D02glob.ztst: Use unmetafied strings
|
||||
in patterns more; improve glob.c/pattern.c interface; fix
|
||||
minor <num-> issue; add bogus quotation from Proust (it's
|
||||
my file and I'll do what I like with it).
|
||||
|
||||
2004-10-17 Wayne Davison <wayned@users.sourceforge.net>
|
||||
|
||||
* 20496: Src/utils.c: made zclose() not call close() when the
|
||||
|
|
|
@ -134,6 +134,9 @@ t 633 <1-1000>33
|
|||
t 633 <-1000>33
|
||||
t 633 <1->33
|
||||
t 633 <->33
|
||||
# An open top end of a range will match any integer, even
|
||||
# if not representable in the internal integer type.
|
||||
t 12345678901234567890123456789012345678901234567890123456789012345678901234567890foo <42->foo
|
||||
# Approximate matching
|
||||
t READ.ME (#ia1)readme
|
||||
f READ..ME (#ia1)readme
|
||||
|
|
|
@ -601,7 +601,7 @@ putmatchcol(Listcols c, char *group, char *n)
|
|||
|
||||
for (pc = c->pats; pc; pc = pc->next)
|
||||
if ((!pc->prog || !group || pattry(pc->prog, group)) &&
|
||||
pattryrefs(pc->pat, n, &nrefs, begpos, endpos)) {
|
||||
pattryrefs(pc->pat, n, -1, 0, &nrefs, begpos, endpos)) {
|
||||
if (pc->cols[1]) {
|
||||
patcols = pc->cols;
|
||||
|
||||
|
@ -639,7 +639,7 @@ putfilecol(Listcols c, char *group, char *n, mode_t m)
|
|||
|
||||
for (pc = c->pats; pc; pc = pc->next)
|
||||
if ((!pc->prog || !group || pattry(pc->prog, group)) &&
|
||||
pattryrefs(pc->pat, n, &nrefs, begpos, endpos)) {
|
||||
pattryrefs(pc->pat, n, -1, 0, &nrefs, begpos, endpos)) {
|
||||
if (pc->cols[1]) {
|
||||
patcols = pc->cols;
|
||||
|
||||
|
|
94
Src/glob.c
94
Src/glob.c
|
@ -2195,8 +2195,13 @@ set_pat_end(Patprog p, char null_me)
|
|||
static int
|
||||
igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
|
||||
{
|
||||
char *s = *sp, *t, sav;
|
||||
int i, l = strlen(*sp), ml = ztrlen(*sp), matched = 1;
|
||||
char *s = *sp, *t;
|
||||
/*
|
||||
* Note that ioff and ml count characters in the character
|
||||
* set (Meta's are not included), while l counts characters in the
|
||||
* string.
|
||||
*/
|
||||
int ioff, l = strlen(*sp), ml = ztrlen(*sp), matched = 1;
|
||||
|
||||
repllist = NULL;
|
||||
|
||||
|
@ -2208,7 +2213,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
|
|||
p->flags &= ~(PAT_NOTSTART|PAT_NOTEND);
|
||||
|
||||
if (fl & SUB_ALL) {
|
||||
i = matched && pattry(p, s);
|
||||
int i = matched && pattry(p, s);
|
||||
*sp = get_match_ret(*sp, 0, i ? l : 0, fl, i ? replstr : 0);
|
||||
if (! **sp && (((fl & SUB_MATCH) && !i) || ((fl & SUB_REST) && i)))
|
||||
return 0;
|
||||
|
@ -2223,25 +2228,22 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
|
|||
* First get the longest match...
|
||||
*/
|
||||
if (pattry(p, s)) {
|
||||
char *mpos = patinput;
|
||||
/* patmatchlen returns metafied length, as we need */
|
||||
int mlen = patmatchlen();
|
||||
if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
|
||||
/*
|
||||
* ... now we know whether it's worth looking for the
|
||||
* shortest, which we do by brute force.
|
||||
*/
|
||||
for (t = s; t < mpos; METAINC(t)) {
|
||||
sav = *t;
|
||||
set_pat_end(p, sav);
|
||||
*t = '\0';
|
||||
if (pattry(p, s)) {
|
||||
mpos = patinput;
|
||||
*t = sav;
|
||||
for (t = s; t < s + mlen; METAINC(t)) {
|
||||
set_pat_end(p, *t);
|
||||
if (pattrylen(p, s, t - s, 0)) {
|
||||
mlen = patmatchlen();
|
||||
break;
|
||||
}
|
||||
*t = sav;
|
||||
}
|
||||
}
|
||||
*sp = get_match_ret(*sp, 0, mpos-s, fl, replstr);
|
||||
*sp = get_match_ret(*sp, 0, mlen, fl, replstr);
|
||||
return 1;
|
||||
}
|
||||
break;
|
||||
|
@ -2250,35 +2252,30 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
|
|||
/* Smallest possible match at tail of string: *
|
||||
* move back down string until we get a match. *
|
||||
* There's no optimization here. */
|
||||
patoffset = ml;
|
||||
for (t = s + l; t >= s; t--, patoffset--) {
|
||||
for (ioff = ml, t = s + l; t >= s; t--, ioff--) {
|
||||
set_pat_start(p, t-s);
|
||||
if (pattry(p, t)) {
|
||||
if (pattrylen(p, t, -1, ioff)) {
|
||||
*sp = get_match_ret(*sp, t - s, l, fl, replstr);
|
||||
patoffset = 0;
|
||||
return 1;
|
||||
}
|
||||
if (t > s+1 && t[-2] == Meta)
|
||||
t--;
|
||||
}
|
||||
patoffset = 0;
|
||||
break;
|
||||
|
||||
case (SUB_END|SUB_LONG):
|
||||
/* Largest possible match at tail of string: *
|
||||
* move forward along string until we get a match. *
|
||||
* Again there's no optimisation. */
|
||||
for (i = 0, t = s; i < l; i++, t++, patoffset++) {
|
||||
for (ioff = 0, t = s; t < s + l; ioff++, t++) {
|
||||
set_pat_start(p, t-s);
|
||||
if (pattry(p, t)) {
|
||||
*sp = get_match_ret(*sp, i, l, fl, replstr);
|
||||
patoffset = 0;
|
||||
if (pattrylen(p, t, -1, ioff)) {
|
||||
*sp = get_match_ret(*sp, t-s, l, fl, replstr);
|
||||
return 1;
|
||||
}
|
||||
if (*t == Meta)
|
||||
i++, t++;
|
||||
t++;
|
||||
}
|
||||
patoffset = 0;
|
||||
break;
|
||||
|
||||
case SUB_SUBSTR:
|
||||
|
@ -2293,26 +2290,23 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
|
|||
t = s;
|
||||
if (fl & SUB_GLOBAL)
|
||||
repllist = newlinklist();
|
||||
ioff = 0; /* offset into string */
|
||||
do {
|
||||
/* loop over all matches for global substitution */
|
||||
matched = 0;
|
||||
for (; t < s + l; t++, patoffset++) {
|
||||
for (; t < s + l; t++, ioff++) {
|
||||
/* Find the longest match from this position. */
|
||||
set_pat_start(p, t-s);
|
||||
if (pattry(p, t)) {
|
||||
char *mpos = patinput;
|
||||
if (pattrylen(p, t, -1, ioff)) {
|
||||
char *mpos = t + patmatchlen();
|
||||
if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
|
||||
char *ptr;
|
||||
for (ptr = t; ptr < mpos; METAINC(ptr)) {
|
||||
sav = *ptr;
|
||||
set_pat_end(p, sav);
|
||||
*ptr = '\0';
|
||||
if (pattry(p, t)) {
|
||||
mpos = patinput;
|
||||
*ptr = sav;
|
||||
set_pat_end(p, *ptr);
|
||||
if (pattrylen(p, t, ptr - t, ioff)) {
|
||||
mpos = t + patmatchlen();
|
||||
break;
|
||||
}
|
||||
*ptr = sav;
|
||||
}
|
||||
}
|
||||
if (!--n || (n <= 0 && (fl & SUB_GLOBAL))) {
|
||||
|
@ -2330,7 +2324,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
|
|||
*/
|
||||
continue;
|
||||
} else {
|
||||
patoffset = 0;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
@ -2339,7 +2332,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
|
|||
* which is already marked for replacement.
|
||||
*/
|
||||
matched = 1;
|
||||
for ( ; t < mpos; t++, patoffset++)
|
||||
for ( ; t < mpos; t++, ioff++)
|
||||
if (*t == Meta)
|
||||
t++;
|
||||
break;
|
||||
|
@ -2348,7 +2341,6 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
|
|||
t++;
|
||||
}
|
||||
} while (matched);
|
||||
patoffset = 0;
|
||||
/*
|
||||
* check if we can match a blank string, if so do it
|
||||
* at the start. Goodness knows if this is a good idea
|
||||
|
@ -2365,50 +2357,39 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
|
|||
case (SUB_END|SUB_SUBSTR):
|
||||
case (SUB_END|SUB_LONG|SUB_SUBSTR):
|
||||
/* Longest/shortest at end, matching substrings. */
|
||||
patoffset = ml;
|
||||
if (!(fl & SUB_LONG)) {
|
||||
set_pat_start(p, l);
|
||||
if (pattry(p, s + l) && !--n) {
|
||||
if (pattrylen(p, s + l, -1, ml) && !--n) {
|
||||
*sp = get_match_ret(*sp, l, l, fl, replstr);
|
||||
patoffset = 0;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
patoffset--;
|
||||
for (t = s + l - 1; t >= s; t--, patoffset--) {
|
||||
for (ioff = ml - 1, t = s + l - 1; t >= s; t--, ioff--) {
|
||||
if (t > s && t[-1] == Meta)
|
||||
t--;
|
||||
set_pat_start(p, t-s);
|
||||
if (pattry(p, t) && !--n) {
|
||||
if (pattrylen(p, t, -1, ioff) && !--n) {
|
||||
/* Found the longest match */
|
||||
char *mpos = patinput;
|
||||
char *mpos = t + patmatchlen();
|
||||
if (!(fl & SUB_LONG) && !(p->flags & PAT_PURES)) {
|
||||
char *ptr;
|
||||
for (ptr = t; ptr < mpos; METAINC(ptr)) {
|
||||
sav = *ptr;
|
||||
set_pat_end(p, sav);
|
||||
*ptr = '\0';
|
||||
if (pattry(p, t)) {
|
||||
mpos = patinput;
|
||||
*ptr = sav;
|
||||
set_pat_end(p, *ptr);
|
||||
if (pattrylen(p, t, ptr - t, ioff)) {
|
||||
mpos = t + patmatchlen();
|
||||
break;
|
||||
}
|
||||
*ptr = sav;
|
||||
}
|
||||
}
|
||||
*sp = get_match_ret(*sp, t-s, mpos-s, fl, replstr);
|
||||
patoffset = 0;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
patoffset = ml;
|
||||
set_pat_start(p, l);
|
||||
if ((fl & SUB_LONG) && pattry(p, s + l) && !--n) {
|
||||
if ((fl & SUB_LONG) && pattrylen(p, s + l, -1, ml) && !--n) {
|
||||
*sp = get_match_ret(*sp, l, l, fl, replstr);
|
||||
patoffset = 0;
|
||||
return 1;
|
||||
}
|
||||
patoffset = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -2419,6 +2400,7 @@ igetmatch(char **sp, Patprog p, int fl, int n, char *replstr)
|
|||
Repldata rd;
|
||||
int lleft = 0; /* size of returned string */
|
||||
char *ptr, *start;
|
||||
int i;
|
||||
|
||||
i = 0; /* start of last chunk we got from *sp */
|
||||
for (nd = firstnode(repllist); nd; incnode(nd)) {
|
||||
|
|
675
Src/pattern.c
675
Src/pattern.c
|
@ -47,6 +47,26 @@
|
|||
*
|
||||
* Eagle-eyed readers will notice this is an altered version. Incredibly
|
||||
* sharp-eyed readers might even find bits that weren't altered.
|
||||
*
|
||||
*
|
||||
* And I experienced a sense that, like certain regular
|
||||
* expressions, seemed to match the day from beginning to end, so
|
||||
* that I did not need to identify the parenthesised subexpression
|
||||
* that told of dawn, nor the group of characters that indicated
|
||||
* the moment when my grandfather returned home with news of
|
||||
* Swann's departure for Paris; and the whole length of the month
|
||||
* of May, as if matched by a closure, fitted into the buffer of my
|
||||
* life with no sign of overflowing, turning the days, like a
|
||||
* procession of insects that could consist of this or that
|
||||
* species, into a random and unstructured repetition of different
|
||||
* sequences, anchored from the first day of the month to the last
|
||||
* in the same fashion as the weeks when I knew I would not see
|
||||
* Gilberte and would search in vain for any occurrences of the
|
||||
* string in the avenue of hawthorns by Tansonville, without my
|
||||
* having to delimit explicitly the start or finish of the pattern.
|
||||
*
|
||||
* M. Proust, "In Search of Lost Files",
|
||||
* bk I, "The Walk by Bourne's Place".
|
||||
*/
|
||||
|
||||
#include "zsh.mdh"
|
||||
|
@ -78,7 +98,7 @@ typedef union upat *Upat;
|
|||
#define P_EXCSYNC 0x01 /* no Test if following exclude already failed */
|
||||
#define P_EXCEND 0x02 /* no Test if exclude matched orig branch */
|
||||
#define P_BACK 0x03 /* no Match "", "next" ptr points backward. */
|
||||
#define P_EXACTLY 0x04 /* str Match this string. */
|
||||
#define P_EXACTLY 0x04 /* lstr Match this string. */
|
||||
#define P_NOTHING 0x05 /* no Match empty string. */
|
||||
#define P_ONEHASH 0x06 /* node Match this (simple) thing 0 or more times. */
|
||||
#define P_TWOHASH 0x07 /* node Match this (simple) thing 1 or more times. */
|
||||
|
@ -103,10 +123,14 @@ typedef union upat *Upat;
|
|||
/* spaces left for P_OPEN+n,... for backreferences */
|
||||
#define P_OPEN 0x80 /* no Mark this point in input as start of n. */
|
||||
#define P_CLOSE 0x90 /* no Analogous to OPEN. */
|
||||
/* zl is the range type zrange_t: may be zlong or unsigned long
|
||||
* char is a single char
|
||||
* uc* is a pointer to unsigned char, used at run time and initialised
|
||||
/*
|
||||
* no no argument
|
||||
* zr the range type zrange_t: may be zlong or unsigned long
|
||||
* char a single char
|
||||
* uc* a pointer to unsigned char, used at run time and initialised
|
||||
* to NULL.
|
||||
* str null-terminated, metafied string
|
||||
* lstr length as long then string, not null-terminated, unmetafied.
|
||||
*/
|
||||
|
||||
/*
|
||||
|
@ -179,16 +203,24 @@ typedef union upat *Upat;
|
|||
#define P_ISEXCLUDE(p) (((p)->l & 0x30) == 0x30)
|
||||
#define P_NOTDOT(p) ((p)->l & 0x40)
|
||||
|
||||
/* Specific to lstr type, i.e. P_EXACTLY. */
|
||||
#define P_LS_LEN(p) ((p)[1].l) /* can be used as lvalue */
|
||||
#define P_LS_STR(p) ((char *)((p) + 2))
|
||||
|
||||
/* Flags needed when pattern is executed */
|
||||
#define P_SIMPLE 0x01 /* Simple enough to be #/## operand. */
|
||||
#define P_HSTART 0x02 /* Starts with # or ##'d pattern. */
|
||||
#define P_PURESTR 0x04 /* Can be matched with a strcmp */
|
||||
|
||||
/* Next character after one which may be a Meta (x is any char *) */
|
||||
#define METANEXT(x) (*(x) == Meta ? (x)+2 : (x)+1)
|
||||
/*
|
||||
* Increment pointer which may be on a Meta (x is a pointer variable),
|
||||
* returning the incremented value (i.e. like pre-increment).
|
||||
*
|
||||
* In future the following will need to refer to metafied multibyte
|
||||
* characters. References to invidual characters are not turned
|
||||
* into a macro when the characters is metafied (c.f. CHARREF()
|
||||
* below then the character is not metafied) and will need tracking
|
||||
* down.
|
||||
*/
|
||||
#define METAINC(x) ((x) += (*(x) == Meta) ? 2 : 1)
|
||||
/*
|
||||
|
@ -254,13 +286,18 @@ static int patglobflags; /* globbing flags & approx */
|
|||
|
||||
/* Add n more characters, ensuring there is enough space. */
|
||||
|
||||
enum {
|
||||
PA_NOALIGN = 1,
|
||||
PA_UNMETA = 2
|
||||
};
|
||||
|
||||
/**/
|
||||
static void
|
||||
patadd(char *add, int ch, long n, int noalgn)
|
||||
patadd(char *add, int ch, long n, int paflags)
|
||||
{
|
||||
/* Make sure everything gets aligned unless we get noalgn. */
|
||||
/* Make sure everything gets aligned unless we get PA_NOALIGN. */
|
||||
long newpatsize = patsize + n;
|
||||
if (!noalgn)
|
||||
if (!(paflags & PA_NOALIGN))
|
||||
newpatsize = (newpatsize + sizeof(union upat) - 1) &
|
||||
~(sizeof(union upat) - 1);
|
||||
if (patalloc < newpatsize) {
|
||||
|
@ -272,8 +309,25 @@ patadd(char *add, int ch, long n, int noalgn)
|
|||
}
|
||||
patsize = newpatsize;
|
||||
if (add) {
|
||||
while (n--)
|
||||
*patcode++ = *add++;
|
||||
if (paflags & PA_UNMETA) {
|
||||
/*
|
||||
* Unmetafy and untokenize the string as we go.
|
||||
* The Meta characters in add aren't counted in n.
|
||||
*/
|
||||
while (n--) {
|
||||
if (itok(*add))
|
||||
*patcode++ = ztokens[*add++ - Pound];
|
||||
else if (*add == Meta) {
|
||||
add++;
|
||||
*patcode++ = *add++ ^ 32;
|
||||
} else {
|
||||
*patcode++ = *add++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
while (n--)
|
||||
*patcode++ = *add++;
|
||||
}
|
||||
} else
|
||||
*patcode++ = ch;
|
||||
patcode = patout + patsize;
|
||||
|
@ -297,13 +351,22 @@ patcompstart(void)
|
|||
patglobflags = GF_IGNCASE;
|
||||
}
|
||||
|
||||
/* Top level pattern compilation subroutine */
|
||||
/*
|
||||
* Top level pattern compilation subroutine
|
||||
* exp is a null-terminated, metafied string.
|
||||
* inflags is an or of some PAT_* flags.
|
||||
* endexp, if non-null, is set to a pointer to the end of the
|
||||
* part of exp which was compiled. This is used when
|
||||
* compiling patterns for directories which must be
|
||||
* matched recursively.
|
||||
*/
|
||||
|
||||
/**/
|
||||
mod_export Patprog
|
||||
patcompile(char *exp, int inflags, char **endexp)
|
||||
{
|
||||
int flags = 0, len = 0;
|
||||
int flags = 0;
|
||||
long len = 0;
|
||||
long startoff;
|
||||
Upat pscan;
|
||||
char *lng, *strp = NULL;
|
||||
|
@ -324,7 +387,7 @@ patcompile(char *exp, int inflags, char **endexp)
|
|||
* in struct is actual count of parentheses.
|
||||
*/
|
||||
patnpar = 1;
|
||||
patflags = inflags & ~PAT_PURES;
|
||||
patflags = inflags & ~(PAT_PURES|PAT_HAS_EXCLUDP);
|
||||
|
||||
patendseg = endseg;
|
||||
patendseglen = isset(EXTENDEDGLOB) ? PATENDSEGLEN_EXT : PATENDSEGLEN_NORM;
|
||||
|
@ -366,7 +429,12 @@ patcompile(char *exp, int inflags, char **endexp)
|
|||
if (patcompswitch(0, &flags) == 0)
|
||||
return NULL;
|
||||
} else {
|
||||
/* Yes, copy the string and skip compilation altogether */
|
||||
/*
|
||||
* Yes, copy the string, and skip compilation altogether.
|
||||
* Null terminate for the benefit of globbing.
|
||||
* Leave metafied both for globbing and for our own
|
||||
* efficiency.
|
||||
*/
|
||||
patparse = strp;
|
||||
len = strp - exp;
|
||||
patadd(exp, 0, len + 1, 0);
|
||||
|
@ -404,19 +472,52 @@ patcompile(char *exp, int inflags, char **endexp)
|
|||
for (; pscan; pscan = next) {
|
||||
next = PATNEXT(pscan);
|
||||
if (P_OP(pscan) == P_EXACTLY) {
|
||||
char *opnd = (char *)P_OPERAND(pscan);
|
||||
while ((*dst = *opnd++))
|
||||
dst++;
|
||||
char *opnd = P_LS_STR(pscan), *mtest;
|
||||
long oplen = P_LS_LEN(pscan), ilen;
|
||||
int nmeta = 0;
|
||||
/*
|
||||
* Unfortunately we unmetafied the string
|
||||
* and we need to put any metacharacters
|
||||
* back now we know it's a pure string.
|
||||
* This shouldn't happen too often, it's
|
||||
* just that there are some cases such
|
||||
* as . and .. in files where we really
|
||||
* need a pure string even if there are
|
||||
* pattern characters flying around.
|
||||
*/
|
||||
for (mtest = opnd, ilen = oplen; ilen;
|
||||
mtest++, ilen--)
|
||||
if (imeta(*mtest))
|
||||
nmeta++;
|
||||
if (nmeta) {
|
||||
char *oldpatout = patout;
|
||||
patadd(NULL, 0, nmeta, 0);
|
||||
/*
|
||||
* Yuk.
|
||||
*/
|
||||
p = (Patprog)patout;
|
||||
opnd = patout + (opnd - oldpatout);
|
||||
dst = patout + startoff;
|
||||
}
|
||||
|
||||
while (oplen--) {
|
||||
if (imeta(*opnd)) {
|
||||
*dst++ = Meta;
|
||||
*dst++ = *opnd ^ 32;
|
||||
} else {
|
||||
*dst++ = *opnd++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
*dst++ = '\0';
|
||||
p->size = dst - patout;
|
||||
/* patmlen is really strlen, don't include null byte */
|
||||
p->patmlen = p->size - startoff - 1;
|
||||
/* patmlen is really strlen. We don't need a null. */
|
||||
p->patmlen = p->size - startoff;
|
||||
} else {
|
||||
/* starting point info */
|
||||
if (P_OP(pscan) == P_EXACTLY && !p->globflags)
|
||||
p->patstartch = *(char *)P_OPERAND(pscan);
|
||||
if (P_OP(pscan) == P_EXACTLY && !p->globflags &&
|
||||
P_LS_LEN(pscan))
|
||||
p->patstartch = *P_LS_STR(pscan);
|
||||
/*
|
||||
* Find the longest literal string in something expensive.
|
||||
* This is itself not all that cheap if we have
|
||||
|
@ -427,9 +528,9 @@ patcompile(char *exp, int inflags, char **endexp)
|
|||
len = 0;
|
||||
for (; pscan; pscan = PATNEXT(pscan))
|
||||
if (P_OP(pscan) == P_EXACTLY &&
|
||||
(int)strlen((char *)P_OPERAND(pscan)) >= len) {
|
||||
lng = (char *)P_OPERAND(pscan);
|
||||
len = strlen(lng);
|
||||
P_LS_LEN(pscan) >= len) {
|
||||
lng = P_LS_STR(pscan);
|
||||
len = P_LS_LEN(pscan);
|
||||
}
|
||||
if (lng) {
|
||||
p->mustoff = lng - patout;
|
||||
|
@ -844,9 +945,9 @@ static long
|
|||
patcomppiece(int *flagp)
|
||||
{
|
||||
long starter = 0, next, pound, op;
|
||||
int flags, flags2, kshchar, len, ch, patch;
|
||||
int flags, flags2, kshchar, len, ch, patch, nmeta;
|
||||
union upat up;
|
||||
char *nptr, *str0, cbuf[2];
|
||||
char *nptr, *str0, *ptr, cbuf[2];
|
||||
zrange_t from, to;
|
||||
|
||||
flags = 0;
|
||||
|
@ -881,6 +982,9 @@ patcomppiece(int *flagp)
|
|||
}
|
||||
|
||||
if (patparse > str0) {
|
||||
long slen = patparse - str0;
|
||||
int morelen;
|
||||
|
||||
/* Ordinary string: cancel kshchar lookahead */
|
||||
kshchar = '\0';
|
||||
/*
|
||||
|
@ -889,25 +993,40 @@ patcomppiece(int *flagp)
|
|||
flags |= P_PURESTR;
|
||||
DPUTS(patparse == str0, "BUG: matched nothing in patcomppiece.");
|
||||
/* more than one character matched? */
|
||||
len = str0 + (*str0 == Meta ? 2 : 1) < patparse;
|
||||
morelen = str0 + (*str0 == Meta ? 2 : 1) < patparse;
|
||||
/*
|
||||
* If we have more than one character, a following hash only
|
||||
* applies to the last, so decrement.
|
||||
*/
|
||||
if (isset(EXTENDEDGLOB) && *patparse == Pound && len)
|
||||
if (isset(EXTENDEDGLOB) && *patparse == Pound && morelen)
|
||||
patparse -= (patparse > str0 + 1 && patparse[-2] == Meta) ? 2 : 1;
|
||||
/*
|
||||
* If len is 1, we can't have an active # following, so doesn't
|
||||
* matter that we don't make X in `XX#' simple.
|
||||
*/
|
||||
if (!len)
|
||||
if (!morelen)
|
||||
flags |= P_SIMPLE;
|
||||
starter = patnode(P_EXACTLY);
|
||||
/* add enough space including null byte */
|
||||
len = patparse - str0;
|
||||
patadd(str0, 0, len + 1, 0);
|
||||
nptr = (char *)P_OPERAND((Upat)patout + starter);
|
||||
nptr[len] = '\0';
|
||||
|
||||
/* Get length of string without metafication. */
|
||||
nmeta = 0;
|
||||
for (ptr = str0; ptr < patparse; ptr++) {
|
||||
if (*ptr == Meta) {
|
||||
nmeta++;
|
||||
ptr++;
|
||||
}
|
||||
}
|
||||
slen = (patparse - str0) - nmeta;
|
||||
/* First add length, which is a long */
|
||||
patadd((char *)&slen, 0, sizeof(long), 0);
|
||||
/*
|
||||
* Then the string, not null terminated.
|
||||
* Unmetafy and untokenize; pass the final length,
|
||||
* which is what we need to allocate, i.e. not including
|
||||
* a count for each Meta in the string.
|
||||
*/
|
||||
patadd(str0, 0, slen, PA_UNMETA);
|
||||
nptr = P_LS_STR((Upat)patout + starter);
|
||||
/*
|
||||
* It's much simpler to turn off pure string mode for
|
||||
* any case-insensitive or approximate matching; usually,
|
||||
|
@ -918,13 +1037,13 @@ patcomppiece(int *flagp)
|
|||
* ..(#a1).. (i.e. the (#a1) has no effect), but if you're
|
||||
* going to write funny patterns, you get no sympathy from me.
|
||||
*/
|
||||
if (patglobflags &&
|
||||
(!(patflags & PAT_FILE) || (strcmp(nptr, ".") &&
|
||||
strcmp(nptr, ".."))))
|
||||
flags &= ~P_PURESTR;
|
||||
for (; *nptr; METAINC(nptr))
|
||||
if (itok(*nptr))
|
||||
*nptr = ztokens[*nptr - Pound];
|
||||
if (patglobflags) {
|
||||
if (!(patflags & PAT_FILE))
|
||||
flags &= ~P_PURESTR;
|
||||
else if (!(nptr[0] == '.' &&
|
||||
(slen == 1 || (nptr[1] == '.' && slen == 2))))
|
||||
flags &= ~P_PURESTR;
|
||||
}
|
||||
} else {
|
||||
if (kshchar)
|
||||
patparse++;
|
||||
|
@ -950,7 +1069,7 @@ patcomppiece(int *flagp)
|
|||
starter = patnode(P_ANYOF);
|
||||
if (*patparse == Outbrack) {
|
||||
patparse++;
|
||||
patadd(NULL, ']', 1, 1);
|
||||
patadd(NULL, ']', 1, PA_NOALIGN);
|
||||
}
|
||||
while (*patparse && *patparse != Outbrack) {
|
||||
/* Meta is not a token */
|
||||
|
@ -990,7 +1109,7 @@ patcomppiece(int *flagp)
|
|||
ch = PP_UNKWN;
|
||||
patparse = nptr + 2;
|
||||
if (ch != PP_UNKWN)
|
||||
patadd(NULL, STOUC(Meta+ch), 1, 1);
|
||||
patadd(NULL, STOUC(Meta+ch), 1, PA_NOALIGN);
|
||||
continue;
|
||||
}
|
||||
if (itok(*patparse)) {
|
||||
|
@ -1003,15 +1122,17 @@ patcomppiece(int *flagp)
|
|||
patparse++;
|
||||
|
||||
if (*patparse == '-' && patparse[1] != Outbrack) {
|
||||
patadd(NULL, STOUC(Meta+PP_RANGE), 1, 1);
|
||||
patadd(cbuf, 0, (cbuf[0] == Meta) ? 2 : 1, 1);
|
||||
patadd(NULL, STOUC(Meta+PP_RANGE), 1, PA_NOALIGN);
|
||||
patadd(cbuf, 0, (cbuf[0] == Meta) ? 2 : 1, PA_NOALIGN);
|
||||
if (itok(*++patparse)) {
|
||||
patadd(0, STOUC(ztokens[*patparse - Pound]), 1, 1);
|
||||
patadd(0, STOUC(ztokens[*patparse - Pound]), 1,
|
||||
PA_NOALIGN);
|
||||
} else
|
||||
patadd(patparse, 0, (*patparse == Meta) ? 2 : 1, 1);
|
||||
patadd(patparse, 0, (*patparse == Meta) ? 2 : 1,
|
||||
PA_NOALIGN);
|
||||
METAINC(patparse);
|
||||
} else
|
||||
patadd(cbuf, 0, (cbuf[0] == Meta) ? 2 : 1, 1);
|
||||
patadd(cbuf, 0, (cbuf[0] == Meta) ? 2 : 1, PA_NOALIGN);
|
||||
}
|
||||
if (*patparse != Outbrack)
|
||||
return 0;
|
||||
|
@ -1309,19 +1430,11 @@ static void patoptail(long p, long val)
|
|||
*/
|
||||
static char *patinstart; /* Start of input string */
|
||||
static char *patinend; /* End of input string */
|
||||
static char *patinput; /* String input pointer */
|
||||
static char *patinpath; /* Full path for use with ~ exclusions */
|
||||
|
||||
/**/
|
||||
char *patinput; /* String input pointer */
|
||||
|
||||
/*
|
||||
* Offset of string at which we are trying to match.
|
||||
* This is added in to the positions recorded in patbeginp and patendp
|
||||
* when we are looking for substrings. Currently this only happens
|
||||
* in the parameter substitution code.
|
||||
*/
|
||||
/**/
|
||||
int patoffset;
|
||||
static int patinlen; /* Length of last successful match.
|
||||
* Includes count of Meta characters.
|
||||
*/
|
||||
|
||||
static char *patbeginp[NSUBEXP]; /* Pointer to backref beginnings */
|
||||
static char *patendp[NSUBEXP]; /* Pointer to backref ends */
|
||||
|
@ -1329,9 +1442,24 @@ static int parsfound; /* parentheses (with backrefs) found */
|
|||
|
||||
static int globdots; /* Glob initial dots? */
|
||||
|
||||
/*
|
||||
* Macros which are currently trivial but are likely to be less
|
||||
* so when we handle multibyte characters. They operate on
|
||||
* umetafied strings.
|
||||
*/
|
||||
|
||||
/* Get a character from the start point in a string */
|
||||
#define CHARREF(x) (STOUC(*x))
|
||||
/* Get a pointer to the next character */
|
||||
#define CHARNEXT(x) (x+1)
|
||||
/* Increment a pointer past the current character. */
|
||||
#define CHARINC(x) (x++)
|
||||
/* Counter the number of characters between two pointers, largest first */
|
||||
#define CHARSUB(x,y) (x-y)
|
||||
|
||||
/*
|
||||
* The following need to be accessed in the globbing scanner for
|
||||
* a multi-component file path. See horror story there.
|
||||
* a multi-component file path. See horror story in glob.c.
|
||||
*/
|
||||
/**/
|
||||
int errsfound; /* Total error count so far */
|
||||
|
@ -1347,23 +1475,59 @@ pattrystart(void)
|
|||
errsfound = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Test prog against null-terminated, metafied string.
|
||||
*/
|
||||
|
||||
/**/
|
||||
mod_export int
|
||||
pattry(Patprog prog, char *string)
|
||||
{
|
||||
return pattryrefs(prog, string, NULL, NULL, NULL);
|
||||
return pattryrefs(prog, string, -1, 0, NULL, NULL, NULL);
|
||||
}
|
||||
|
||||
/* The last three arguments are used to report the positions for the
|
||||
* backreferences. On entry, *nump should contain the maximum number
|
||||
* positions to report. */
|
||||
/*
|
||||
* Test prog against string of given length, no null termination
|
||||
* but still metafied at this point. offset gives an offset
|
||||
* to include in reported match indices
|
||||
*/
|
||||
|
||||
/**/
|
||||
mod_export int
|
||||
pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp)
|
||||
pattrylen(Patprog prog, char *string, int len, int offset)
|
||||
{
|
||||
int i, maxnpos = 0, ret;
|
||||
char **sp, **ep;
|
||||
return pattryrefs(prog, string, len, offset, NULL, NULL, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Test prog against string with given length stringlen, which
|
||||
* may be -1 to indicate a null-terminated string. The input
|
||||
* string is metafied; the length is the raw string length, not the
|
||||
* number of possibly metafied characters.
|
||||
*
|
||||
* offset is the position in the original string (not seen by
|
||||
* the patter module) at which we are trying to match.
|
||||
* This is added in to the positions recorded in patbeginp and patendp
|
||||
* when we are looking for substrings. Currently this only happens
|
||||
* in the parameter substitution code.
|
||||
*
|
||||
* Note this is a character offset, i.e. a metafied character
|
||||
* counts as 1.
|
||||
*
|
||||
* The last three arguments are used to report the positions for the
|
||||
* backreferences. On entry, *nump should contain the maximum number
|
||||
* of positions to report. In this case the match, mbegin, mend
|
||||
* arrays are not altered.
|
||||
*/
|
||||
|
||||
/**/
|
||||
mod_export int
|
||||
pattryrefs(Patprog prog, char *string, int stringlen, int patoffset,
|
||||
int *nump, int *begp, int *endp)
|
||||
{
|
||||
int i, maxnpos = 0, ret, needfullpath, unmetalen, unmetalenp;
|
||||
int origlen;
|
||||
char **sp, **ep, *tryalloced, *ptr;
|
||||
char *progstr = (char *)prog + prog->startoff;
|
||||
|
||||
if (nump) {
|
||||
|
@ -1374,12 +1538,87 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp)
|
|||
if (*string == Nularg)
|
||||
string++;
|
||||
|
||||
patinstart = patinput = string;
|
||||
patinend = patinstart + strlen(patinstart);
|
||||
if (stringlen < 0)
|
||||
stringlen = strlen(string);
|
||||
origlen = stringlen;
|
||||
|
||||
patflags = prog->flags;
|
||||
/*
|
||||
* For a top-level ~-exclusion, we will need the full
|
||||
* path to exclude, so copy the path so far and append the
|
||||
* current test string.
|
||||
*/
|
||||
needfullpath = (patflags & PAT_HAS_EXCLUDP) && pathpos;
|
||||
|
||||
/* Get the length of the full string when unmetafied. */
|
||||
unmetalen = ztrsub(string + stringlen, string);
|
||||
if (needfullpath)
|
||||
unmetalenp = ztrsub(pathbuf + pathpos, pathbuf);
|
||||
else
|
||||
unmetalenp = 0;
|
||||
|
||||
DPUTS(needfullpath && (patflags & (PAT_PURES|PAT_ANY)),
|
||||
"rum sort of file exclusion");
|
||||
/*
|
||||
* Partly for efficiency, and partly for the convenience of
|
||||
* globbing, we don't unmetafy pure string patterns, and
|
||||
* there's no reason to if the pattern is just a *.
|
||||
*/
|
||||
if (!(patflags & (PAT_PURES|PAT_ANY))
|
||||
&& (needfullpath || unmetalen != stringlen)) {
|
||||
/*
|
||||
* We need to copy if we need to prepend the path so far
|
||||
* (in which case we copy both chunks), or if we have
|
||||
* Meta characters.
|
||||
*/
|
||||
char *dst;
|
||||
int icopy, ncopy;
|
||||
|
||||
dst = tryalloced = zalloc(unmetalen + unmetalenp);
|
||||
|
||||
if (needfullpath) {
|
||||
/* loop twice, copy path buffer first time */
|
||||
ptr = pathbuf;
|
||||
ncopy = unmetalenp;
|
||||
} else {
|
||||
/* just loop once, copy string with unmetafication */
|
||||
ptr = string;
|
||||
ncopy = unmetalen;
|
||||
}
|
||||
for (icopy = 0; icopy < 2; icopy++) {
|
||||
for (i = 0; i < ncopy; i++) {
|
||||
if (*ptr == Meta) {
|
||||
ptr++;
|
||||
*dst++ = *ptr++ ^ 32;
|
||||
} else {
|
||||
*dst++ = *ptr++;
|
||||
}
|
||||
}
|
||||
if (!needfullpath)
|
||||
break;
|
||||
/* next time append test string to path so far */
|
||||
ptr = string;
|
||||
ncopy = unmetalen;
|
||||
}
|
||||
|
||||
if (needfullpath) {
|
||||
patinstart = tryalloced + unmetalenp;
|
||||
patinpath = tryalloced;
|
||||
} else {
|
||||
patinstart = tryalloced;
|
||||
patinpath = NULL;
|
||||
}
|
||||
stringlen = unmetalen;
|
||||
} else {
|
||||
patinstart = string;
|
||||
tryalloced = patinpath = NULL;
|
||||
}
|
||||
|
||||
patinend = patinstart + stringlen;
|
||||
/*
|
||||
* From now on we do not require NULL termination of
|
||||
* the test string. It is still metafied, as is string
|
||||
* data in the prog.
|
||||
* the test string. There should also be no more references
|
||||
* to the variable string.
|
||||
*/
|
||||
|
||||
if (prog->flags & (PAT_PURES|PAT_ANY)) {
|
||||
|
@ -1399,11 +1638,11 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp)
|
|||
* Testing a pure string. See if initial
|
||||
* components match.
|
||||
*/
|
||||
int lendiff = (patinend - patinstart) - prog->patmlen;
|
||||
int lendiff = stringlen - prog->patmlen;
|
||||
if (lendiff < 0) {
|
||||
/* No, the pattern string is too long. */
|
||||
ret = 0;
|
||||
} else if (!memcmp(progstr, string, prog->patmlen)) {
|
||||
} else if (!memcmp(progstr, patinstart, prog->patmlen)) {
|
||||
/*
|
||||
* Initial component matches. Matches either
|
||||
* if lengths are the same or we are not anchored
|
||||
|
@ -1420,47 +1659,61 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp)
|
|||
* For files, we won't match initial "."s unless
|
||||
* glob_dots is set.
|
||||
*/
|
||||
if ((prog->flags & PAT_NOGLD) && *string == '.')
|
||||
return 0;
|
||||
/* in case used for ${..#..} etc. */
|
||||
patinput = string + prog->patmlen;
|
||||
/* if matching files, must update globbing flags */
|
||||
patglobflags = prog->globend;
|
||||
return 1;
|
||||
} else
|
||||
return 0;
|
||||
if ((prog->flags & PAT_NOGLD) && *patinstart == '.') {
|
||||
ret = 0;
|
||||
} else {
|
||||
/*
|
||||
* Remember the length in case used for ${..#..} etc.
|
||||
* In this case, we didn't unmetafy the string.
|
||||
*/
|
||||
patinlen = (int)prog->patmlen;
|
||||
/* if matching files, must update globbing flags */
|
||||
patglobflags = prog->globend;
|
||||
}
|
||||
}
|
||||
|
||||
if (tryalloced)
|
||||
zfree(tryalloced, unmetalen + unmetalenp);
|
||||
|
||||
return ret;
|
||||
} else {
|
||||
/*
|
||||
* Test for a `must match' string, unless we're scanning for a match
|
||||
* in which case we don't need to do this each time.
|
||||
*/
|
||||
ret = 1;
|
||||
if (!(prog->flags & PAT_SCAN) && prog->mustoff)
|
||||
{
|
||||
char *testptr; /* start pointer into test string */
|
||||
char *teststop; /* last point from which we can match */
|
||||
char *patptr = (char *)prog + prog->mustoff;
|
||||
int patlen = strlen(patptr);
|
||||
int patlen = prog->patmlen;
|
||||
int found = 0;
|
||||
|
||||
if (patlen > patinend - patinstart) {
|
||||
if (patlen > stringlen) {
|
||||
/* Too long, can't match. */
|
||||
return 0;
|
||||
}
|
||||
teststop = patinend - patlen;
|
||||
ret = 0;
|
||||
} else {
|
||||
teststop = patinend - patlen;
|
||||
|
||||
for (testptr = patinstart; testptr <= teststop; testptr++)
|
||||
{
|
||||
if (!memcmp(testptr, patptr, patlen)) {
|
||||
found = 1;
|
||||
break;
|
||||
for (testptr = patinstart; testptr <= teststop; testptr++)
|
||||
{
|
||||
if (!memcmp(testptr, patptr, patlen)) {
|
||||
found = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!found)
|
||||
return 0;
|
||||
if (!found)
|
||||
ret = 0;
|
||||
}
|
||||
}
|
||||
if (!ret) {
|
||||
if (tryalloced)
|
||||
zfree(tryalloced, unmetalen + unmetalenp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
patflags = prog->flags;
|
||||
patglobflags = prog->globflags;
|
||||
if (!(patflags & PAT_FILE)) {
|
||||
forceerrs = -1;
|
||||
|
@ -1469,26 +1722,7 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp)
|
|||
globdots = !(patflags & PAT_NOGLD);
|
||||
parsfound = 0;
|
||||
|
||||
if ((patflags & PAT_HAS_EXCLUDP) && pathpos) {
|
||||
/*
|
||||
* For a top-level ~-exclusion, we will need the full
|
||||
* path to exclude, so copy the path so far and append the
|
||||
* current test string.
|
||||
*
|
||||
* There are some advantages in making patinstart etc.
|
||||
* point into this new string; however, that gets confusing
|
||||
* if we need patinput outside this file. That's
|
||||
* not likely for files but I don't think it's worth
|
||||
* the risk.
|
||||
*/
|
||||
int len = patinend - patinstart;
|
||||
|
||||
patinpath = (char *)zalloc(pathpos + len);
|
||||
memcpy(patinpath, pathbuf, pathpos);
|
||||
memcpy(patinpath + pathpos, patinstart, len);
|
||||
} else {
|
||||
patinpath = NULL;
|
||||
}
|
||||
patinput = patinstart;
|
||||
|
||||
if (patmatch((Upat)progstr)) {
|
||||
/*
|
||||
|
@ -1496,6 +1730,23 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp)
|
|||
* failed, so set it now
|
||||
*/
|
||||
patglobflags = prog->globend;
|
||||
|
||||
/*
|
||||
* Record length of successful match, including Meta
|
||||
* characters. Do it here so that patmatchlen() can return
|
||||
* it even if we delete the pattern strings.
|
||||
*/
|
||||
patinlen = patinput - patinstart;
|
||||
/*
|
||||
* Optimization: if we didn't find any Meta characters
|
||||
* to begin with, we don't need to look for them now.
|
||||
*/
|
||||
if (unmetalen != origlen) {
|
||||
for (ptr = patinstart; ptr < patinput; ptr++)
|
||||
if (imeta(*ptr))
|
||||
patinlen++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Should we clear backreferences and matches on a failed
|
||||
* match?
|
||||
|
@ -1504,15 +1755,18 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp)
|
|||
/*
|
||||
* m flag: for global match. This carries no overhead
|
||||
* in the pattern matching part.
|
||||
*
|
||||
* Remember the test pattern is already unmetafied.
|
||||
*/
|
||||
char *str;
|
||||
int mlen = ztrsub(patinput, patinstart);
|
||||
int mlen = CHARSUB(patinput, patinstart);
|
||||
|
||||
str = ztrduppfx(patinstart, patinput - patinstart);
|
||||
str = metafy(patinstart, patinput - patinstart, META_DUP);
|
||||
setsparam("MATCH", str);
|
||||
setiparam("MBEGIN", (zlong)(patoffset + !isset(KSHARRAYS)));
|
||||
setiparam("MEND",
|
||||
(zlong)(mlen + patoffset + !isset(KSHARRAYS) - 1));
|
||||
(zlong)(mlen + patoffset +
|
||||
!isset(KSHARRAYS) - 1));
|
||||
}
|
||||
if (prog->patnpar && nump) {
|
||||
/*
|
||||
|
@ -1527,9 +1781,10 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp)
|
|||
for (i = 0; i < prog->patnpar && i < maxnpos; i++) {
|
||||
if (parsfound & (1 << i)) {
|
||||
if (begp)
|
||||
*begp++ = ztrsub(*sp, patinstart) + patoffset;
|
||||
*begp++ = CHARSUB(*sp, patinstart) + patoffset;
|
||||
if (endp)
|
||||
*endp++ = ztrsub(*ep, patinstart) + patoffset - 1;
|
||||
*endp++ = CHARSUB(*ep, patinstart) + patoffset
|
||||
- 1;
|
||||
} else {
|
||||
if (begp)
|
||||
*begp++ = -1;
|
||||
|
@ -1557,7 +1812,7 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp)
|
|||
|
||||
for (i = 0; i < prog->patnpar; i++) {
|
||||
if (parsfound & (1 << i)) {
|
||||
matcharr[i] = ztrduppfx(*sp, *ep - *sp);
|
||||
matcharr[i] = metafy(*sp, *ep - *sp, META_DUP);
|
||||
/*
|
||||
* mbegin and mend give indexes into the string
|
||||
* in the standard notation, i.e. respecting
|
||||
|
@ -1568,12 +1823,12 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp)
|
|||
* corresponds to indexing as ${foo[1,1]}.
|
||||
*/
|
||||
sprintf(numbuf, "%ld",
|
||||
(long)(ztrsub(*sp, patinstart) +
|
||||
(long)(CHARSUB(*sp, patinstart) +
|
||||
patoffset +
|
||||
!isset(KSHARRAYS)));
|
||||
mbeginarr[i] = ztrdup(numbuf);
|
||||
sprintf(numbuf, "%ld",
|
||||
(long)(ztrsub(*ep, patinstart) +
|
||||
(long)(CHARSUB(*ep, patinstart) +
|
||||
patoffset +
|
||||
!isset(KSHARRAYS) - 1));
|
||||
mendarr[i] = ztrdup(numbuf);
|
||||
|
@ -1593,19 +1848,31 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp)
|
|||
setaparam("mbegin", mbeginarr);
|
||||
setaparam("mend", mendarr);
|
||||
}
|
||||
|
||||
ret = 1;
|
||||
} else
|
||||
ret = 0;
|
||||
|
||||
if (patinpath) {
|
||||
zfree(patinpath, pathpos + (patinend - patinstart));
|
||||
patinpath = NULL;
|
||||
}
|
||||
if (tryalloced)
|
||||
zfree(tryalloced, unmetalen + unmetalenp);
|
||||
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Return length of previous succesful match. This is
|
||||
* in metafied bytes, i.e. includes a count of Meta characters.
|
||||
* Unusual and futile attempt at modular encapsulation.
|
||||
*/
|
||||
|
||||
/**/
|
||||
int
|
||||
patmatchlen(void)
|
||||
{
|
||||
return patinlen;
|
||||
}
|
||||
|
||||
/*
|
||||
* Match literal characters with case insensitivity test: the first
|
||||
* comes from the input string, the second the current pattern.
|
||||
|
@ -1627,8 +1894,11 @@ pattryrefs(Patprog prog, char *string, int *nump, int *begp, int *endp)
|
|||
* exactpos is used to remember how far down an exact string we have
|
||||
* matched, if we are doing approximation and can therefore redo from
|
||||
* the same point; we never need to otherwise.
|
||||
*
|
||||
* exactend is a pointer to the end of the string, which isn't
|
||||
* null-terminated.
|
||||
*/
|
||||
static char *exactpos;
|
||||
static char *exactpos, *exactend;
|
||||
|
||||
/*
|
||||
* Main matching routine.
|
||||
|
@ -1643,7 +1913,7 @@ patmatch(Upat prog)
|
|||
{
|
||||
/* Current and next nodes */
|
||||
Upat scan = prog, next, opnd;
|
||||
char *start, *save, *chrop, *compend;
|
||||
char *start, *save, *chrop, *chrend, *compend;
|
||||
int savglobflags, op, no, min, nextch, fail = 0, saverrsfound;
|
||||
zrange_t from, to, comp;
|
||||
|
||||
|
@ -1659,45 +1929,52 @@ patmatch(Upat prog)
|
|||
if (patinput == patinend)
|
||||
fail = 1;
|
||||
else
|
||||
METAINC(patinput);
|
||||
CHARINC(patinput);
|
||||
break;
|
||||
case P_EXACTLY:
|
||||
/*
|
||||
* acts as nothing if *chrop is null: this is used by
|
||||
* approx code.
|
||||
*/
|
||||
chrop = exactpos ? exactpos : (char *)P_OPERAND(scan);
|
||||
if (exactpos) {
|
||||
chrop = exactpos;
|
||||
chrend = exactend;
|
||||
} else {
|
||||
chrop = P_LS_STR(scan);
|
||||
chrend = chrop + P_LS_LEN(scan);
|
||||
}
|
||||
exactpos = NULL;
|
||||
while (*chrop && patinput < patinend) {
|
||||
int chin = STOUC(UNMETA(patinput));
|
||||
int chpa = STOUC(UNMETA(chrop));
|
||||
while (chrop < chrend && patinput < patinend) {
|
||||
int chin = CHARREF(patinput);
|
||||
int chpa = CHARREF(chrop);
|
||||
if (!CHARMATCH(chin, chpa)) {
|
||||
fail = 1;
|
||||
break;
|
||||
}
|
||||
METAINC(chrop);
|
||||
METAINC(patinput);
|
||||
CHARINC(chrop);
|
||||
CHARINC(patinput);
|
||||
}
|
||||
if (*chrop) {
|
||||
if (chrop < chrend) {
|
||||
exactpos = chrop;
|
||||
exactend = chrend;
|
||||
fail = 1;
|
||||
}
|
||||
break;
|
||||
case P_ANYOF:
|
||||
if (patinput == patinend ||
|
||||
!patmatchrange((char *)P_OPERAND(scan),
|
||||
STOUC(UNMETA(patinput))))
|
||||
CHARREF(patinput)))
|
||||
fail = 1;
|
||||
else
|
||||
METAINC(patinput);
|
||||
CHARINC(patinput);
|
||||
break;
|
||||
case P_ANYBUT:
|
||||
if (patinput == patinend ||
|
||||
patmatchrange((char *)P_OPERAND(scan),
|
||||
STOUC(UNMETA(patinput))))
|
||||
CHARREF(patinput)))
|
||||
fail = 1;
|
||||
else
|
||||
METAINC(patinput);
|
||||
CHARINC(patinput);
|
||||
break;
|
||||
case P_NUMRNG:
|
||||
case P_NUMFROM:
|
||||
|
@ -1771,7 +2048,8 @@ patmatch(Upat prog)
|
|||
return 1;
|
||||
}
|
||||
if (!no && P_OP(next) == P_EXACTLY &&
|
||||
!idigit(STOUC(*(char *)P_OPERAND(next))) &&
|
||||
(!P_LS_LEN(next) ||
|
||||
!idigit(STOUC(*P_LS_STR(next)))) &&
|
||||
!(patglobflags & 0xff))
|
||||
return 0;
|
||||
patinput = --save;
|
||||
|
@ -1791,7 +2069,7 @@ patmatch(Upat prog)
|
|||
case P_NUMANY:
|
||||
/* This is <->: any old set of digits, don't bother comparing */
|
||||
start = patinput;
|
||||
while (patinput < patinend && idigit(STOUC(*patinput)))
|
||||
while (patinput < patinend && idigit(CHARREF(patinput)))
|
||||
patinput++;
|
||||
save = patinput;
|
||||
no = 0;
|
||||
|
@ -1799,7 +2077,8 @@ patmatch(Upat prog)
|
|||
if (patmatch(next))
|
||||
return 1;
|
||||
if (!no && P_OP(next) == P_EXACTLY &&
|
||||
!idigit(STOUC(*(char *)P_OPERAND(next))) &&
|
||||
(!P_LS_LEN(next) ||
|
||||
!idigit(CHARREF(P_LS_STR(next)))) &&
|
||||
!(patglobflags & 0xff))
|
||||
return 0;
|
||||
patinput = --save;
|
||||
|
@ -1963,7 +2242,7 @@ patmatch(Upat prog)
|
|||
origpatinend = patinend;
|
||||
while ((ret = patmatch(P_OPERAND(scan)))) {
|
||||
unsigned char *syncpt;
|
||||
char *savpatinstart, *savpatinend;
|
||||
char *savpatinstart;
|
||||
int savforce = forceerrs;
|
||||
int savpatflags = patflags, synclen;
|
||||
forceerrs = -1;
|
||||
|
@ -1998,7 +2277,6 @@ patmatch(Upat prog)
|
|||
patflags |= PAT_NOTEND;
|
||||
}
|
||||
savpatinstart = patinstart;
|
||||
savpatinend = patinend;
|
||||
next = PATNEXT(scan);
|
||||
while (next && P_ISEXCLUDE(next)) {
|
||||
patinput = save;
|
||||
|
@ -2013,14 +2291,15 @@ patmatch(Upat prog)
|
|||
opnd = P_OPERAND(next) + 1;
|
||||
if (P_OP(next) == P_EXCLUDP && patinpath) {
|
||||
/*
|
||||
* top level exclusion with a file,
|
||||
* Top level exclusion with a file,
|
||||
* applies to whole path so add the
|
||||
* segments already matched
|
||||
* segments already matched.
|
||||
* We copied these in front of the
|
||||
* test pattern, so patinend doesn't
|
||||
* need moving.
|
||||
*/
|
||||
DPUTS(patinput != patinstart,
|
||||
"BUG: not at start excluding path");
|
||||
patinend = patinpath + pathpos +
|
||||
(patinend - patinstart);
|
||||
patinput = patinstart = patinpath;
|
||||
}
|
||||
if (patmatch(opnd)) {
|
||||
|
@ -2036,7 +2315,6 @@ patmatch(Upat prog)
|
|||
patinput = savpatinstart +
|
||||
(patinput - patinstart);
|
||||
patinstart = savpatinstart;
|
||||
patinend = savpatinend;
|
||||
}
|
||||
if (!ret)
|
||||
break;
|
||||
|
@ -2146,7 +2424,7 @@ patmatch(Upat prog)
|
|||
/* Note that no counts possibly metafied characters */
|
||||
start = patinput;
|
||||
if (op == P_STAR) {
|
||||
for (no = 0; patinput < patinend; METAINC(patinput))
|
||||
for (no = 0; patinput < patinend; CHARINC(patinput))
|
||||
no++;
|
||||
/* simple optimization for reasonably common case */
|
||||
if (P_OP(next) == P_END)
|
||||
|
@ -2156,7 +2434,7 @@ patmatch(Upat prog)
|
|||
"BUG: wrong backtracking with approximation.");
|
||||
if (!globdots && P_NOTDOT(P_OPERAND(scan)) &&
|
||||
patinput == patinstart && patinput < patinend &&
|
||||
*patinput == '.')
|
||||
CHARREF(patinput) == '.')
|
||||
return 0;
|
||||
no = patrepeat(P_OPERAND(scan));
|
||||
}
|
||||
|
@ -2165,8 +2443,9 @@ patmatch(Upat prog)
|
|||
* Lookahead to avoid useless matches. This is not possible
|
||||
* with approximation.
|
||||
*/
|
||||
if (P_OP(next) == P_EXACTLY && !(patglobflags & 0xff)) {
|
||||
char *nextop = (char *)P_OPERAND(next);
|
||||
if (P_OP(next) == P_EXACTLY && P_LS_LEN(next) &&
|
||||
!(patglobflags & 0xff)) {
|
||||
char *nextop = P_LS_STR(next);
|
||||
/*
|
||||
* If that P_EXACTLY is last (common in simple patterns,
|
||||
* such as *.c), then it can be only be matched at one
|
||||
|
@ -2175,21 +2454,20 @@ patmatch(Upat prog)
|
|||
if (P_OP(PATNEXT(next)) == P_END &&
|
||||
!(patflags & PAT_NOANCH)) {
|
||||
int ptlen = patinend - patinput;
|
||||
int oplen = strlen(nextop);
|
||||
int lenmatch = patinend - (min ? METANEXT(start) : start);
|
||||
int lenmatch = patinend - (min ? CHARNEXT(start) : start);
|
||||
/* Are we in the right range? */
|
||||
if (oplen > lenmatch || oplen < ptlen)
|
||||
if (P_LS_LEN(next) > lenmatch || P_LS_LEN(next) < ptlen)
|
||||
return 0;
|
||||
/* Yes, just position appropriately and test. */
|
||||
patinput += ptlen - oplen;
|
||||
if (patinput > start && patinput[-1] == Meta) {
|
||||
/* doesn't align properly, no go */
|
||||
return 0;
|
||||
}
|
||||
patinput += ptlen - P_LS_LEN(next);
|
||||
/*
|
||||
* Here we will need to be careful that patinput is not
|
||||
* in the middle of a multibyte character.
|
||||
*/
|
||||
/* Continue loop with P_EXACTLY test. */
|
||||
break;
|
||||
}
|
||||
nextch = STOUC(UNMETA(nextop));
|
||||
nextch = CHARREF(nextop);
|
||||
} else
|
||||
nextch = -1;
|
||||
save = patinput;
|
||||
|
@ -2199,14 +2477,17 @@ patmatch(Upat prog)
|
|||
int charmatch_cache;
|
||||
if (nextch < 0 ||
|
||||
(patinput < patinend &&
|
||||
CHARMATCH_EXPR(STOUC(UNMETA(patinput)), nextch))) {
|
||||
CHARMATCH_EXPR(CHARREF(patinput), nextch))) {
|
||||
if (patmatch(next))
|
||||
return 1;
|
||||
}
|
||||
no--;
|
||||
save--;
|
||||
if (save > start && save[-1] == Meta)
|
||||
save--;
|
||||
/*
|
||||
* Here we will need to make sure save is
|
||||
* decremented properly to the start of
|
||||
* the preceeding multibyte character.
|
||||
*/
|
||||
patinput = save;
|
||||
patglobflags = savglobflags;
|
||||
errsfound = saverrsfound;
|
||||
|
@ -2270,7 +2551,7 @@ patmatch(Upat prog)
|
|||
|
||||
/* Try omitting a character from the input string */
|
||||
if (patinput < patinend) {
|
||||
METAINC(patinput);
|
||||
CHARINC(patinput);
|
||||
/* If we are not on an exact match, then this is
|
||||
* our last gasp effort, so we can optimize out
|
||||
* the recursive call.
|
||||
|
@ -2285,11 +2566,11 @@ patmatch(Upat prog)
|
|||
char *nextexact = savexact;
|
||||
DPUTS(!savexact || !*savexact,
|
||||
"BUG: exact match has not set exactpos");
|
||||
METAINC(nextexact);
|
||||
CHARINC(nextexact);
|
||||
|
||||
if (save < patinend) {
|
||||
char *nextin = save;
|
||||
METAINC(nextin);
|
||||
CHARINC(nextin);
|
||||
patglobflags = savglobflags;
|
||||
errsfound = saverrsfound;
|
||||
exactpos = savexact;
|
||||
|
@ -2299,18 +2580,18 @@ patmatch(Upat prog)
|
|||
* exactpos
|
||||
*/
|
||||
if (save < patinend && nextin < patinend &&
|
||||
*nextexact) {
|
||||
int cin0 = UNMETA(save);
|
||||
int cpa0 = UNMETA(exactpos);
|
||||
int cin1 = UNMETA(nextin);
|
||||
int cpa1 = UNMETA(nextexact);
|
||||
nextexact < exactend) {
|
||||
int cin0 = CHARREF(save);
|
||||
int cpa0 = CHARREF(exactpos);
|
||||
int cin1 = CHARREF(nextin);
|
||||
int cpa1 = CHARREF(nextexact);
|
||||
|
||||
if (CHARMATCH(cin0, cpa1) &&
|
||||
CHARMATCH(cin1, cpa0)) {
|
||||
patinput = nextin;
|
||||
METAINC(patinput);
|
||||
CHARINC(patinput);
|
||||
exactpos = nextexact;
|
||||
METAINC(exactpos);
|
||||
CHARINC(exactpos);
|
||||
if (patmatch(scan))
|
||||
return 1;
|
||||
|
||||
|
@ -2333,12 +2614,13 @@ patmatch(Upat prog)
|
|||
exactpos = savexact;
|
||||
}
|
||||
|
||||
DPUTS(exactpos == exactend, "approximating too far");
|
||||
/*
|
||||
* Try moving up the exact match pattern.
|
||||
* This must be the last attempt, so just loop
|
||||
* instead of calling recursively.
|
||||
*/
|
||||
METAINC(exactpos);
|
||||
CHARINC(exactpos);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
@ -2358,6 +2640,11 @@ patmatchrange(char *range, int ch)
|
|||
{
|
||||
int r1, r2;
|
||||
|
||||
/*
|
||||
* Careful here: unlike other strings, range is a NULL-terminated,
|
||||
* metafied string, because we need to treat the Posix and hyphenated
|
||||
* ranges specially.
|
||||
*/
|
||||
for (; *range; range++) {
|
||||
if (imeta(STOUC(*range))) {
|
||||
switch (STOUC(*range)-STOUC(Meta)) {
|
||||
|
@ -2459,23 +2746,24 @@ static int patrepeat(Upat p)
|
|||
break;
|
||||
#endif
|
||||
case P_EXACTLY:
|
||||
tch = STOUC(UNMETA(opnd));
|
||||
DPUTS(P_LS_LEN(p) != 1, "closure following more than one character");
|
||||
tch = CHARREF(P_LS_STR(p));
|
||||
while (scan < patinend &&
|
||||
CHARMATCH_EXPR(STOUC(UNMETA(scan)), tch)) {
|
||||
CHARMATCH_EXPR(CHARREF(scan), tch)) {
|
||||
count++;
|
||||
METAINC(scan);
|
||||
CHARINC(scan);
|
||||
}
|
||||
break;
|
||||
case P_ANYOF:
|
||||
while (scan < patinend && patmatchrange(opnd, STOUC(UNMETA(scan)))) {
|
||||
while (scan < patinend && patmatchrange(opnd, CHARREF(scan))) {
|
||||
count++;
|
||||
METAINC(scan);
|
||||
CHARINC(scan);
|
||||
}
|
||||
break;
|
||||
case P_ANYBUT:
|
||||
while (scan < patinend && !patmatchrange(opnd, STOUC(UNMETA(scan)))) {
|
||||
while (scan < patinend && !patmatchrange(opnd, CHARREF(scan))) {
|
||||
count++;
|
||||
METAINC(scan);
|
||||
CHARINC(scan);
|
||||
}
|
||||
break;
|
||||
#ifdef DEBUG
|
||||
|
@ -2528,7 +2816,14 @@ patdump(Patprog r)
|
|||
next = PATNEXT(up);
|
||||
printf("(%d)", next ? next-codestart : 0);
|
||||
s += sizeof(union upat);
|
||||
if (op == P_ANYOF || op == P_ANYBUT || op == P_EXACTLY) {
|
||||
if (op == P_EXACTLY) {
|
||||
long llen = *(long *)s;
|
||||
s += sizeof(long);
|
||||
while (llen--) {
|
||||
putchar(CHARREF(s));
|
||||
CHARINC(s);
|
||||
}
|
||||
} else if (op == P_ANYOF || op == P_ANYBUT) {
|
||||
while (*s != '\0') {
|
||||
if (itok(*s)) {
|
||||
if (*s == Meta + PP_RANGE) {
|
||||
|
|
|
@ -1088,10 +1088,10 @@ struct patprog {
|
|||
long startoff; /* length before start of programme */
|
||||
long size; /* total size from start of struct */
|
||||
long mustoff; /* offset to string that must be present */
|
||||
long patmlen; /* length of pure string or longest match */
|
||||
int globflags; /* globbing flags to set at start */
|
||||
int globend; /* globbing flags set after finish */
|
||||
int flags; /* PAT_* flags */
|
||||
int patmlen; /* length of pure string or longest match */
|
||||
int patnpar; /* number of active parentheses */
|
||||
char patstartch;
|
||||
};
|
||||
|
|
|
@ -132,6 +132,7 @@
|
|||
>0: [[ 633 = <-1000>33 ]]
|
||||
>0: [[ 633 = <1->33 ]]
|
||||
>0: [[ 633 = <->33 ]]
|
||||
>0: [[ 12345678901234567890123456789012345678901234567890123456789012345678901234567890foo = <42->foo ]]
|
||||
>0: [[ READ.ME = (#ia1)readme ]]
|
||||
>1: [[ READ..ME = (#ia1)readme ]]
|
||||
>0: [[ README = (#ia1)readm ]]
|
||||
|
|
Loading…
Reference in New Issue