1
0
Fork 0
mirror of git://git.code.sf.net/p/zsh/code synced 2024-05-13 11:06:17 +02:00
zsh/Src/Zle/compmatch.c
2021-11-12 23:54:34 +01:00

2973 lines
72 KiB
C

/*
* compmatch.c - the complete module, completion matching code
*
* This file is part of zsh, the Z shell.
*
* Copyright (c) 1999 Sven Wischnowsky
* All rights reserved.
*
* Permission is hereby granted, without written agreement and without
* license or royalty fees, to use, copy, modify, and distribute this
* software and to distribute modified versions of this software for any
* purpose, provided that the above copyright notice and the following
* two paragraphs appear in all copies of this software.
*
* In no event shall Sven Wischnowsky or the Zsh Development Group be liable
* to any party for direct, indirect, special, incidental, or consequential
* damages arising out of the use of this software and its documentation,
* even if Sven Wischnowsky and the Zsh Development Group have been advised of
* the possibility of such damage.
*
* Sven Wischnowsky and the Zsh Development Group specifically disclaim any
* warranties, including, but not limited to, the implied warranties of
* merchantability and fitness for a particular purpose. The software
* provided hereunder is on an "as is" basis, and Sven Wischnowsky and the
* Zsh Development Group have no obligation to provide maintenance,
* support, updates, enhancements, or modifications.
*
*/
#include "complete.mdh"
#include "compmatch.pro"
/*
* This compares two cpattern lists and returns non-zero if they are
* equal (N.B. opposite sense to usual *cmp()).
*
* The old version of this didn't worry about whether the lists
* were the same length. This one does. It's hard to see how
* that can be wrong even if it's unnecessary.
*/
/**/
static int
cpatterns_same(Cpattern a, Cpattern b)
{
while (a) {
if (!b)
return 0;
if (a->tp != b->tp)
return 0;
switch (a->tp) {
case CPAT_CCLASS:
case CPAT_NCLASS:
case CPAT_EQUIV:
/*
* Patterns can actually match the same even if
* the range strings don't compare differently, but
* I don't think we need to handle that subtlety.
*/
if (strcmp(a->u.str, b->u.str) != 0)
return 0;
break;
case CPAT_CHAR:
if (a->u.chr != b->u.chr)
return 0;
break;
default:
/* here to silence compiler */
break;
}
a = a->next;
b = b->next;
}
return !b;
}
/* This compares two cmatchers and returns non-zero if they are equal. */
/**/
static int
cmatchers_same(Cmatcher a, Cmatcher b)
{
return (a == b ||
(a->flags == b->flags &&
a->llen == b->llen && a->wlen == b->wlen &&
(!a->llen || cpatterns_same(a->line, b->line)) &&
(a->wlen <= 0 || cpatterns_same(a->word, b->word)) &&
(!(a->flags & (CMF_LEFT | CMF_RIGHT)) ||
(a->lalen == b->lalen && a->ralen == b->ralen &&
(!a->lalen || cpatterns_same(a->left, b->left)) &&
(!a->ralen || cpatterns_same(a->right, b->right))))));
}
/* Add the given matchers to the bmatcher list. */
/**/
mod_export void
add_bmatchers(Cmatcher m)
{
Cmlist old = bmatchers, *q = &bmatchers, n;
for (; m; m = m->next) {
if ((!m->flags && m->wlen > 0 && m->llen > 0) ||
(m->flags == CMF_RIGHT && m->wlen < 0 && !m->llen)) {
*q = n = (Cmlist) zhalloc(sizeof(struct cmlist));
n->matcher = m;
q = &(n->next);
}
}
*q = old;
}
/* This is called when the matchers in the mstack have changed to
* ensure that the bmatchers list contains no matchers not in mstack. */
/**/
mod_export void
update_bmatchers(void)
{
Cmlist p = bmatchers, ms;
Cmatcher mp;
int t;
while (p) {
t = 0;
for (ms = mstack; ms && !t; ms = ms->next)
for (mp = ms->matcher; mp && !t; mp = mp->next)
t = cmatchers_same(mp, p->matcher);
p = p->next;
if (!t) {
bmatchers = p;
}
}
}
/* This returns a new Cline structure. */
/**/
Cline
get_cline(char *l, int ll, char *w, int wl, char *o, int ol, int fl)
{
Cline r;
/* Prefer to take it from the buffer list (freecl), if there
* is none, allocate a new one. */
if ((r = freecl))
freecl = r->next;
else
r = (Cline) zhalloc(sizeof(*r));
r->next = NULL;
r->line = l; r->llen = ll;
r->word = w; r->wlen = wl;
DPUTS(wl > 0 && !*w, "Bad word");
r->orig = o; r->olen = ol;
r->slen = 0;
r->flags = fl;
r->prefix = r->suffix = NULL;
r->min = r->max = 0;
return r;
}
/* This frees a cline list. */
/**/
void
free_cline(Cline l)
{
Cline n;
while (l) {
n = l->next;
l->next = freecl;
freecl = l;
free_cline(l->prefix);
free_cline(l->suffix);
l = n;
}
}
/* Copy a cline list. */
/**/
Cline
cp_cline(Cline l, int deep)
{
Cline r = NULL, *p = &r, t, lp = NULL;
while (l) {
if ((t = freecl))
freecl = t->next;
else
t = (Cline) zhalloc(sizeof(*t));
memcpy(t, l, sizeof(*t));
if (deep) {
if (t->prefix)
t->prefix = cp_cline(t->prefix, 0);
if (t->suffix)
t->suffix = cp_cline(t->suffix, 0);
}
*p = lp = t;
p = &(t->next);
l = l->next;
}
*p = NULL;
return r;
}
/* Calculate the length of a cline and its sub-lists. */
/**/
int
cline_sublen(Cline l)
{
int len = ((l->flags & CLF_LINE) ? l->llen : l->wlen);
if (l->olen && !((l->flags & CLF_SUF) ? l->suffix : l->prefix))
len += l->olen;
else {
Cline p;
for (p = l->prefix; p; p = p->next)
len += ((p->flags & CLF_LINE) ? p->llen : p->wlen);
for (p = l->suffix; p; p = p->next)
len += ((p->flags & CLF_LINE) ? p->llen : p->wlen);
}
return len;
}
/* Set the lengths in the cline lists. */
/**/
void
cline_setlens(Cline l, int both)
{
while (l) {
l->min = cline_sublen(l);
if (both)
l->max = l->min;
l = l->next;
}
}
/* This sets the CLF_MATCHED flag in the given clines. */
/**/
void
cline_matched(Cline p)
{
while (p) {
p->flags |= CLF_MATCHED;
cline_matched(p->prefix);
cline_matched(p->suffix);
p = p->next;
}
}
/* This reverts the order of the elements of the given cline list and
* returns a pointer to the new head. */
/**/
Cline
revert_cline(Cline p)
{
Cline r = NULL, n;
while (p) {
n = p->next;
p->next = r;
r = p;
p = n;
}
return r;
}
/* Global variables used during matching: a char-buffer for the string to
* use for the match, and two cline lists for the two levels we use. */
/**/
char *matchbuf = NULL;
/**/
int matchbuflen = 0, matchbufadded;
/**/
Cline matchparts, matchlastpart;
/**/
Cline matchsubs, matchlastsub;
/* This initialises the variables above. */
/**/
static void
start_match(void)
{
if (matchbuf)
*matchbuf = '\0';
matchbufadded = 0;
matchparts = matchlastpart = matchsubs = matchlastsub = NULL;
}
/* This aborts a matching, freeing the cline lists build. */
/**/
static void
abort_match(void)
{
free_cline(matchparts);
free_cline(matchsubs);
matchparts = matchsubs = NULL;
}
/* This adds a new string in the static char buffer. The arguments are
* the matcher used (if any), the strings from the line and the word
* and the length of the string from the word. The last argument is
* non-zero if we are matching a suffix (where the given string has to
* be prepended to the contents of the buffer). */
/**/
static void
add_match_str(Cmatcher m, char *l, char *w, int wl, int sfx)
{
/* Get the string and length to insert: either from the line
* or from the match. */
if (m && (m->flags & CMF_LINE)) {
wl = m->llen; w = l;
}
if (wl) {
/* Probably resize the buffer. */
if (matchbuflen - matchbufadded <= wl) {
int blen = matchbuflen + wl + 20;
char *buf;
buf = (char *) zalloc(blen);
if (matchbuf) {
memcpy(buf, matchbuf, matchbuflen);
zfree(matchbuf, matchbuflen);
}
#ifdef DEBUG
else {
DPUTS(matchbuflen, "matchbuflen with no matchbuf");
}
#endif
matchbuf = buf;
matchbuflen = blen;
}
/* Insert the string. */
if (sfx) {
memmove(matchbuf + wl, matchbuf, matchbufadded + 1);
memcpy(matchbuf, w, wl);
} else
memcpy(matchbuf + matchbufadded, w, wl);
matchbufadded += wl;
matchbuf[matchbufadded] = '\0';
}
}
/* This adds a cline for a word-part during matching. Arguments are the
* matcher used, pointers to the line and word strings for the anchor,
* a pointer to the original line string for the whole part, the string
* before (or after) the anchor that has not yet been added, the length
* of the line-string for that, and a flag saying if we are matching a
* suffix. */
/**/
static void
add_match_part(Cmatcher m, char *l, char *w, int wl,
char *o, int ol, char *s, int sl, int osl, int sfx)
{
Cline p, lp, lprem;
/* If the anchors are equal, we keep only one. */
if (l && !strncmp(l, w, wl))
l = NULL;
/*
* Split the new part into parts and turn the last one into a
* `suffix' if we have a left anchor---don't do this if the last one
* came from a right anchor before the end of the part we're
* splitting.
*/
p = bld_parts(s, sl, osl, &lp, &lprem);
if (lprem && m && (m->flags & CMF_LEFT)) {
lprem->flags |= CLF_SUF;
lprem->suffix = lprem->prefix;
lprem->prefix = NULL;
}
/* cline lists for suffixes are sorted from back to front, so we have
* to revert the list we got. */
if (sfx)
p = revert_cline(lp = p);
/* Now add the sub-clines we already had. */
if (matchsubs) {
if (sfx) {
Cline q;
if ((q = lp->prefix)) {
while (q->next)
q = q->next;
q->next = matchsubs;
} else
lp->prefix = matchsubs;
matchlastsub->next = NULL;
} else {
matchlastsub->next = p->prefix;
p->prefix = matchsubs;
}
matchsubs = matchlastsub = NULL;
}
/* Store the arguments in the last part-cline. */
if (lp->llen || lp->wlen) {
lp->next = get_cline(l, wl, w, wl, o, ol, CLF_NEW);
lp = lp->next;
} else {
lp->line = l; lp->llen = wl;
lp->word = w; lp->wlen = wl;
DPUTS(wl > 0 && !*w, "Bad word");
lp->orig = o; lp->olen = ol;
}
if (o || ol)
lp->flags &= ~CLF_NEW;
/* Finally, put the new parts on the list. */
if (matchlastpart)
matchlastpart->next = p;
else
matchparts = p;
matchlastpart = lp;
}
/* This adds a new sub-cline. Arguments are the matcher and the strings from
* the line and the word. */
/**/
static void
add_match_sub(Cmatcher m, char *l, int ll, char *w, int wl)
{
int flags;
Cline n;
/* Check if we are interested only in the string from the line. */
if (m && (m->flags & CMF_LINE)) {
w = NULL; wl = 0;
flags = CLF_LINE;
} else
flags = 0;
/* And add the cline. */
if (wl || ll) {
Cline p, lp;
if ((p = n = bld_parts(w, wl, ll, &lp, NULL)) && n != lp) {
for (; p->next != lp; p = p->next);
if (matchsubs) {
matchlastsub->next = n->prefix;
n->prefix = matchsubs;
}
matchsubs = matchlastsub = lp;
if (matchlastpart)
matchlastpart->next = n;
else
matchparts = n;
p->next = 0;
matchlastpart = p;
} else {
n = get_cline(l, ll, w, wl, NULL, 0,
flags | ((m && m->wlen == -2) ? CLF_SKIP : 0));
if (matchlastsub)
matchlastsub->next = n;
else
matchsubs = n;
matchlastsub = n;
}
}
}
/* This tests if the string from the line l matches the word w. In *bpp
* the offset for the brace is returned, in rwlp the length of the
* matched prefix or suffix, not including the stuff before or after
* the last anchor is given. When sfx is non-zero matching is done from
* the ends of the strings backward, if test is zero, the global variables
* above are used to build the string for the match and the cline. If
* part is non-zero, we are satisfied if only a part of the line-string
* is used (and return the length used). */
/**/
int
match_str(char *l, char *w, Brinfo *bpp, int bc, int *rwlp,
const int sfx, int test, int part)
{
/* How many characters from the line string and from the word string are
* yet to be matched. */
int ll = strlen(l), lw = strlen(w);
/* Number of characters from the line string and word string matched. */
int il = 0, iw = 0;
/* How many characters were matched exactly in the line and in the word. */
int exact = 0, wexact = 0;
int he = 0;
int bslash;
char *ow;
Cmlist ms; /* loop variable */
Cmatcher mp, lm = NULL;
Brinfo bp = NULL;
const int obc = bc;
const int ind = (sfx ? -1 : 0);
const int add = (sfx ? -1 : 1);
const int original_ll = ll, original_lw = lw;
/* INVARIANT: il+ll == original_ll; iw+lw == original_lw */
if (!test) {
start_match();
bp = *bpp;
}
/* Adjust the pointers and get the values for subscripting and
* incrementing. */
if (sfx) {
l += ll; w += lw;
}
/* ow will always point to the beginning (or end) of that sub-string
* in w that wasn't put in the match-variables yet. */
ow = w;
/* If the brace is at the beginning, we have to treat it now. */
if (!test && bp && bc >= bp->pos) {
bp->curpos = bc;
bp = bp->next;
}
/*** This once was: `while (ll && lw)', but then ignored characters at
* the end were not, well, ignored. */
while (ll) {
/* Hm, we unconditionally first tried the matchers for the cases
* where the beginnings of the line and word patterns match the
* same character(s).
* But first testing if the characters are the same is sooo
* much faster...
* Maybe it's OK to make same-character matching be preferred in
* recursive calls. At least, it /seems/ to work.
*
* Let's try.
*
* Update: this once tested `test && ...' to check for exact
* character matches only in recursive calls. But then one
* can't complete `nom<TAB>' to `nomatch' with a match spec
* of `B:[nN][oO]=' because that will eat the `no'.
* But that would break completion of strings like `nonomatch'
* because the `B:[nN][oO]=' doesn't match the second `no'.
* For this we added the code below that can remove already
* accepted exact characters and try again, preferring match
* specs.
*/
bslash = 0;
if (!sfx && lw && (!part || test) &&
(l[ind] == w[ind] ||
(bslash = (lw > 1 && w[ind] == '\\' && w[ind+1] == l[0])))) {
/* No matcher could be used, but the strings have the same
* character here, skip over it. */
l += add; w += (bslash ? (add + add) : add);
il++; iw += 1 + bslash;
ll--; lw -= 1 + bslash;
bc++;
exact++;
wexact += 1 + bslash;
if (!test)
while (bp && bc >= (useqbr ? bp->qpos : bp->pos)) {
bp->curpos = matchbufadded + (w - ow) + obc;
bp = bp->next;
}
lm = NULL;
he = 0;
continue;
}
retry:
/* First try the matchers. Err... see above. */
for (mp = NULL, ms = mstack; !mp && ms; ms = ms->next) {
for (mp = ms->matcher; mp; mp = mp->next) {
if ((lm && lm == mp) ||
((original_ll == ll || original_lw == lw) &&
(test == 1 || (test && !mp->left && !mp->right)) &&
mp->wlen < 0))
/* If we were called recursively, don't use `*' patterns
* at the beginning (avoiding infinite recursion). */
continue;
if (mp->wlen < 0) {
/* `*'-pattern. */
/*
* Similar to the identically-named variable in the 'else'
* block.
*/
int t;
/*
* 1 iff the anchor and the word are on the same side of
* the line pattern; that is: if either
* - the anchor is on the left and we are matching
* a prefix; or
* - the anchor is on the right and we are matching
* a suffix.
*/
int both;
/*
* Offset from the line pattern pointer ('l') to the start
* of the line pattern.
*/
int loff;
/*
* Offset from the line pattern pointer ('l') to the start
* of the anchor.
*/
int aoff;
/*
* The length of the line pattern.
*/
int llen;
/*
* The length of the anchor.
*
* SEE: ap; aol, aop
*/
int alen;
/*
* ### Related to 'zoff', which was removed in 2016.
*/
int moff;
/*
* ### These two are related.
*
* ### They may have a relation similar to that of lw/iw
* ### (q.v.), at least during the 'for' loop. They may be
* ### overloaded/repurposed after it.
*/
int ct, ict;
/*
* The length of the OTHER anchor: the left anchor when
* we're anchored on the right, and of the right anchor
* when we're anchored on the left.
*/
int aol;
/*
* LOST: Documentation comment. Last seen 10 years ago in
* the temporal lobe. Reward promised for its safe return.
* Contact zsh-workers@zsh.org.
*/
char *tp;
/*
* Temporary variable. Used as temporary storage for a
*
* {
* () {
* local foo="$foo"
* foo[1]=bar
* ...
* }
* (use original $foo here)
* }
*
* operation. Similar to savw.
*/
char savl = 0;
/*
* The anchor on this end.
*/
Cpattern ap;
/*
* The anchor on the other end.
*/
Cpattern aop;
/* This is for `*' patterns, first initialise some
* local variables. */
llen = mp->llen;
if (mp->flags & CMF_LEFT) {
alen = mp->lalen; aol = mp->ralen;
} else {
alen = mp->ralen; aol = mp->lalen;
}
/* Give up if we don't have enough characters for the
* line-string and the anchor, or for both anchors in
* the case of the trial completion word. */
if (ll < llen + alen || lw < alen + aol)
continue;
if (mp->flags & CMF_LEFT) {
ap = mp->left; moff = alen; aop = mp->right;
if (sfx) {
both = 0; loff = -llen; aoff = -(llen + alen);
} else {
both = 1; loff = alen; aoff = 0;
}
} else {
ap = mp->right; moff = 0; aop = mp->left;
if (sfx) {
both = 1; loff = -(llen + alen); aoff = -alen;
} else {
both = 0; loff = 0; aoff = llen;
}
}
/* Try to match the line pattern and the anchor. */
if (!pattern_match(mp->line, l + loff, NULL, NULL))
continue;
if (ap) {
if (!pattern_match(ap, l + aoff, NULL, NULL) ||
(both &&
(!pattern_match(ap, w + aoff, NULL, NULL) ||
(aol && aol <= aoff + iw &&
!pattern_match(aop, w + aoff - aol,
NULL, NULL)) ||
!match_parts(l + aoff, w + aoff, alen, part))))
continue;
} else if (!both || ((mp->flags & CMF_INTER) ?
((mp->flags & CMF_LINE) ? iw : il) :
(il || iw)))
continue;
/* Fine, now we call ourselves recursively to find the
* string matched by the `*'. */
if (sfx && (savl = l[-(llen + alen)]))
l[-(llen + alen)] = '\0';
for (t = 0, tp = w, ct = 0, ict = lw - alen + 1;
ict;
tp += add, ct++, ict--) {
if ((both &&
(!ap || !test ||
!pattern_match(ap, tp + aoff, NULL, NULL) ||
(aol && aol <= aoff + ct + iw &&
!pattern_match(aop, tp + aoff - aol,
NULL, NULL)))) ||
(!both &&
pattern_match(ap, tp - moff, NULL, NULL) &&
(!aol || (aol <= iw + ct - moff &&
pattern_match(aop, tp - moff - aol,
NULL, NULL))) &&
(mp->wlen == -1 ||
match_parts(l + aoff , tp - moff,
alen, part)))) {
if (!both && mp->wlen == -1 &&
!match_parts(l + aoff , tp - moff, alen, part))
break;
if (sfx) {
/* Call ourselves recursively with the
* anchor removed. */
char savw;
if ((savw = tp[-alen]))
tp[-alen] = '\0';
t = match_str(l - ll, w - lw,
NULL, 0, NULL, sfx, 2, part);
if (savw)
tp[-alen] = savw;
} else
t = match_str(l + llen + moff, tp + moff,
NULL, 0, NULL, sfx, 1, part);
if (t || (mp->wlen == -1 && !both))
break;
}
}
ict = ct;
if (sfx && savl)
l[-(llen + alen)] = savl;
/* Have we found a position in w where the rest of l
* matches? */
if (!t)
continue;
/* Yes, add the strings and clines if this is a
* top-level call. */
if (!test && (!he || (llen + alen))) {
char *op, *lp, *map, *wap, *wmp;
int ol;
if (sfx) {
op = w; ol = ow - w; lp = l - (llen + alen);
map = tp - alen;
if (mp->flags & CMF_LEFT) {
wap = tp - alen; wmp = tp;
} else {
wap = w - alen; wmp = tp - alen;
}
} else {
op = ow; ol = w - ow; lp = l;
map = ow;
if (mp->flags & CMF_LEFT) {
wap = w; wmp = w + alen;
} else {
wap = tp; wmp = ow;
}
}
/* If the matcher says that we are only interested
* in the line pattern, we just add that and the
* anchor and the string not added yet. Otherwise
* we add a new part. */
if (mp->flags & CMF_LINE) {
add_match_str(NULL, NULL, op, ol, sfx);
add_match_str(NULL, NULL, lp, llen + alen, sfx);
add_match_sub(NULL, NULL, ol, op, ol);
add_match_sub(NULL, NULL, llen + alen,
lp, llen + alen);
} else if (sfx) {
add_match_str(NULL, NULL,
map, ct + ol + alen, sfx);
add_match_part(mp, l + aoff, wap, alen,
l + loff, llen, op, ol, ol, sfx);
add_match_sub(NULL, NULL, 0, wmp, ct);
} else {
add_match_str(NULL, NULL,
map, ct + ol + alen, sfx);
if (both) {
add_match_sub(NULL, NULL, ol, op, ol);
ol = -1;
} else
ct += ol;
add_match_part(mp, l + aoff, wap, alen,
l + loff, llen, wmp, ct, ol, sfx);
}
}
/* Now skip over the matched portion and the anchor. */
llen += alen; alen += ict;
if (sfx) {
l -= llen; w -= alen;
} else {
l += llen; w += alen;
}
ll -= llen; il += llen;
lw -= alen; iw += alen;
bc += llen;
exact = 0;
if (!test) {
int bpc;
while (bp &&
bc >= (bpc = (useqbr ? bp->qpos : bp->pos))) {
bp->curpos = matchbufadded + bpc - bc + obc;
bp = bp->next;
}
}
ow = w;
if (!llen && !alen) {
lm = mp;
if (he) {
/* Signal the outer for loop to continue. */
mp = NULL;
}
else
he = 1;
} else {
lm = NULL; he = 0;
}
break;
} else if (ll >= mp->llen && lw >= mp->wlen) {
/* Non-`*'-pattern. */
/*
* Similar to the identically-named variable in the 'if'
* block.
*/
int t = 1;
char *tl, *tw;
int tll, tlw, til, tiw;
/* We do this only if the line- and word-substrings
* are not equal. */
if (!(mp->flags & (CMF_LEFT | CMF_RIGHT)) &&
mp->llen == mp->wlen &&
!(sfx ? strncmp(l - mp->llen, w - mp->wlen, mp->llen) :
strncmp(l, w, mp->llen)))
continue;
/* Using local variables to make the following
* independent of whether we match a prefix or a
* suffix. */
if (sfx) {
tl = l - mp->llen; tw = w - mp->wlen;
til = ll - mp->llen; tiw = lw - mp->wlen;
tll = il + mp->llen; tlw = iw + mp->wlen;
} else {
tl = l; tw = w;
til = il; tiw = iw;
tll = ll; tlw = lw;
}
if (mp->flags & CMF_LEFT) {
/* Try to match the left anchor, if any. */
if (til < mp->lalen || tiw < mp->lalen + mp->ralen)
continue;
else if (mp->left)
t = pattern_match(mp->left, tl - mp->lalen,
NULL, NULL) &&
pattern_match(mp->left, tw - mp->lalen,
NULL, NULL) &&
(!mp->ralen ||
pattern_match(mp->right,
tw - mp->lalen - mp->ralen,
NULL, NULL));
else
t = (!sfx && !((mp->flags & CMF_INTER) ?
((mp->flags & CMF_LINE) ? iw : il) :
(il || iw)));
}
if (mp->flags & CMF_RIGHT) {
/* Try to match the right anchor, if any. */
if (tll < mp->llen + mp->ralen ||
tlw < mp->wlen + mp->ralen + mp->lalen)
continue;
else if (mp->right)
t = pattern_match(mp->right,
/* tl + mp->llen - mp->ralen, */
tl + mp->llen,
NULL, NULL) &&
pattern_match(mp->right,
/* tw + mp->wlen - mp->ralen, */
tw + mp->wlen,
NULL, NULL) &&
(!mp->lalen ||
pattern_match(mp->left, tw + mp->wlen -
mp->ralen - mp->lalen,
NULL, NULL));
else
t = (sfx && !((mp->flags & CMF_INTER) ?
((mp->flags & CMF_LINE) ? iw : il) :
(il || iw)));
}
/* Now try to match the line and word patterns. */
if (!t || !pattern_match(mp->line, tl, mp->word, tw))
continue;
/* Probably add the matched strings. */
if (!test) {
if (sfx)
{
if (ow >= w)
add_match_str(NULL, NULL, w, ow - w, sfx);
}
else
{
if (w >= ow)
add_match_str(NULL, NULL, ow, w - ow, sfx);
}
add_match_str(mp, tl, tw, mp->wlen, sfx);
if (sfx)
{
if (ow >= w)
add_match_sub(NULL, NULL, 0, w, ow - w);
}
else
{
if (w >= ow)
add_match_sub(NULL, NULL, 0, ow, w - ow);
}
add_match_sub(mp, tl, mp->llen, tw, mp->wlen);
}
if (sfx) {
l = tl; w = tw;
} else {
l += mp->llen; w += mp->wlen;
}
il += mp->llen; iw += mp->wlen;
ll -= mp->llen; lw -= mp->wlen;
bc += mp->llen;
exact = 0;
if (!test) {
int bpc;
while (bp &&
bc >= (bpc = (useqbr ? bp->qpos : bp->pos))) {
bp->curpos = matchbufadded + bpc - bc + obc;
bp = bp->next;
}
}
ow = w;
lm = NULL;
he = 0;
break;
}
}
}
if (mp)
continue;
/* Same code as at the beginning, used in top-level calls. */
bslash = 0;
if ((!test || sfx) && lw &&
(l[ind] == w[ind] ||
(bslash = (lw > 1 && w[ind] == '\\' && w[ind+1] == l[0])))) {
/* No matcher could be used, but the strings have the same
* character here, skip over it. */
l += add; w += (bslash ? (add + add ) : add);
il++; iw += 1 + bslash;
ll--; lw -= 1 + bslash;
bc++;
if (!test)
while (bp && bc >= (useqbr ? bp->qpos : bp->pos)) {
bp->curpos = matchbufadded + (sfx ? (ow - w) : (w - ow)) + obc;
bp = bp->next;
}
lm = NULL;
he = 0;
} else {
if (!lw)
break;
if (exact && !part) {
/* If we just accepted some characters directly (at the
* beginning of the loop) and now can't match any further,
* we go back to before those characters and try again,
* preferring match specs this time. */
il -= exact; iw -= wexact;
ll += exact; lw += wexact;
bc -= exact;
l -= add * exact; w -= add * wexact;
exact = wexact = 0;
goto retry;
}
/* No matcher and different characters: l does not match w. */
if (test)
return 0;
abort_match();
return -1;
}
}
/* If this is a recursive call, we just return if l matched w or not. */
if (test)
return (part || !ll);
/* In top-level calls, if ll is non-zero (unmatched portion in l),
* we have to free the collected clines. */
if (!part && ll) {
abort_match();
return -1;
}
if (rwlp)
*rwlp = iw - (sfx ? ow - w : w - ow);
/* If we matched a suffix, the anchors stored in the top-clines
* will be in the wrong clines: shifted by one. Adjust this. */
if (sfx && matchparts) {
Cline t, tn, s;
if (matchparts->prefix || matchparts->suffix) {
t = get_cline(NULL, 0, NULL, 0, NULL, 0, 0);
t->next = matchparts;
if (matchparts->prefix)
t->prefix = (Cline) 1;
else
t->suffix = (Cline) 1;
matchparts = t;
}
for (t = matchparts; (tn = t->next); t = tn) {
s = (tn->prefix ? tn->prefix : tn->suffix);
if (t->suffix)
t->suffix = s;
else
t->prefix = s;
}
t->prefix = t->suffix = NULL;
}
/* Finally, return the number of matched characters. */
*bpp = bp;
return (part ? il : iw);
}
/* Wrapper for match_str(), only for a certain length and only doing
* the test. */
/**/
static int
match_parts(char *l, char *w, int n, int part)
{
char lsav = l[n], wsav = w[n];
int ret;
if (lsav)
l[n] = '\0';
if (wsav)
w[n] = '\0';
ret = match_str(l, w, NULL, 0, NULL, 0, 1, part);
if (lsav)
l[n] = lsav;
if (wsav)
w[n] = wsav;
return ret;
}
/* Check if the word w is matched by the strings in pfx and sfx (the prefix
* and the suffix from the line) or the pattern cp. In clp a cline list for
* w is returned.
* qu is non-zero if the words has to be quoted before processed any
* further: the value 2 indicates file quoting.
* bpl and bsl are used to report the positions where the brace-strings in
* the prefix and the suffix have to be re-inserted if this match is inserted
* in the line.
* The return value is the string to use as a completion or NULL if the prefix
* and the suffix don't match the word w. */
/**/
mod_export char *
comp_match(char *pfx, char *sfx, char *w, Patprog cp, Cline *clp, int qu,
Brinfo *bpl, int bcp, Brinfo *bsl, int bcs, int *exact)
{
char *r = NULL;
int onoerrs = noerrs;
if (cp) {
/* We have a globcomplete-like pattern, just use that. */
int wl;
char *teststr;
r = w;
if (!qu) {
/*
* If we're not quoting the strings, that means they're
* already quoted (?) and so when we test them against
* a pattern we have to remove the quotes else we will
* end up trying to match against the quote characters.
*
* Almost certainly this fails in some complicated cases
* but it should catch the basic ones.
*/
teststr = dupstring(r);
tokenize(teststr);
noerrs = 1;
if (parse_subst_string(teststr))
teststr = r;
else {
remnulargs(teststr);
untokenize(teststr);
}
noerrs = onoerrs;
} else
teststr = r;
if (!pattry(cp, teststr))
return NULL;
r = (qu == 2 ? tildequote(r, 0) : multiquote(r, !qu));
/* We still break it into parts here, trying to build a sensible
* cline list for these matches, too. */
w = dupstring(w);
wl = strlen(w);
*clp = bld_parts(w, wl, wl, NULL, NULL);
*exact = 0;
} else {
Cline pli, plil;
int mpl, rpl, wl;
w = (qu == 2 ? tildequote(w, 0) : multiquote(w, !qu));
wl = strlen(w);
/* Always try to match the prefix. */
useqbr = qu;
if ((mpl = match_str(pfx, w, bpl, bcp, &rpl, 0, 0, 0)) < 0)
return NULL;
if (sfx && *sfx) {
int wpl = matchbufadded, msl, rsl;
VARARR(char, wpfx, wpl);
Cline mli, mlil;
/* We also have a suffix to match, so first save the
* contents of the global matching variables. */
memcpy(wpfx, matchbuf, wpl);
if (matchsubs) {
Cline tmp = get_cline(NULL, 0, NULL, 0, NULL, 0, 0);
tmp->prefix = matchsubs;
if (matchlastpart)
matchlastpart->next = tmp;
else
matchparts = tmp;
}
pli = matchparts;
plil = matchlastpart;
/* The try to match the suffix. */
if ((msl = match_str(sfx, w + mpl, bsl, bcs, &rsl, 1, 0, 0)) < 0) {
free_cline(pli);
return NULL;
}
/* Matched, so add the string in the middle and the saved
* string for the prefix, and build a combined cline list
* for the prefix and the suffix. */
if (matchsubs) {
Cline tmp = get_cline(NULL, 0, NULL, 0, NULL, 0, CLF_SUF);
tmp->suffix = matchsubs;
if (matchlastpart)
matchlastpart->next = tmp;
else
matchparts = tmp;
}
add_match_str(NULL, NULL, w + rpl, wl - rpl - rsl, 1);
add_match_str(NULL, NULL, wpfx, wpl, 1);
mli = bld_parts(w + rpl, wl - rpl - rsl,
(mpl - rpl) + (msl - rsl), &mlil, NULL);
mlil->flags |= CLF_MID;
mlil->slen = msl - rsl;
mlil->next = revert_cline(matchparts);
if (plil)
plil->next = mli;
else
pli = mli;
} else {
/* Only a prefix, add the string and a part-cline for it. */
add_match_str(NULL, NULL, w + rpl, wl - rpl, 0);
add_match_part(NULL, NULL, NULL, 0, NULL, 0, w + rpl, wl - rpl,
mpl - rpl, 0);
pli = matchparts;
}
r = dupstring(matchbuf ? matchbuf : "");
*clp = pli;
/* Test if the string built is equal to the one from the line. */
if (sfx && *sfx) {
int pl = strlen(pfx);
*exact = (!strncmp(pfx, w, pl) && !strcmp(sfx, w + pl));
} else
*exact = !strcmp(pfx, w);
}
if (!qu)
hasunqu = 1;
return r;
}
/*
* Guts of a single pattern for pattern_match().
* Return non-zero if match successful.
* If the class was an equivalence, return 1 + the index into
* the equivalence class (see pattern.c for how this is calculated).
*/
/**/
mod_export convchar_t
pattern_match1(Cpattern p, convchar_t c, int *mtp)
{
convchar_t ind;
*mtp = 0;
switch (p->tp) {
case CPAT_CCLASS:
return PATMATCHRANGE(p->u.str, CONVCAST(c), NULL, NULL);
case CPAT_NCLASS:
return !PATMATCHRANGE(p->u.str, CONVCAST(c), NULL, NULL);
case CPAT_EQUIV:
if (PATMATCHRANGE(p->u.str, CONVCAST(c), &ind, mtp))
return ind + 1;
else
return 0;
case CPAT_ANY:
return 1;
case CPAT_CHAR:
return (p->u.chr == c);
default:
DPUTS(1, "bad matcher pattern type");
return 0;
}
}
/*
* Use an equivalence to deduce the line character from the word, or
* vice versa. (If vice versa, then "line" and "word" are reversed
* in what follows. The logic is symmetric.)
* lp is the line pattern.
* wind is the index returned by a pattern match on the word pattern,
* with type wmtp.
* wchr is the word character.
* Return CHR_INVALID if no matching character, else the character.
*
* Only makes sense if lp->tp == CPAT_EQUIV and the (unseen) word
* pattern also has that type.
*/
/**/
mod_export convchar_t
pattern_match_equivalence(Cpattern lp, convchar_t wind, int wmtp,
convchar_t wchr)
{
convchar_t lchr;
int lmtp;
if (!PATMATCHINDEX(lp->u.str, wind-1, &lchr, &lmtp)) {
/*
* No equivalent. No possible match; give up.
*/
return CHR_INVALID;
}
/*
* If we matched an exact character rather than a range
* type, return it.
*/
if (lchr != CHR_INVALID)
return lchr;
/*
* Check the match types. We may want a case-changed
* version of the word character.
*/
if (wmtp == PP_UPPER && lmtp == PP_LOWER)
return ZC_tolower(wchr);
else if (wmtp == PP_LOWER && lmtp == PP_UPPER)
return ZC_toupper(wchr);
else if (wmtp == lmtp) {
/*
* Be lenient and allow identical replacements
* for character classes, although in fact this
* doesn't give special functionality for equivalence
* classes.
*/
return wchr;
} else {
/*
* Non-matching generic types; this can't work.
*/
return CHR_INVALID;
}
}
/*
* Check if the given pattern matches the given string.
* p is either an anchor or line pattern and string;
* wp and wsc are word (candidate) pattern and string
*
* Check that characters match for {...} classes by comparing positions in the
* strings.
*
* prestrict is a chain of patterns at least as long
* as the line string. In this case we are still assembling the line at
* newline (which has been allocated but doesn't yet contain anything useful)
* and must continue to do so as we go along; prestrict gives
* restrictions on the line character to be applied along side the other
* patterns. In the simple case a restriction is a character to be put
* in place; otherwise it is a set of possible characters and we have to
* deduce an actual matching character. Note prestrict is never an
* equivalence class. In extreme cases we can't deduce a unique
* character; then the match fails.
*
* If prestrict is not NULL, s will be NULL.
*/
/**/
static int
pattern_match_restrict(Cpattern p, Cpattern wp, convchar_t *wsc, int wsclen,
Cpattern prestrict, ZLE_STRING_T new_line)
{
convchar_t c;
convchar_t ind, wind;
int mt, wmt;
while (p && wp && wsclen && prestrict) {
/* First test the word character */
wind = pattern_match1(wp, *wsc, &wmt);
if (!wind)
return 0;
/*
* Now the line character; deal with the case where
* we don't yet have it, only a restriction on it.
*/
if (prestrict->tp == CPAT_CHAR) {
/*
* Easy case: restricted to an exact character on
* the line. Proceed as normal.
*/
c = prestrict->u.chr;
} else {
if (p->tp == CPAT_CHAR) {
/*
* Normal line pattern is an exact character: as
* long as this matches prestrict, we can proceed
* as usual.
*/
c = p->u.chr;
} else if (p->tp == CPAT_EQUIV) {
/*
* An equivalence, so we can deduce the character
* backwards from the word pattern and see if it
* matches prestrict.
*/
if ((c = pattern_match_equivalence(p, wind, wmt, *wsc)) ==
CHR_INVALID)
return 0;
} else {
/*
* Not an equivalence, so that means we must match
* the word (not just the word pattern), so grab it
* and make sure it fulfills our needs. I think.
* Not 100% sure about that, but what else can
* we do? We haven't actually been passed a string
* from the command line.
*/
c = *wsc;
}
/* Character so deduced must match the restriction. */
if (!pattern_match1(prestrict, c, &mt))
return 0;
}
/*
* If either is "?", they match each other; no further tests.
* Apply this even if the character wasn't convertible;
* there's no point trying to be clever in that case.
*/
if (p->tp != CPAT_ANY || wp->tp != CPAT_ANY)
{
ind = pattern_match1(p, c, &mt);
if (!ind)
return 0;
if (ind != wind)
return 0;
if (mt != wmt) {
/*
* Special case if matching lower vs. upper or
* vice versa. The transformed characters must match.
* We don't need to check the transformation is
* the appropriate one for each character separately,
* since that was done in pattern_match1(), so just
* compare lower-cased versions of both.
*/
if ((mt == PP_LOWER || mt == PP_UPPER) &&
(wmt == PP_LOWER || wmt == PP_UPPER)) {
if (ZC_tolower(c) != ZC_tolower(*wsc))
return 0;
} else {
/* Other different classes can't match. */
return 0;
}
}
}
/* We need to assemble the line */
*new_line++ = (ZLE_CHAR_T)c;
prestrict = prestrict->next;
wsc++;
wsclen--;
p = p->next;
wp = wp->next;
}
while (p && prestrict) {
/*
* As above, but with even less info to go on.
* (Can this happen?) At least handle the cases where
* one of our patterns has given us a specific character.
*/
if (prestrict->tp == CPAT_CHAR) {
c = prestrict->u.chr;
} else {
if (p->tp == CPAT_CHAR) {
c = p->u.chr;
} else {
/*
* OK. Here we are in a function with just a line
* pattern and another pattern to restrict the
* characters that can go on the line, and no actual
* characters. We're matching two patterns against
* one another to generate a character to insert.
* This is a bit too psychedelic, so I'm going to
* bail out now. See you on the ground.
*/
return 0;
}
if (!pattern_match1(prestrict, c, &mt))
return 0;
}
if (!pattern_match1(p, c, &mt))
return 0;
p = p->next;
*new_line++ = (ZLE_CHAR_T)c;
prestrict = prestrict->next;
}
if (prestrict) {
/* Restriction with nothing to match */
return 0;
}
while (wp && wsclen) {
/* No funny business when we only have the word pattern. */
if (!pattern_match1(wp, *wsc, &wmt))
return 0;
wp = wp->next;
wsc++;
wsclen--;
}
return 1;
}
/*
* The usual version of pattern matching, without the line string
* being handled by restriction.
*
* Check if the given pattern matches the given string.
* p and s are either anchor or line pattern and string;
* wp and ws are word (candidate) pattern and string
*
* If only one pattern is given, we just check if characters match.
* If both line and word are given, we check that characters match
* for {...} classes by comparing positions in the strings.
*
* Patterns and strings are always passed in pairs, so it is enough
* to check for non-NULL wp. p should always be present.
*/
/**/
mod_export int
pattern_match(Cpattern p, char *s, Cpattern wp, char *ws)
{
convchar_t c, wc;
convchar_t ind, wind;
int len = 0, wlen = 0, mt, wmt;
while (p && wp && *s && *ws) {
/* First test the word character */
wc = unmeta_one(ws, &wlen);
wind = pattern_match1(wp, wc, &wmt);
if (!wind)
return 0;
/*
* Now the line character.
*/
c = unmeta_one(s, &len);
/*
* If either is "?", they match each other; no further tests.
* Apply this even if the character wasn't convertible;
* there's no point trying to be clever in that case.
*/
if (p->tp != CPAT_ANY || wp->tp != CPAT_ANY)
{
ind = pattern_match1(p, c, &mt);
if (!ind)
return 0;
if (ind != wind)
return 0;
if (mt != wmt) {
/*
* Special case if matching lower vs. upper or
* vice versa. The transformed characters must match.
* We don't need to check the transformation is
* the appropriate one for each character separately,
* since that was done in pattern_match1(), so just
* compare lower-cased versions of both.
*/
if ((mt == PP_LOWER || mt == PP_UPPER) &&
(wmt == PP_LOWER || wmt == PP_UPPER)) {
if (ZC_tolower(c) != ZC_tolower(wc))
return 0;
} else {
/* Other different classes can't match. */
return 0;
}
}
}
s += len;
ws += wlen;
p = p->next;
wp = wp->next;
}
while (p && *s) {
c = unmeta_one(s, &len);
if (!pattern_match1(p, c, &mt))
return 0;
p = p->next;
s += len;
}
while (wp && *ws) {
wc = unmeta_one(ws, &wlen);
if (!pattern_match1(wp, wc, &wmt))
return 0;
wp = wp->next;
ws += wlen;
}
return 1;
}
/* This splits the given string into a list of cline structs, separated
* at those places where one of the anchors of an `*' pattern was found.
* plen gives the number of characters on the line that matched this
* string.
*
* In *lp, if lp is not NULL, we return a pointer to the last cline struct we
* build.
*
* In *lprem, if lprem is not NULL, we return a pointer to the last
* cline struct we build if it came from the remainder of the
* line rather than from a right anchor match, else NULL.
*/
/**/
Cline
bld_parts(char *str, int len, int plen, Cline *lp, Cline *lprem)
{
Cline ret = NULL, *q = &ret, n = NULL;
Cmlist ms;
Cmatcher mp;
int t, op = plen;
char *p = str, *os = str;
while (len) {
for (t = 0, ms = bmatchers; ms && !t; ms = ms->next) {
mp = ms->matcher;
if (mp && mp->flags == CMF_RIGHT && mp->wlen < 0 && mp->ralen &&
!mp->llen && len >= mp->ralen && (str - os) >= mp->lalen &&
pattern_match(mp->right, str, NULL, NULL) &&
(!mp->lalen ||
((str - os) >= mp->lalen &&
pattern_match(mp->left, str - mp->lalen, NULL, NULL)))) {
int olen = str - p, llen;
/* We found an anchor, create a new cline. The NEW flag
* is set if the characters before the anchor were not
* on the line. */
*q = n = get_cline(NULL, mp->ralen, str, mp->ralen, NULL, 0,
((plen <= 0) ? CLF_NEW : 0));
/* If there were any characters before the anchor, add
* them as a cline struct. */
if (p != str) {
llen = (op < 0 ? 0 : op);
if (llen > olen)
llen = olen;
n->prefix = get_cline(NULL, llen, p, olen, NULL, 0, 0);
}
q = &(n->next);
str += mp->ralen; len -= mp->ralen;
plen -= mp->ralen;
op -= olen;
p = str;
t = 1;
}
}
if (!t) {
/* No anchor was found here, skip. */
str++; len--;
plen--;
}
}
/* This is the cline struct for the remaining string at the end. */
if (p != str) {
int olen = str - p, llen = (op < 0 ? 0 : op);
*q = n = get_cline(NULL, 0, NULL, 0, NULL, 0, (plen <= 0 ? CLF_NEW : 0));
if (llen > olen)
llen = olen;
n->prefix = get_cline(NULL, llen, p, olen, NULL, 0, 0);
if (lprem)
*lprem = n;
}
else if (!ret) {
*q = n =
get_cline(NULL, 0, NULL, 0, NULL, 0, (plen <= 0 ? CLF_NEW : 0));
if (lprem)
*lprem = n;
} else if (lprem) {
*lprem = NULL;
}
if (n)
n->next = NULL;
if (lp)
*lp = n;
return ret;
}
/*
* This builds all the possible line patterns for the pattern pat in the
* buffer line. Then we test if this line matches the string given by
* wlen and word.
*
* The matcher ) wpat, containing pattern that matched previously
* mp gives ) lpat, containing the pattern for line we build
* line is the line we are assembling; it is initially empty
* mword is a string that matched wpat before
* word is string that we try to match now
*
* The return value is the length of the string matched in the word, it
* is zero if we couldn't build a line that matches the word.
*/
/**/
static int
bld_line(Cmatcher mp, ZLE_STRING_T line, char *mword, char *word,
int wlen, int sfx)
{
Cpattern lpat = mp->line;
Cpattern wpat = mp->word;
Cpattern curgenpat;
Cmlist ms;
int llen, rl, l;
convchar_t convchr, *wordcp;
VARARR(convchar_t, wordchars, wlen);
VARARR(struct cpattern, genpatarr, mp->llen);
/*
* We may need to start the "word" array from the end. This
* is much easier if we convert it to an array of (possibly wide)
* characters.
*/
MB_METACHARINIT();
for (l = wlen, wordcp = wordchars; l; l--) {
int charlen = MB_METACHARLENCONV(word, &convchr);
#ifdef MULTIBYTE_SUPPORT
if (convchr == WEOF)
convchr = (*word == Meta) ? word[1] ^ 32 : *word;
#endif
*wordcp++ = convchr;
word += charlen;
}
/*
* Loop over all characters. At this stage, line is an empty
* space of length llen (not counting the null byte) which we assemble as
* we go along.
*
* However, first we need to know what characters can appear at each
* point in the line. For this we assemble an list genpatarr of the
* same length as the line. (It's convenient to store this as an
* array but it's linked as a list, too.) If there are equivalences
* we use mword to derive the equivalent character; when we've
* reached the end of mword, equivalences are treated just like
* ordinary character classes. For character classes we just attach
* the class to the genpatarr list and apply it as a restriction
* when we finally match the line against the set of matchers.
*/
curgenpat = genpatarr;
MB_METACHARINIT();
while (lpat) {
convchar_t wchr, wind;
int wmtp, mwordlen;
/*
* If the line pattern is an equivalence, query wpat to find the
* word part of the equivalence. If we don't find one we don't try
* equivalencing but use lpat as an ordinary match. (It's not
* entirely clear to me this is the correct behaviour on a
* failed character match within the equivalence, but that was
* the behaviour of the old logic that this replaces.)
*/
if (lpat->tp == CPAT_EQUIV && wpat && *mword) {
mwordlen = MB_METACHARLENCONV(mword, &wchr);
wind = pattern_match1(wpat, wchr, &wmtp);
wpat = wpat->next;
mword += mwordlen;
} else
wind = 0;
if (wind) {
/*
* Successful match for word side of equivalence.
* Find the line equivalent.
*/
convchar_t lchr;
if ((lchr = pattern_match_equivalence(lpat, wind, wmtp, wchr))
== CHR_INVALID) {
/*
* No equivalent. No possible match; give up.
*/
return 0;
}
/*
* We now have an exact character to match,
* so make up a pattern element for it.
*/
curgenpat->tp = CPAT_CHAR;
curgenpat->u.chr = lchr;
} else {
/*
* Not an equivalence class, so we just keep the
* test in the lpat as it is.
*/
curgenpat->tp = lpat->tp;
if (lpat->tp == CPAT_CHAR)
curgenpat->u.chr = lpat->u.chr;
else if (lpat->tp != CPAT_ANY) {
/*
* The string isn't modified and is only needed in calls from
* this function, so we don't even need to copy it.
*/
curgenpat->u.str = lpat->u.str;
}
}
lpat = lpat->next;
/*
* This linked list is defined above as an array.
* We could get away with just keeping it as an array
* and passing it down as such, but that's a bit icky
* since the generic linkage of Cpatterns is as a linked
* list and we should keep our local memory management
* problems to ourselvess.
*/
if (lpat)
curgenpat->next = curgenpat+1;
else
curgenpat->next = NULL;
curgenpat++;
}
/*
* We now know how to match the word with the line patterns; let's
* see if it does. We will use the information in curgenpat if we
* are successful to work out what character goes on the line. This
* is a bit hairy, as in "the Yeti is a creature that is a bit
* hairy".
*/
llen = mp->llen;
rl = 0;
if (sfx)
{
/*
* We need to work backwards from the end of both the
* word and the line strings.
*/
wordcp = wordchars + wlen;
/*
* We construct the line from the end.
*/
line += llen;
curgenpat = genpatarr + llen;
} else {
wordcp = wordchars;
curgenpat = genpatarr;
}
/* we now reuse mp, lpat, wpat for the global matchers */
MB_METACHARINIT();
while (llen && wlen) {
int wmtp;
convchar_t *wp;
Cpattern tmpgenpat;
if (sfx) {
wp = wordcp - 1;
tmpgenpat = curgenpat - 1;
} else {
tmpgenpat = curgenpat;
wp = wordcp;
}
if (pattern_match1(tmpgenpat, *wp, &wmtp))
{
convchar_t lchr;
/*
* We can match the line character directly with the word
* character. If the line character is a fixed one,
* keep it, since we went to all that trouble above,
* else if it's generic, keep the word character,
* since we have no choice.
*/
if (tmpgenpat->tp == CPAT_CHAR)
lchr = tmpgenpat->u.chr;
else
lchr = *wp;
if (sfx)
*--line = lchr;
else
*line++ = lchr;
llen--;
wlen--;
rl++;
if (sfx) {
wordcp = wp;
curgenpat = tmpgenpat;
} else {
if (llen)
curgenpat++;
wordcp++;
}
}
else
{
ZLE_CHAR_T *lp;
/*
* Need to loop over pattern matchers.
*/
for (ms = bmatchers; ms; ms = ms->next) {
mp = ms->matcher;
/*
* This is the nightmare case: we have line and
* and word matchers and some pattern which restricts
* the value on the line without us knowing exactly
* what it is. Dispatch to the special function
* for that.
*/
if (mp && !mp->flags && mp->wlen <= wlen &&
mp->llen <= llen)
{
lp = line;
wp = wordcp;
tmpgenpat = curgenpat;
if (sfx) {
lp -= mp->llen;
wp -= mp->wlen;
tmpgenpat -= mp->llen;
}
if (pattern_match_restrict(mp->line, mp->word, wp,
wlen - (wp - wordchars),
tmpgenpat, lp)) {
/*
* Matched: advance over as many characters
* of the patterns and strings as
* we've done matches.
*/
if (sfx) {
line = lp;
wordcp = wp;
curgenpat = tmpgenpat;
} else {
line += mp->llen;
wordcp += mp->wlen;
curgenpat += mp->llen;
}
llen -= mp->llen;
wlen -= mp->wlen;
rl += mp->wlen;
break;
}
}
}
if (!ms)
return 0; /* Didn't match, give up */
}
}
if (!llen) {
/* Unmatched portion in the line built, return matched length. */
return rl;
}
return 0;
}
/* This builds a string that may be put on the line that fully matches the
* given strings. The return value is NULL if no such string could be built
* or that string in local static memory, dup it. */
/**/
static char *
join_strs(int la, char *sa, int lb, char *sb)
{
static char *rs = NULL;
static int rl = 0;
Cmlist ms;
Cmatcher mp;
int t, bl;
/** rr is the remaining length already allocated in rs */
int rr = rl;
/*
* convlen is the length we need for the string converted to
* char * (possibly multibyte).
*/
int convlen;
char *rp = rs;
while (la && lb) {
if (*sa != *sb) {
/* Different characters, try the matchers. */
for (t = 0, ms = bmatchers; ms && !t; ms = ms->next) {
mp = ms->matcher;
if (mp && !mp->flags && mp->wlen > 0 && mp->llen > 0 &&
mp->wlen <= la && mp->wlen <= lb) {
/* The pattern has no anchors and the word
* pattern fits, try it. */
if ((t = pattern_match(mp->word, sa, NULL, NULL)) ||
pattern_match(mp->word, sb, NULL, NULL)) {
/* It matched one of the strings, t says which one. */
VARARR(ZLE_CHAR_T, line, mp->llen);
char **ap, **bp;
int *alp, *blp;
if (t) {
ap = &sa;
alp = &la;
bp = &sb;
blp = &lb;
} else {
ap = &sb;
alp = &lb;
bp = &sa;
blp = &la;
}
/* Now try to build a string that matches the other
* string. */
if ((bl = bld_line(mp, line, *ap, *bp, *blp, 0))) {
/* Found one, put it into the return string. */
char *convstr =
zlelineasstring(line, mp->llen, 0, &convlen,
NULL, 0);
if (rr <= convlen) {
char *or = rs;
int alloclen = (convlen > 20) ? convlen : 20;
rs = realloc(rs, (rl += alloclen));
rr += alloclen;
rp += rs - or;
}
memcpy(rp, convstr, convlen);
rp += convlen;
rr -= convlen;
/* HERE: multibyte chars */
*ap += mp->wlen;
*alp -= mp->wlen;
*bp += bl;
*blp -= bl;
t = 1;
free(convstr);
} else
t = 0;
}
}
}
if (!t)
break;
} else {
/* Same character, just take it. */
if (rr <= 1 /* HERE charlen */) {
char *or = rs;
rs = realloc(rs, (rl += 20));
rr += 20;
rp += rs - or;
}
/* HERE: multibyte char */
*rp++ = *sa;
rr--;
sa++;
sb++;
la--;
lb--;
}
}
if (la || lb)
return NULL;
*rp = '\0';
return rs;
}
/*
* This compares the anchors stored in two top-level clines.
* It returns 1 if the anchors are the same, 2 if they are
* compatible (and have been combined in "o"), 0 otherwise.
*/
/**/
static int
cmp_anchors(Cline o, Cline n, int join)
{
int line = 0;
char *j;
/* First try the exact strings. */
if ((!(o->flags & CLF_LINE) && o->wlen == n->wlen &&
(!o->word || !strncmp(o->word, n->word, o->wlen))) ||
(line = ((!o->line && !n->line && !o->wlen && !n->wlen) ||
(o->llen == n->llen && o->line && n->line &&
!strncmp(o->line, n->line, o->llen))))) {
if (line) {
o->flags |= CLF_LINE;
o->word = NULL;
o->wlen = 0;
}
return 1;
}
/* Didn't work, try to build a string matching both anchors. */
if (join && !(o->flags & CLF_JOIN) && o->word && n->word &&
(j = join_strs(o->wlen, o->word, n->wlen, n->word))) {
o->flags |= CLF_JOIN;
o->wlen = strlen(j);
o->word = dupstring(j);
return 2;
}
return 0;
}
/* Below is the code to join two cline lists. This struct is used to walk
* through a sub-list. */
typedef struct cmdata *Cmdata;
struct cmdata {
Cline cl, pcl;
char *str, *astr;
int len, alen, olen, line;
};
/* This is used to ensure that a cmdata struct contains usable data.
* The return value is non-zero if we reached the end. */
static int
check_cmdata(Cmdata md, int sfx)
{
/* We will use the str and len fields to contain the next sub-string
* in the list. If len is zero, we have to use the next cline. */
if (!md->len) {
/* If there is none, we reached the end. */
if (!md->cl)
return 1;
/* Otherwise, get the string. Only the line-string or both.
* We also have to adjust the pointer if this is for a suffix. */
if (md->cl->flags & CLF_LINE) {
md->line = 1;
md->len = md->cl->llen;
md->str = md->cl->line;
} else {
md->line = 0;
md->len = md->olen = md->cl->wlen;
/* HERE: multibyte */
if ((md->str = md->cl->word) && sfx)
md->str += md->len;
md->alen = md->cl->llen;
/* HERE: multibyte */
if ((md->astr = md->cl->line) && sfx)
md->astr += md->alen;
}
md->pcl = md->cl;
md->cl = md->cl->next;
}
return 0;
}
/* This puts the not-yet-matched portion back into the last cline and
* returns that. */
static Cline
undo_cmdata(Cmdata md, int sfx)
{
Cline r = md->pcl;
if (md->line) {
r->word = NULL;
r->wlen = 0;
r->flags |= CLF_LINE;
r->llen = md->len;
/* HERE: multibyte */
r->line = md->str - (sfx ? md->len : 0);
} else if (md->len != md->olen) {
r->wlen = md->len;
/* HERE: multibyte */
r->word = md->str - (sfx ? md->len : 0);
DPUTS(r->wlen > 0 && !*r->word, "Bad word");
}
return r;
}
/* This tries to build a string matching a sub-string in a sub-cline
* that could not be matched otherwise. */
static Cline
join_sub(Cmdata md, char *str, int len, int *mlen, int sfx, int join)
{
if (!check_cmdata(md, sfx)) {
char *ow = str, *nw = md->str;
int ol = len, nl = md->len;
Cmlist ms;
Cmatcher mp;
int t;
if (sfx) {
ow += ol; nw += nl;
}
for (t = 0, ms = bmatchers; ms && !t; ms = ms->next) {
mp = ms->matcher;
/* We use only those patterns that match a non-empty
* string in both the line and the word and that have
* no anchors. */
if (mp && !mp->flags && mp->wlen > 0 && mp->llen > 0) {
/* We first test, if the old string matches already the
* new one. */
if (mp->llen <= ol && mp->wlen <= nl &&
pattern_match(mp->line, ow - (sfx ? mp->llen : 0),
mp->word, nw - (sfx ? mp->wlen : 0))) {
/* It did, update the contents of the cmdata struct
* and return a cline for the matched part. */
if (sfx)
md->str -= mp->wlen;
else
md->str += mp->wlen;
md->len -= mp->wlen;
*mlen = mp->llen;
return get_cline(NULL, 0, ow - (sfx ? mp->llen : 0),
mp->llen, NULL, 0, 0);
}
/* Otherwise we will try to build a string that matches
* both strings. But try the pattern only if the word-
* pattern matches one of the strings. */
if (join && mp->wlen <= ol && mp->wlen <= nl &&
((t = pattern_match(mp->word, ow - (sfx ? mp->wlen : 0),
NULL, NULL)) ||
pattern_match(mp->word, nw - (sfx ? mp->wlen : 0),
NULL, NULL))) {
VARARR(ZLE_CHAR_T, line, mp->llen);
int bl;
char *mw;
/* Then build all the possible lines and see
* if one of them matches the other string. */
/* HERE: they're multibyte */
if (t)
mw = ow - (sfx ? mp->wlen : 0);
else
mw = nw - (sfx ? mp->wlen : 0);
if ((bl = bld_line(mp, line, mw, (t ? nw : ow),
(t ? nl : ol), sfx))) {
/* Yep, one of the lines matched the other
* string. */
/* HERE: multibyte characters */
if (t) {
ol = mp->wlen; nl = bl;
} else {
ol = bl; nl = mp->wlen;
}
if (sfx)
md->str -= nl;
else
md->str += nl;
md->len -= nl;
*mlen = ol;
return get_cline(NULL, 0,
zlelineasstring(line, mp->llen,
0, NULL, NULL, 1),
mp->llen, NULL, 0, CLF_JOIN);
}
}
}
}
}
return NULL;
}
/* This is used to match a sub-string in a sub-cline. The length of the
* matched portion is returned. This tests only for exact equality. */
static int
sub_match(Cmdata md, char *str, int len, int sfx)
{
int ret = 0, l, ind, add;
char *p, *q;
#ifdef MULTIBYTE_SUPPORT
int fulllen = len;
char *fullstr = str;
mbstate_t mbs;
#endif
if (sfx) {
str += len;
ind = -1; add = -1;
} else {
ind = 0; add = 1;
}
/* str and len describe the old string, in md we have the new one. */
while (len) {
if (check_cmdata(md, sfx))
return ret;
/*
* Look for a common prefix. If we do include metafied
* characters, at this stage we still need the overall length
* including Meta's as separate characters.
*/
for (l = 0, p = str, q = md->str;
l < len && l < md->len && p[ind] == q[ind];
l++, p += add, q += add) {}
/* Make sure we don't end in the middle of a Meta sequence. */
if (add == 1) {
if (l && p[-1] == Meta)
l--;
} else {
if (l && ((l < len && p[-1] == Meta)
|| (l < md->len && q[-1] == Meta)))
l--;
}
#ifdef MULTIBYTE_SUPPORT
/*
* Make sure we don't end in the middle of a multibyte character.
* Don't need to do this if the match ended at the start
* of the original string.
*
* Let q be the match point we've found.
*/
q = sfx ? str - l : str + l;
if (q != fullstr) {
memset(&mbs, 0, sizeof mbs);
/*
* Otherwise read characters from the start of the original
* string until we reach or pass the match point. This
* is rather inefficient, but in general only reading
* the full string can keep track of where we are in
* a character. With a prefix we could be more efficient,
* but it's difficult with a suffix where the match point
* moves backwards.
*/
for (p = fullstr; p < fullstr + fulllen; ) {
wchar_t wc;
/*
* ret must, in fact, be set by the current logic,
* but gcc doesn't realise (at least some versions don't).
*/
size_t cnt = MB_INVALID;
int diff;
char *p2;
/*
* Because the string is metafied, we need to
* assembled wide characters a byte at a time.
*/
for (p2 = p; p2 < fullstr + fulllen; p2++) {
char curchar = (*p2 == Meta) ? (*++p2 ^ 32) : *p2;
cnt = mbrtowc(&wc, &curchar, 1, &mbs);
/* Continue while character is incomplete. */
if (cnt != MB_INCOMPLETE)
break;
}
if (cnt == MB_INVALID || cnt == MB_INCOMPLETE) {
/* not a valid character, give up test */
break;
}
/* increment p2 for last byte read */
diff = ++p2 - q;
if (diff == 0) {
/*
* Prefix or suffix matches at end of multbyte character,
* so OK.
*/
break;
} else if (diff > 0) {
/*
* The prefix or suffix finishes in the middle
* of a character. Shorten it until it doesn't.
*/
if (sfx) {
/*
* We need to remove the trailing part of
* the character from the suffix.
*/
l -= diff;
} else {
/*
* We need to remove the initial part of
* the character from the prefix.
*/
l -= (q - p);
}
break;
}
/* Advance over full character */
p = p2;
}
}
#endif
if (l) {
/* There was a common prefix, use it. */
md->len -= l; len -= l;
if (sfx) {
md->str -= l; str -= l;
} else {
md->str += l; str += l;
}
ret += l;
} else if (md->line || md->len != md->olen || !md->astr)
return ret;
else {
/* We still have the line string to try. */
md->line = 1;
md->len = md->alen;
md->str = md->astr;
}
}
return ret;
}
/* This is used to build a common prefix or suffix sub-list. If requested
* it returns the unmatched cline lists in orest and nrest. */
/**/
static void
join_psfx(Cline ot, Cline nt, Cline *orest, Cline *nrest, int sfx)
{
Cline p = NULL, o, n;
struct cmdata md, omd;
char **sstr = NULL;
int len, join = 0, line = 0, *slen = NULL;
if (sfx) {
o = ot->suffix; n = nt->suffix;
} else {
o = ot->prefix; n = nt->prefix;
}
if (!o) {
if (orest)
*orest = NULL;
if (nrest)
*nrest = n;
if (n && n->wlen)
ot->flags |= CLF_MISS;
return;
}
if (!n) {
if (sfx)
ot->suffix = NULL;
else
ot->prefix = NULL;
if (orest)
*orest = o;
else
free_cline(o);
if (nrest)
*nrest = NULL;
return;
}
md.cl = n;
md.len = 0;
/* Walk through the old list. */
while (o) {
join = 0;
memcpy(&omd, &md, sizeof(struct cmdata));
/* We first get the length of the prefix equal in both strings. */
if (o->flags & CLF_LINE) {
if ((len = sub_match(&md, o->line, o->llen, sfx)) != o->llen) {
join = 1; line = 1; slen = &(o->llen); sstr = &(o->line);
}
} else if ((len = sub_match(&md, o->word, o->wlen, sfx)) != o->wlen) {
if (o->line) {
memcpy(&md, &omd, sizeof(struct cmdata));
o->flags |= CLF_LINE | CLF_DIFF;
continue;
}
o->llen = o->llen - ot->slen;
join = 1; line = 0; slen = &(o->wlen); sstr = &(o->word);
}
if (join) {
/* There is a rest that is different in the two lists,
* we try to build a new cline matching both strings. */
Cline joinl;
int jlen;
if ((joinl = join_sub(&md, *sstr + len, *slen - len,
&jlen, sfx, !(o->flags & CLF_JOIN)))) {
/* We have one, insert it into the list. */
joinl->flags |= CLF_DIFF;
if (len + jlen != *slen) {
Cline rest;
rest = get_cline(NULL, 0, *sstr + (sfx ? 0 : len + jlen),
*slen - len - jlen, NULL, 0, 0);
rest->next = o->next;
joinl->next = rest;
} else
joinl->next = o->next;
if (len) {
if (sfx)
*sstr += *slen - len;
*slen = len;
o->next = joinl;
} else {
o->next = NULL;
free_cline(o);
if (p)
p->next = joinl;
else if (sfx)
ot->suffix = joinl;
else
ot->prefix = joinl;
}
o = joinl;
join = 0;
}
}
if (join) {
/* We couldn't build a cline for a common string, so we
* cut the list here. */
if (len) {
Cline r;
if (orest) {
if (line)
r = get_cline(o->line + len, *slen - len,
NULL, 0, NULL, 0, o->flags);
else
r = get_cline(NULL, 0, o->word + len, *slen - len,
NULL, 0, o->flags);
r->next = o->next;
*orest = r;
*slen = len;
o->next = NULL;
} else {
if (sfx)
*sstr += *slen - len;
*slen = len;
free_cline(o->next);
o->next = NULL;
}
} else {
if (p)
p->next = NULL;
else if (sfx)
ot->suffix = NULL;
else
ot->prefix = NULL;
if (orest)
*orest = o;
else
free_cline(o);
}
if (!orest || !nrest)
ot->flags |= CLF_MISS;
if (nrest)
*nrest = undo_cmdata(&md, sfx);
return;
}
p = o;
o = o->next;
}
if (md.len || md.cl)
ot->flags |= CLF_MISS;
if (orest)
*orest = NULL;
if (nrest)
*nrest = undo_cmdata(&md, sfx);
}
/* This builds the common prefix and suffix for a mid-cline -- the one
* describing the place where the prefix and the suffix meet. */
/**/
static void
join_mid(Cline o, Cline n)
{
if (o->flags & CLF_JOIN) {
/* The JOIN flag is set in the old cline struct if it was
* already joined with another one. In this case the suffix
* field contains the suffix from previous calls. */
Cline nr;
join_psfx(o, n, NULL, &nr, 0);
n->suffix = revert_cline(nr);
join_psfx(o, n, NULL, NULL, 1);
} else {
/* This is the first time for both structs, so the prefix field
* contains the whole sub-list. */
Cline or, nr;
o->flags |= CLF_JOIN;
/* We let us give both rests and use them as the suffixes. */
join_psfx(o, n, &or, &nr, 0);
if (or)
or->llen = (o->slen > or->wlen ? or->wlen : o->slen);
o->suffix = revert_cline(or);
n->suffix = revert_cline(nr);
join_psfx(o, n, NULL, NULL, 1);
}
n->suffix = NULL;
}
/* This turns the sequence of anchor cline structs from b to e into a
* prefix sequence, puts it before the prefix of e and then tries to
* join that with the prefix of a.
* This is needed if some matches had a anchor match spec and others
* didn't. */
/**/
static int
sub_join(Cline a, Cline b, Cline e, int anew)
{
if (!e->suffix && a->prefix) {
Cline op = e->prefix, n = NULL, *p = &n, t, ca;
int min = 0, max = 0;
for (; b != e; b = b->next) {
if ((*p = t = b->prefix)) {
while (t->next)
t = t->next;
p = &(t->next);
}
b->suffix = b->prefix = NULL;
b->flags &= ~CLF_SUF;
min += b->min;
max += b->max;
*p = b;
p = &(b->next);
}
*p = e->prefix;
ca = a->prefix;
while (n) {
e->prefix = cp_cline(n, 1);
a->prefix = cp_cline(ca, 1);
if (anew) {
int f = e->flags;
join_psfx(e, a, NULL, NULL, 0);
e->flags = f;
if (e->prefix)
return max - min;
} else {
int f = e->flags;
join_psfx(a, e, NULL, NULL, 0);
e->flags = f;
if (a->prefix)
return max - min;
}
min -= n->min;
if (n == op)
break;
n = n->next;
}
return max - min;
}
return 0;
}
/* This simplifies the cline list given as the first argument so that
* it also matches the second list. */
/**/
Cline
join_clines(Cline o, Cline n)
{
cline_setlens(n, 1);
/* First time called, just return the new list. On further invocations
* we will get it as the first argument. */
if (!o)
return n;
else {
Cline oo = o, nn = n, po = NULL, pn = NULL, x;
int diff;
/* Walk through the lists. */
while (o && n) {
/* If one of them describes a new part and the other one does
* not, synchronise them by searching an old part in the
* other list. */
if ((o->flags & CLF_NEW) && !(n->flags & CLF_NEW)) {
Cline t, tn;
for (t = o; (tn = t->next) &&
((tn->flags & CLF_NEW) || !cmp_anchors(tn, n, 0));
t = tn);
if (tn) {
diff = sub_join(n, o, tn, 1);
if (po)
po->next = tn;
else
oo = tn;
t->next = NULL;
free_cline(o);
x = o;
o = tn;
if (po && po->prefix && cmp_anchors(x, po, 0)) {
po->flags |= CLF_MISS;
po->max += diff;
} else {
o->flags |= CLF_MISS;
o->max += diff;
}
continue;
}
}
if (!(o->flags & CLF_NEW) && (n->flags & CLF_NEW)) {
Cline t, tn;
for (t = n; (tn = t->next) &&
((tn->flags & CLF_NEW) || !cmp_anchors(o, tn, 0));
t = tn);
if (tn) {
int of = o->flags & CLF_MISS;
diff = sub_join(o, n, tn, 0);
o->flags = (o->flags & ~CLF_MISS) | of;
if (po && po->prefix && cmp_anchors(n, pn, 0)) {
po->flags |= CLF_MISS;
po->max += diff;
} else {
o->flags |= CLF_MISS;
o->max += diff;
}
n = tn;
continue;
}
}
/* Almost the same as above, but for the case that they
* describe different types of parts (prefix, suffix, or mid). */
if ((o->flags & (CLF_SUF | CLF_MID)) !=
(n->flags & (CLF_SUF | CLF_MID))) {
Cline t, tn;
for (t = n;
(tn = t->next) &&
(tn->flags & (CLF_SUF | CLF_MID)) !=
(o->flags & (CLF_SUF | CLF_MID));
t = tn);
if (tn && cmp_anchors(o, tn, 1)) {
sub_join(o, n, tn, 0);
n = tn;
continue;
}
for (t = o;
(tn = t->next) &&
(tn->flags & (CLF_SUF | CLF_MID)) !=
(n->flags & (CLF_SUF | CLF_MID));
t = tn);
if (tn && cmp_anchors(tn, n, 1)) {
sub_join(n, o, tn, 1);
if (po)
po->next = tn;
else
oo = tn;
t->next = NULL;
free_cline(o);
o = tn;
continue;
}
if (o->flags & CLF_MID) {
o->flags = (o->flags & ~CLF_MID) | (n->flags & CLF_SUF);
if (n->flags & CLF_SUF) {
free_cline(o->prefix);
o->prefix = NULL;
} else {
free_cline(o->suffix);
o->suffix = NULL;
}
}
break;
}
/* Now see if they have matching anchors. If not, cut the list. */
if (!(o->flags & CLF_MID) && !cmp_anchors(o, n, 1)) {
Cline t, tn, tt, to = NULL;
for (t = n; (tn = t->next); t = tn)
if (!(tn->flags & CLF_NEW) && (tn->flags & CLF_SKIP)) {
for (tt = o; (to = tt->next); tt = to)
if (!(to->flags & CLF_NEW) && (to->flags & CLF_SKIP) &&
cmp_anchors(tn, to, 1))
break;
if (to)
break;
}
if (tn) {
if (po)
po->next = to;
else
oo = to;
o = to;
diff = sub_join(o, n, tn, 0);
o->flags |= CLF_MISS;
o->max += diff;
n = tn;
po = o;
o = o->next;
pn = n;
n = n->next;
continue;
} else {
for (t = o; (to = t->next); t = to)
if ((to->flags & CLF_SKIP) && cmp_anchors(n, to, 1))
break;
if (to) {
diff = sub_join(n, o, to, 1);
if (po)
po->next = to;
else
oo = to;
x = o;
o = to;
if (po && po->prefix && cmp_anchors(x, po, 0)) {
po->flags |= CLF_MISS;
po->max += diff;
} else {
o->flags |= CLF_MISS;
o->max += diff;
}
continue;
} else {
for (tt = NULL, t = n; (tn = t->next); t = tn) {
if (tn->flags & CLF_SKIP)
for (tt = o; (to = tt->next); tt = to)
if ((to->flags & CLF_SKIP) &&
cmp_anchors(tn, to, 1))
break;
if (to)
break;
}
if (to) {
diff = sub_join(n, o, to, 1);
if (po)
po->next = to;
else
oo = to;
x = o;
o = to;
if (po && po->prefix && cmp_anchors(x, po, 0)) {
po->flags |= CLF_MISS;
po->max += diff;
} else {
o->flags |= CLF_MISS;
o->max += diff;
}
continue;
} else {
for (tn = n; tn; tn = tn->next)
if ((tn->flags & CLF_NEW) ==
(o->flags & CLF_NEW) &&
cmp_anchors(tn, o, 1)) break;
if (tn) {
int of = o->flags & CLF_MISS;
if ((diff = sub_join(o, n, tn, 0))) {
o->flags = (o->flags & ~CLF_MISS) | of;
if (po && po->prefix) {
po->flags |= CLF_MISS;
po->max += diff;
}
else {
o->flags |= CLF_MISS;
o->max += diff;
}
}
n = tn;
po = o;
o = o->next;
pn = n;
n = n->next;
continue;
}
if (o->flags & CLF_SUF)
break;
o->word = o->line = o->orig = NULL;
o->wlen = 0;
free_cline(o->next);
o->next = NULL;
o->flags |= CLF_MISS;
}
}
}
}
/* Ok, they are equal, now copy the information about the
* original string if needed, calculate minimum and maximum
* lengths, and join the sub-lists. */
if (!o->orig && !o->olen) {
o->orig = n->orig;
o->olen = n->olen;
}
if (n->min < o->min)
o->min = n->min;
if (n->max > o->max)
o->max = n->max;
if (o->flags & CLF_MID)
join_mid(o, n);
else
join_psfx(o, n, NULL, NULL, (o->flags & CLF_SUF));
po = o;
o = o->next;
pn = n;
n = n->next;
}
/* Free the rest of the old list. */
if (o) {
if (po)
po->next = NULL;
else
oo = NULL;
free_cline(o);
}
free_cline(nn);
return oo;
}
}