From aa8e4a02904b3a1c4b3064eb7502d887f7de958b Mon Sep 17 00:00:00 2001 From: Peter Stephenson Date: Tue, 1 Aug 2023 14:32:55 +0100 Subject: [PATCH] 52008: Pattern bug with branches + exclusion Add tests. --- ChangeLog | 5 +++++ Src/pattern.c | 22 ++++++++++++++++++++-- Test/D02glob.ztst | 26 ++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 372092a32..8e6e3fb18 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2023-08-01 Peter Stephenson + + * 52008: Src/pattern.c, Test/D02glob.ztst: Fix bug with branches + in patterns followed by an exclusion, and add tests. + 2023-07-31 dana * github #100: HexorCatZ: Completion/Unix/Command/_qemu: diff --git a/Src/pattern.c b/Src/pattern.c index 3edda1772..2a1a514fb 100644 --- a/Src/pattern.c +++ b/Src/pattern.c @@ -2987,14 +2987,15 @@ patmatch(Upat prog) case P_EXCSYNC: /* See the P_EXCLUDE code below for where syncptr comes from */ { - unsigned char *syncptr; + unsigned char *syncstart, *syncptr, *ptr; Upat after; after = P_OPERAND(scan); DPUTS(!P_ISEXCLUDE(after), "BUG: EXCSYNC not followed by EXCLUDE."); DPUTS(!P_OPERAND(after)->p, "BUG: EXCSYNC not handled by EXCLUDE"); - syncptr = P_OPERAND(after)->p + (patinput - patinstart); + syncstart = P_OPERAND(after)->p; + syncptr = syncstart + (patinput - patinstart); /* * If we already matched from here, this time we fail. * See WBRANCH code for story about error count. @@ -3009,6 +3010,23 @@ patmatch(Upat prog) * failed anyway. */ *syncptr = errsfound + 1; + /* + * Because of backtracking, any match before this point + * can't apply to the current branch we're on so is now + * a failure --- this can happen if, on a previous + * branch, we initially marked a success before failing + * on a later part of the pattern after marking up the + * P_EXCSYNC (even an end anchor will have this effect). + * To make sure we record the current match point + * correctly, mark those down now. 
+ * + * This might have side effects on the efficiency of + * pathological cases involving nested branches. To + * fix that we'd probably need to record matches on + * different branches separately. + */ + for (ptr = syncstart; ptr < syncptr; ++ptr) + *ptr = 0; } break; case P_EXCEND: diff --git a/Test/D02glob.ztst b/Test/D02glob.ztst index 850a535e5..4d88e5c27 100644 --- a/Test/D02glob.ztst +++ b/Test/D02glob.ztst @@ -817,6 +817,32 @@ *>*/glob.tmp/(flip|flop) *>*/glob.tmp/(flip|flop)/trailing/components +# The following set test an obscure problem with branches followed by +# exclusions that shows up when the exclusion matches against +# something other than the complete test string, hence the complicated +# double negative. + [[ ab = (|a*)~^(*b) ]] +0:Regression test for exclusion after branches: empty first alternative + + [[ ab = (b|a*)~^(*b) ]] +0:Regression test for exclusion after branches: non-empty first alternative + + [[ ab = (b*|a*)~^(*b) ]] +0:Regression test for exclusion after branches: full length first alternative + +# Corresponding tests where the exclusion should succeed, so the +# match fails. It's hard to know how to provoke bugs here... + [[ abc = (|a*)~^(*b) ]] +1:Regression test for exclusion after branches: failure case 1 + + [[ abc = (b|a*)~^(*b) ]] +1:Regression test for exclusion after branches: failure case 2 + + [[ abc = (b*|a*)~^(*b) ]] +1:Regression test for exclusion after branches: failure case 3 + +# Careful: extendedglob off from this point. + unsetopt extendedglob print -r -- ${(*)=${(@s.+.):-A+B}/(#b)(?)/-${(L)match[1]} ${match[1]}} 0:the '*' qualfier enables extended_glob for pattern matching