1
0
mirror of git://git.code.sf.net/p/zsh/code synced 2024-09-27 14:30:46 +02:00
zsh/Test/V07pcre.ztst
Peter Stephenson 74aff4106a 41542: Fix pcre logic for extracting matched parentheses.
When we went off the end of the array but measured the length
implicitly, we got lucky before.  After 41308 we were looking up lengths
in stale memory.

Rename some variables, clean up the logic, be easier to understand.

Add tests.
2017-08-14 11:17:48 +01:00

179 lines
4.4 KiB
Plaintext

%prep
# Preparation for the PCRE tests.
# If the pcre-match condition cannot be loaded from zsh/pcre, record the
# reason in ZTST_unimplemented and stop preparing.
if ! zmodload -F zsh/pcre C:pcre-match 2>/dev/null; then
  ZTST_unimplemented="the zsh/pcre module is not available"
  return 0
fi
# Load the rest of the builtins
zmodload zsh/pcre
setopt rematch_pcre
# Find a UTF-8 locale.
setopt multibyte
# Don't let LC_* override our choice of locale.
unset -m LC_\*
mb_ok=
# Candidate locales: common English UTF-8 names first, followed by every
# UTF-8 locale reported by "locale -a".  grep -E is used instead of egrep:
# egrep is obsolescent and recent GNU grep prints a warning on stderr
# whenever it is invoked.
langs=(en_{US,GB}.{UTF-,utf}8 en.UTF-8
  $(locale -a 2>/dev/null | grep -E 'utf8|UTF-8'))
# Keep the first candidate under which "é" matches the single-character
# pattern "?"; LANG is left set to that locale when the loop breaks.
for LANG in $langs; do
  if [[ é = ? ]]; then
    mb_ok=1
    break;
  fi
done
if [[ -z $mb_ok ]]; then
  ZTST_unimplemented="no UTF-8 locale or multibyte mode is not implemented"
else
  # Report the chosen locale and run the tests from a scratch directory.
  print -u $ZTST_fd Testing PCRE multibyte with locale $LANG
  mkdir multibyte.tmp && cd multibyte.tmp
fi
%test
# Match a pattern whose only capture group is one non-ASCII character
# ([^[:ascii:]]) surrounded by one arbitrary character on each side.
# The whole match is printed from $MATCH and the capture from $match[1];
# the expected values are the ">" lines after the status line.
[[ 'foo→bar' =~ .([^[:ascii:]]). ]]
print $MATCH
print $match[1]
0:Basic non-ASCII regexp matching
>o→b
>→
# $s holds the single character U+00A0, written with the $'\u00a0' escape.
# Three checks, each printing OK on success:
#   1. the character on its own matches '^.$';
#   2. between A and B it is captured unchanged as $match[1];
#   3. two of them between A and B are captured and $mend[1] is 3.
# NOTE(review): "IMETA" in the description presumably refers to zsh's
# internal metafied representation of such bytes -- confirm against the
# C source.
unset match mend
s=$'\u00a0'
[[ $s =~ '^.$' ]] && print OK
[[ A${s}B =~ .(.). && $match[1] == $s ]] && print OK
[[ A${s}${s}B =~ A([^[:ascii:]]*)B && $mend[1] == 3 ]] && print OK
unset s
0:Raw IMETA characters in input string
>OK
>OK
>OK
# Check the exit status of =~ on its own, negated with "!", chained with
# "&&", and negated as a parenthesised group.  "foo =~ f.+" and
# "bar =~ b.+" succeed; the "x.+" variants fail.  Every line prints $?,
# so the expected output is one 0 or 1 per condition, in the same order.
# An unparenthesised "!" applies only to the test that follows it, not to
# the whole "&&" chain: "! foo =~ x.+ && bar =~ b.+" is expected to give 0.
[[ foo =~ f.+ ]] ; print $?
[[ foo =~ x.+ ]] ; print $?
[[ ! foo =~ f.+ ]] ; print $?
[[ ! foo =~ x.+ ]] ; print $?
[[ foo =~ f.+ && bar =~ b.+ ]] ; print $?
[[ foo =~ x.+ && bar =~ b.+ ]] ; print $?
[[ foo =~ f.+ && bar =~ x.+ ]] ; print $?
[[ ! foo =~ f.+ && bar =~ b.+ ]] ; print $?
[[ foo =~ f.+ && ! bar =~ b.+ ]] ; print $?
[[ ! ( foo =~ f.+ && bar =~ b.+ ) ]] ; print $?
[[ ! foo =~ x.+ && bar =~ b.+ ]] ; print $?
[[ foo =~ x.+ && ! bar =~ b.+ ]] ; print $?
[[ ! ( foo =~ x.+ && bar =~ b.+ ) ]] ; print $?
0:Regex result inversion detection
>0
>1
>1
>0
>0
>1
>1
>1
>1
>1
>0
>1
>0
# Drive the pcre_compile / pcre_match / pcre_study builtins directly.
# After every pcre_match the exit status, $MATCH and $match are printed on
# one line, and MATCH and match are then unset so a failed match cannot
# show values left over from the previous one.
# Order of the sections:
#   - '.(→.)' compiled with no options, matched against four strings,
#     then matched once more after pcre_study;
#   - the same pattern compiled with -a, matched against three strings;
#   - 'x.(→.)' compiled without and then with -i, each matched against a
#     lower-case and an upper-case "x" string.
# Note that PCRE_ANCHORED only means anchored at the start
# Also note that we don't unset MATCH/match on failed match (and it's an
# open issue as to whether or not we should)
pcre_compile '.(→.)'
pcre_match foo→bar
print $? $MATCH $match ; unset MATCH match
pcre_match foo.bar
print $? $MATCH $match ; unset MATCH match
pcre_match foo†bar
print $? $MATCH $match ; unset MATCH match
pcre_match foo→†ar
print $? $MATCH $match ; unset MATCH match
pcre_study
pcre_match foo→bar
print $? $MATCH $match ; unset MATCH match
pcre_compile -a '.(→.)'
pcre_match foo→bar
print $? $MATCH $match ; unset MATCH match
pcre_match o→bar
print $? $MATCH $match ; unset MATCH match
pcre_match o→b
print $? $MATCH $match ; unset MATCH match
pcre_compile 'x.(→.)'
pcre_match xo→t
print $? $MATCH $match ; unset MATCH match
pcre_match Xo→t
print $? $MATCH $match ; unset MATCH match
pcre_compile -i 'x.(→.)'
pcre_match xo→t
print $? $MATCH $match ; unset MATCH match
pcre_match Xo→t
print $? $MATCH $match ; unset MATCH match
0:pcre_compile interface testing: basic, anchored & case-insensitive
>0 o→b →b
>1
>1
>0 o→† →†
>0 o→b →b
>1
>0 o→b →b
>0 o→b →b
>0 xo→t →t
>1
>0 xo→t →t
>0 Xo→t →t
# Find successive five-digit runs in one string.
# With -b, pcre_match sets $ZPCRE_OP, which is printed as two numbers per
# match ("25 30" for the first run, which starts 25 characters into the
# string).  The second word of that value is passed back through -n for
# the next call, and the expected output shows that call finding the
# following run, "90210".
string="The following zip codes: 78884 90210 99513"
pcre_compile -m "\d{5}"
pcre_match -b -- $string && print "$MATCH; ZPCRE_OP: $ZPCRE_OP"
pcre_match -b -n $ZPCRE_OP[(w)2] -- $string || print failed
print "$MATCH; ZPCRE_OP: $ZPCRE_OP"
0:pcre_match -b and pcre_match -n
>78884; ZPCRE_OP: 25 30
>90210; ZPCRE_OP: 31 36
# Embedded NULs are allowed in the subject string, but not in the RE itself
# (although the two-character escape \0 is allowed in the RE).
# The subject $'a\0bc\0d' is six characters long; the expected lengths
# "6; 3; 3" show that the NULs are kept in $MATCH and in both captures.
[[ $'a\0bc\0d' =~ '^(a\0.)(.+)$' ]]
print "${#MATCH}; ${#match[1]}; ${#match[2]}"
0:ensure ASCII NUL passes in and out of matched plaintext
>6; 3; 3
# Ensure the long-form infix operator works
# Three cases, each followed by printing $?: a pattern that matches "foo"
# (expected 0), one that does not (expected 1), and the non-matching one
# negated with "!" (expected 0).
[[ foo -pcre-match ^f..$ ]]
print $?
[[ foo -pcre-match ^g..$ ]]
print $?
[[ ! foo -pcre-match ^g..$ ]]
print $?
0:infix -pcre-match works
>0
>1
>0
# Bash mode; note zsh documents that variables not updated on match failure,
# which remains different from bash
# With bash_rematch set, results are read from the BASH_REMATCH array.
# "print -l" writes $? and then every array element prefixed with "_",
# one per line.  The sequence is: a failed match (status only), a match
# with no groups, a match with two groups (whole match plus both captures),
# the group-less match again (expected output lists only "_foo"), and a
# negated failed match.  The option is switched off again at the end.
setopt bash_rematch
[[ "goo" -pcre-match ^f.+$ ]] ; print $?
[[ "foo" -pcre-match ^f.+$ ]] ; print -l $? _${^BASH_REMATCH[@]}
[[ "foot" -pcre-match ^f([aeiou]+)(.)$ ]]; print -l $? _${^BASH_REMATCH[@]}
[[ "foo" -pcre-match ^f.+$ ]] ; print -l $? _${^BASH_REMATCH[@]}
[[ ! "goo" -pcre-match ^f.+$ ]] ; print $?
unsetopt bash_rematch
0:bash-compatibility works
>1
>0
>_foo
>0
>_foot
>_oo
>_t
>0
>_foo
>0
# Subshell because crash on failure
# The pattern has an optional first group, (.*_)?, that cannot take part
# in a match against "test.txt" (the string contains no "_"), followed by
# a second group that does match.  $match[2] must still come out as "test".
( setopt re_match_pcre
[[ test.txt =~ '^(.*_)?(test)' ]]
echo $match[2] )
0:regression for segmentation fault, workers/38307
>test