mirror of
git://git.code.sf.net/p/zsh/code
synced 2024-09-27 14:30:46 +02:00
74aff4106a
When we went off the end of the array but measured the length implicitly, we got lucky before. After 41308 we were looking up lengths in stale memory. Rename some variables, clean up the logic, be easier to understand. Add tests.
179 lines
4.4 KiB
Plaintext
179 lines
4.4 KiB
Plaintext
%prep

# Skip the whole file when the zsh/pcre module cannot be loaded.  Only the
# pcre-match condition is requested here, as an availability probe.
  if ! zmodload -F zsh/pcre C:pcre-match 2>/dev/null
  then
    ZTST_unimplemented="the zsh/pcre module is not available"
    return 0
  fi
# Load the rest of the builtins
  zmodload zsh/pcre
  setopt rematch_pcre
# Find a UTF-8 locale.
  setopt multibyte
# Don't let LC_* override our choice of locale.
  unset -m LC_\*
  mb_ok=
# Candidate locales: a few common English UTF-8 names first, then whatever
# `locale -a' reports.  `grep -E' is used instead of `egrep': POSIX does not
# specify egrep, and GNU grep 3.8 and later warns that it is obsolescent.
  langs=(en_{US,GB}.{UTF-,utf}8 en.UTF-8
         $(locale -a 2>/dev/null | grep -E 'utf8|UTF-8'))
# Keep the first locale in which a non-ASCII character is matched by a
# single `?', i.e. is treated as one character.
  for LANG in $langs; do
    if [[ é = ? ]]; then
      mb_ok=1
      break;
    fi
  done
# Without a usable locale the tests are reported as unimplemented; otherwise
# they run inside a scratch directory.
  if [[ -z $mb_ok ]]; then
    ZTST_unimplemented="no UTF-8 locale or multibyte mode is not implemented"
  else
    print -u $ZTST_fd Testing PCRE multibyte with locale $LANG
    mkdir multibyte.tmp && cd multibyte.tmp
  fi

%test

# With rematch_pcre in effect, =~ is expected to leave the whole match in
# MATCH and the first capture group in match[1].
  [[ 'foo→bar' =~ .([^[:ascii:]]). ]]
  print $MATCH
  print $match[1]
0:Basic non-ASCII regexp matching
>o→b
>→

# U+00A0 is the sample character.  Three checks, each printing OK:
#   1. it matches exactly one `.';
#   2. it comes back unchanged in match[1];
#   3. two of them after a single ASCII character give mend[1] == 3.
  unset match mend
  s=$'\u00a0'
  [[ $s =~ '^.$' ]] && print OK
  [[ A${s}B =~ .(.). && $match[1] == $s ]] && print OK
  [[ A${s}${s}B =~ A([^[:ascii:]]*)B && $mend[1] == 3 ]] && print OK
  unset s
0:Raw IMETA characters in input string
>OK
>OK
>OK

# Exit status of =~ conditions: plain, negated with `!', combined with `&&',
# and negated as a parenthesised group.  One status is printed per
# condition, in the same order as the expected output below.
  [[ foo =~ f.+ ]] ; print $?
  [[ foo =~ x.+ ]] ; print $?
  [[ ! foo =~ f.+ ]] ; print $?
  [[ ! foo =~ x.+ ]] ; print $?
  [[ foo =~ f.+ && bar =~ b.+ ]] ; print $?
  [[ foo =~ x.+ && bar =~ b.+ ]] ; print $?
  [[ foo =~ f.+ && bar =~ x.+ ]] ; print $?
  [[ ! foo =~ f.+ && bar =~ b.+ ]] ; print $?
  [[ foo =~ f.+ && ! bar =~ b.+ ]] ; print $?
  [[ ! ( foo =~ f.+ && bar =~ b.+ ) ]] ; print $?
  [[ ! foo =~ x.+ && bar =~ b.+ ]] ; print $?
  [[ foo =~ x.+ && ! bar =~ b.+ ]] ; print $?
  [[ ! ( foo =~ x.+ && bar =~ b.+ ) ]] ; print $?
0:Regex result inversion detection
>0
>1
>1
>0
>0
>1
>1
>1
>1
>1
>0
>1
>0

# Exercises pcre_compile (plain, -a, -i), pcre_study and pcre_match.
# MATCH and match are unset after every print, so a failed match prints
# only its status.
# Note that PCRE_ANCHORED only means anchored at the start
# Also note that we don't unset MATCH/match on failed match (and it's an
# open issue as to whether or not we should)
  pcre_compile '.(→.)'
  pcre_match foo→bar
  print $? $MATCH $match ; unset MATCH match
  pcre_match foo.bar
  print $? $MATCH $match ; unset MATCH match
  pcre_match foo†bar
  print $? $MATCH $match ; unset MATCH match
  pcre_match foo→†ar
  print $? $MATCH $match ; unset MATCH match
  pcre_study
  pcre_match foo→bar
  print $? $MATCH $match ; unset MATCH match
  pcre_compile -a '.(→.)'
  pcre_match foo→bar
  print $? $MATCH $match ; unset MATCH match
  pcre_match o→bar
  print $? $MATCH $match ; unset MATCH match
  pcre_match o→b
  print $? $MATCH $match ; unset MATCH match
  pcre_compile 'x.(→.)'
  pcre_match xo→t
  print $? $MATCH $match ; unset MATCH match
  pcre_match Xo→t
  print $? $MATCH $match ; unset MATCH match
  pcre_compile -i 'x.(→.)'
  pcre_match xo→t
  print $? $MATCH $match ; unset MATCH match
  pcre_match Xo→t
  print $? $MATCH $match ; unset MATCH match
0:pcre_compile interface testing: basic, anchored & case-insensitive
>0 o→b →b
>1
>1
>0 o→† →†
>0 o→b →b
>1
>0 o→b →b
>0 o→b →b
>0 xo→t →t
>1
>0 xo→t →t
>0 Xo→t →t

string="The following zip codes: 78884 90210 99513"
|
|
pcre_compile -m "\d{5}"
|
|
pcre_match -b -- $string && print "$MATCH; ZPCRE_OP: $ZPCRE_OP"
|
|
pcre_match -b -n $ZPCRE_OP[(w)2] -- $string || print failed
|
|
print "$MATCH; ZPCRE_OP: $ZPCRE_OP"
|
|
0:pcre_match -b and pcre_match -n
|
|
>78884; ZPCRE_OP: 25 30
|
|
>90210; ZPCRE_OP: 31 36
|
|
|
|
# Embedded NULs allowed in plaintext, but not in RE (although \0 as two-chars allowed)
# The printed values are the lengths of the whole match and of the two
# capture groups.
  [[ $'a\0bc\0d' =~ '^(a\0.)(.+)$' ]]
  print "${#MATCH}; ${#match[1]}; ${#match[2]}"
0:ensure ASCII NUL passes in and out of matched plaintext
>6; 3; 3

# Ensure the long-form infix operator works
|
|
[[ foo -pcre-match ^f..$ ]]
|
|
print $?
|
|
[[ foo -pcre-match ^g..$ ]]
|
|
print $?
|
|
[[ ! foo -pcre-match ^g..$ ]]
|
|
print $?
|
|
0:infix -pcre-match works
|
|
>0
|
|
>1
|
|
>0
|
|
|
|
# Bash mode; note zsh documents that variables not updated on match failure,
|
|
# which remains different from bash
|
|
setopt bash_rematch
|
|
[[ "goo" -pcre-match ^f.+$ ]] ; print $?
|
|
[[ "foo" -pcre-match ^f.+$ ]] ; print -l $? _${^BASH_REMATCH[@]}
|
|
[[ "foot" -pcre-match ^f([aeiou]+)(.)$ ]]; print -l $? _${^BASH_REMATCH[@]}
|
|
[[ "foo" -pcre-match ^f.+$ ]] ; print -l $? _${^BASH_REMATCH[@]}
|
|
[[ ! "goo" -pcre-match ^f.+$ ]] ; print $?
|
|
unsetopt bash_rematch
|
|
0:bash-compatibility works
|
|
>1
|
|
>0
|
|
>_foo
|
|
>0
|
|
>_foot
|
|
>_oo
|
|
>_t
|
|
>0
|
|
>_foo
|
|
>0
|
|
|
|
# Subshell because crash on failure
# The first group is optional and does not participate in this match; the
# test prints the second group.
  ( setopt re_match_pcre
    [[ test.txt =~ '^(.*_)?(test)' ]]
    echo $match[2] )
0:regression for segmentation fault, workers/38307
>test