zsh/Test/V07pcre.ztst

%prep

  if ! zmodload -F zsh/pcre C:pcre-match 2>/dev/null
  then
# Without the module there is nothing to test: record the reason in
# ZTST_unimplemented and leave the setup code early.
    ZTST_unimplemented="the zsh/pcre module is not available"
    return 0
  fi
# Load the rest of the builtins
  zmodload zsh/pcre
# Have the =~ operator use PCRE for the tests below.
  setopt rematch_pcre
# Find a UTF-8 locale.
  setopt multibyte
# Don't let LC_* override our choice of locale.
  unset -m LC_\*
  mb_ok=
# Candidate locales: some common English UTF-8 names first, then whatever
# "locale -a" reports.  "grep -E" is used instead of egrep because egrep
# is obsolescent and recent GNU grep prints a warning on stderr for it.
  langs=(en_{US,GB}.{UTF-,utf}8 en.UTF-8
	 $(locale -a 2>/dev/null | grep -E 'utf8|UTF-8'))
# Keep the first candidate under which a non-ASCII character is matched by
# a single "?" pattern, i.e. is treated as one character.
  for LANG in $langs; do
    if [[ é = ? ]]; then
      mb_ok=1
      break;
    fi
  done
  if [[ -z $mb_ok ]]; then
    ZTST_unimplemented="no UTF-8 locale or multibyte mode is not implemented"
  else
# Report the locale in use on the harness descriptor and work in a
# scratch directory.
    print -u $ZTST_fd Testing PCRE multibyte with locale $LANG
    mkdir multibyte.tmp && cd multibyte.tmp
  fi

%test

  [[ 'foo→bar' =~ .([^[:ascii:]]). ]]
# MATCH should hold the whole three-character match and match[1] the
# single non-ASCII character captured by the parentheses.
  print $MATCH
  print $match[1]
0:Basic non-ASCII regexp matching
>o→b
>→

  unset match mend
# s holds the single character U+00A0, written with a \u escape.
# NOTE(review): presumably chosen because its encoded form contains a byte
# that zsh treats specially internally (see the test title) -- confirm.
  s=$'\u00a0'
# Each of the three checks prints OK only if it succeeds: s alone is one
# character, s can be captured and compared, and a capture of two copies
# of s ends at index 3 (mend[1]).
  [[ $s =~ '^.$' ]] && print OK
  [[ A${s}B =~ .(.). && $match[1] == $s ]] && print OK
  [[ A${s}${s}B =~ A([^[:ascii:]]*)B && $mend[1] == 3 ]] && print OK
  unset s
0:Raw IMETA characters in input string
>OK
>OK
>OK

  [[ foo =~ f.+ ]] ; print $?
  [[ foo =~ x.+ ]] ; print $?
# A leading ! inverts the status of a single match.
  [[ ! foo =~ f.+ ]] ; print $?
  [[ ! foo =~ x.+ ]] ; print $?
# Two matches joined with &&: status 0 only when both succeed.
  [[ foo =~ f.+ && bar =~ b.+ ]] ; print $?
  [[ foo =~ x.+ && bar =~ b.+ ]] ; print $?
  [[ foo =~ f.+ && bar =~ x.+ ]] ; print $?
# Within a conjunction, ! applies only to the match it precedes unless
# parentheses group the whole conjunction.  First with both matches
# succeeding...
  [[ ! foo =~ f.+ && bar =~ b.+ ]] ; print $?
  [[ foo =~ f.+ && ! bar =~ b.+ ]] ; print $?
  [[ ! ( foo =~ f.+ && bar =~ b.+ ) ]] ; print $?
# ...then with the first match failing.
  [[ ! foo =~ x.+ && bar =~ b.+ ]] ; print $?
  [[ foo =~ x.+ && ! bar =~ b.+ ]] ; print $?
  [[ ! ( foo =~ x.+ && bar =~ b.+ ) ]] ; print $?
0:Regex result inversion detection
>0
>1
>1
>0
>0
>1
>1
>1
>1
>1
>0
>1
>0

# Note that PCRE_ANCHORED only means anchored at the start.
# Also note that we don't unset MATCH/match on a failed match (and it's an
# open issue as to whether or not we should).  This is why MATCH and match
# are unset by hand after every print: a failed match then prints only
# its status.
#
# Unanchored pattern: any character, then a captured arrow plus one more
# character.
  pcre_compile '.(→.)'
  pcre_match foo→bar
  print $? $MATCH $match ; unset MATCH match
  pcre_match foo.bar
  print $? $MATCH $match ; unset MATCH match
  pcre_match foo†bar
  print $? $MATCH $match ; unset MATCH match
  pcre_match foo→†ar
  print $? $MATCH $match ; unset MATCH match
# Repeat the first match after pcre_study; the result must be the same.
  pcre_study
  pcre_match foo→bar
  print $? $MATCH $match ; unset MATCH match
# Same pattern compiled with -a (anchored): it no longer matches in the
# middle of the subject, but trailing text after the match is still fine.
  pcre_compile -a '.(→.)'
  pcre_match foo→bar
  print $? $MATCH $match ; unset MATCH match
  pcre_match o→bar
  print $? $MATCH $match ; unset MATCH match
  pcre_match o→b
  print $? $MATCH $match ; unset MATCH match
# Without -i the leading x is case-sensitive...
  pcre_compile 'x.(→.)'
  pcre_match xo→t
  print $? $MATCH $match ; unset MATCH match
  pcre_match Xo→t
  print $? $MATCH $match ; unset MATCH match
# ...and with -i both x and X are accepted.
  pcre_compile -i 'x.(→.)'
  pcre_match xo→t
  print $? $MATCH $match ; unset MATCH match
  pcre_match Xo→t
  print $? $MATCH $match ; unset MATCH match
0:pcre_compile interface testing: basic, anchored & case-insensitive
>0 o→b →b
>1
>1
>0 o→† →†
>0 o→b →b
>1
>0 o→b →b
>0 o→b →b
>0 xo→t →t
>1
>0 xo→t →t
>0 Xo→t →t

  string="The following zip codes: 78884 90210 99513"
  pcre_compile -m "\d{5}"
# With -b, ZPCRE_OP is available after the match; it is printed here as a
# pair of offsets alongside the matched text.
  pcre_match -b -- $string && print "$MATCH; ZPCRE_OP: $ZPCRE_OP"
# Pass the second word of ZPCRE_OP to -n so that the next match is the
# second zip code rather than the first one again.
  pcre_match -b -n $ZPCRE_OP[(w)2] -- $string || print failed
  print "$MATCH; ZPCRE_OP: $ZPCRE_OP"
0:pcre_match -b and pcre_match -n
>78884; ZPCRE_OP: 25 30
>90210; ZPCRE_OP: 31 36

# Embedded NULs are allowed in the plaintext, but not in the RE (although
# \0 written as the two characters backslash and zero is allowed).
# The subject is built with $'...' and so contains real NUL characters;
# the pattern is single-quoted and so contains the two-character form.
# The lengths printed are those of the whole match and of each capture.
  [[ $'a\0bc\0d' =~ '^(a\0.)(.+)$' ]]
  print "${#MATCH}; ${#match[1]}; ${#match[2]}"
0:ensure ASCII NUL passes in and out of matched plaintext
>6; 3; 3

# Ensure the long-form infix operator (-pcre-match) works: one matching
# pattern, one non-matching pattern, and the non-matching pattern negated
# with !.
  [[ foo -pcre-match ^f..$ ]]
  print $?
  [[ foo -pcre-match ^g..$ ]]
  print $?
  [[ ! foo -pcre-match ^g..$ ]]
  print $?
0:infix -pcre-match works
>0
>1
>0

# Bash mode; note that zsh documents that the variables are not updated on
# match failure, which remains different from bash.
# Each successful match prints its status followed by every element of
# BASH_REMATCH on its own line, prefixed with "_" so that the elements are
# easy to tell apart from the status in the expected output.
  setopt bash_rematch
  [[ "goo" -pcre-match ^f.+$ ]] ; print $?
  [[ "foo" -pcre-match ^f.+$ ]] ; print -l $? _${^BASH_REMATCH[@]}
  [[ "foot" -pcre-match ^f([aeiou]+)(.)$ ]]; print -l $? _${^BASH_REMATCH[@]}
  [[ "foo" -pcre-match ^f.+$ ]] ; print -l $? _${^BASH_REMATCH[@]}
  [[ ! "goo" -pcre-match ^f.+$ ]] ; print $?
# Switch the option back off once the checks are done.
  unsetopt bash_rematch
0:bash-compatibility works
>1
>0
>_foo
>0
>_foot
>_oo
>_t
>0
>_foo
>0

# Run in a subshell because the failure mode of this test is a crash
# (a segmentation fault, per the test title).
# The subject contains no underscore, so the optional first group matches
# nothing while the second group must still be reported as "test".
  ( setopt re_match_pcre
    [[ test.txt =~ '^(.*_)?(test)' ]]
    echo $match[2] )
0:regression for segmentation fault, workers/38307
>test