zsh/Functions/Zle/match-words-by-style

# Match words by the style given below.  The matching depends on the
# cursor position.  The matched_words array is set to the matched portions
# separately.  These look like:
#    <stuff-at-start> <word-before-cursor> <whitespace-before-cursor>
#    <whitespace-after-cursor> <word-after-cursor> <whitespace-after-word>
#    <stuff-at-end>
# where the cursor position is always after the third item and `after'
# is to be interpreted as `after or on'.  Some
# of the array elements will be empty; this depends on the style.
# For example
#    foo bar  rod stick
#            ^
# with the cursor where indicated will with typical settings produce the
# elements `foo ', `bar', ` ', ` ', `rod', ` ' and `stick'.
#
# The style word-style can be set to indicate what a word is.
# The possibilities are:
#
#  shell	Words are shell words, i.e. elements of a command line.
#  whitespace	Words are whitespace delimited words; only whitespace
#               characters are considered to terminate a word.
#  normal       (the default): the usual zle logic is applied, with all
#		alphanumeric characters plus any characters in $WORDCHARS
#		considered parts of a word.  The style word-chars overrides
#		the parameter.  (Any currently undefined value will be
#		treated as `normal', but this should not be relied upon.)
#  specified    Similar to normal, except that only the characters given
#               in the string (and not also alphanumeric characters)
#               are to be considered parts of words.
#  unspecified  The negation of `specified': the characters given
#               are those that aren't to be considered parts of a word.
#               They should probably include white space.
#
# In the case of the `normal' or `(un)specified', more control on the
# behaviour can be obtained by setting the style `word-chars' for the
# current context.  The value is used to override $WORDCHARS locally.
# Hence,
#   zstyle ':zle:transpose-words*' word-style normal
#   zstyle ':zle:transpose-words*' word-chars ''
# will force bash-style word recognition, i.e. only alphanumeric characters
# are considered parts of a word.  It is up to the function which calls
# match-words-by-style to set the context in the variable curcontext,
# else a default context will be used (not recommended).
#
# You can override the use of word-chars with the style word-class.
# This specifies the same information, but as a character class.
# The surrounding square brackets shouldn't be given, but anything
# which can appear inside is allowed.  For example,
#   zstyle ':zle:*' word-class '-:[:alnum:]'
# is valid.  Note the usual care with `]' , `^' and `-' must be taken if
# they need to appear as individual characters rather than for grouping.
#
# The final style is `skip-chars'.  This is an integer; that many
# characters counting the one under the cursor will be treated as
# whitespace regardless and added to the front of the fourth element of
# matched_words.  The default is zero, i.e. the character under the cursor
# will appear in <whitespace-after-cursor> if it is whitespace, else in
# <word-after-cursor>.  This style is mostly useful for forcing
# transposition to ignore the current character.
#
# The values of the styles can be overridden by options to the function:
#  -w <word-style>
#  -s <skip-chars>
#  -c <word-class>
#  -C <word-chars>

emulate -L zsh
setopt extendedglob

# Settings taken from options or styles, and the patterns built from them.
local opt wordstyle skip wordchars wordclass
local spacepat wordpat1 wordpat2 charskip nwords
# The pieces of the line picked out by the two matches.
local pat1 pat2 word1 word2 ws1 ws2 ws3
# Keep the pattern-match result parameters private to this function.
local match mbegin mend MATCH MBEGIN MEND

# The caller is expected to set curcontext; fall back to a default
# context if it did not.
local curcontext=${curcontext:-:zle:match-words-by-style}

# Options given on the command line are recorded here; anything left
# empty is looked up as a style afterwards.
while getopts "w:s:c:C:tT" opt; do
  case $opt in
    (w) wordstyle=$OPTARG ;;
    (s) skip=$OPTARG ;;
    (c) wordclass=$OPTARG ;;
    (C) wordchars=$OPTARG ;;
    # Everything else is an error.  Note that -t and -T are listed in
    # the option string but have no handler, so they end up here too.
    (*) return 1 ;;
  esac
done

# Anything not supplied as an option is looked up as a style in the
# current context.
if [[ -z $wordstyle ]]; then
  zstyle -s $curcontext word-style wordstyle
fi
if [[ -z $skip ]]; then
  zstyle -s $curcontext skip-chars skip
fi
# No option and no style: skip nothing.
: ${skip:=0}

# Build the patterns used by the final match according to the word style:
# wordpat1 matches the word before the cursor, wordpat2 the word after
# it, and spacepat whatever separates words.
case $wordstyle in
  (shell) local bufwords
	  # This splits the line into words as the shell understands them.
	  bufwords=(${(z)LBUFFER})
	  # Number of shell words in the text before the cursor; used below
	  # to index the same word in the split of the whole line.
	  nwords=${#bufwords}
	  # Work around bug: if stripping quotes failed, a bogus
	  # space is appended.  Not a good test, since this may
	  # be a quoted space, but it's hard to get right.
	  # The word is quoted with (q) because it is spliced into an
	  # eval'd pattern later and has to match literally there.
	  wordpat1=${bufwords[-1]}
	  if [[ ${wordpat1[-1]} = ' ' ]]; then
	    wordpat1=${(q)wordpat1[1,-2]}
	  else
	    wordpat1="${(q)wordpat1}"
	  fi

	  # Take substring of RBUFFER to skip over $skip characters
	  # from the cursor position.
	  bufwords=(${(z)RBUFFER[1+$skip,-1]})
	  # Work around bug again: same treatment for the first word
	  # after the cursor.
	  wordpat2=${bufwords[1]}
	  if [[ ${wordpat2[-1]} = ' ' ]]
	  then
	    wordpat2=${(q)wordpat2[1,-2]}
	  else
	    wordpat2="${(q)wordpat2}"
	  fi
	  spacepat='[[:space:]]#'

	  # Assume the words are at the top level, i.e. if we are inside
	  # 'something with spaces' then we need to ignore the embedded
	  # spaces and consider the whole word.
	  bufwords=(${(z)BUFFER})
	  # Compare the length of word number $nwords of the whole line
	  # with the length of the (quoted) last word found in LBUFFER.
	  if (( ${#bufwords[$nwords]} > ${#wordpat1} )); then
	    # Yes, we're in the middle of a shell word.
	    # Find out what's in front.
	    eval pat1='${LBUFFER%%(#b)('${wordpat1}')('${spacepat}')}'
	    # Now everything from ${#pat1}+1 is wordy
	    wordpat1=${(q)LBUFFER[${#pat1}+1,-1]}

	    # Likewise at the end...
	    # NOTE(review): charskip has not been assigned yet at this
	    # point (it is only built after this case statement), so it
	    # expands to nothing here and skip-chars does not take part
	    # in this pattern.
	    eval pat2='${RBUFFER##(#b)('${charskip}${spacepat}')('\
${wordpat2}')('${spacepat}')}'
	    wordpat2=${(q)RBUFFER[1,-1-${#pat2}]}
	  fi
	  ;;
  # Any style ending in `space' (e.g. `whitespace'): words are runs of
  # non-whitespace characters.
  (*space) spacepat='[[:space:]]#'
           wordpat1='[^[:space:]]##'
	   wordpat2=$wordpat1
	   ;;
  # Everything else (`normal', `specified', `unspecified' or any other
  # value): words are defined by a set of characters held in wc.
  (*) local wc
      # See if there is a character class; the -c option takes
      # precedence over the word-class style.
      wc=$wordclass
      if [[ -n $wc ]] || zstyle -s $curcontext word-class wc; then
	# Treat as a character class: do minimal quoting.
	# Only quotes, backquote, dollar, parentheses and caret get a
	# backslash put in front of them.
	wc=${wc//(#m)[\'\"\`\$\(\)\^]/\\$MATCH}
      else
	# See if there is a local version of $WORDCHARS: the -C option
	# first, then the word-chars style, then the parameter itself.
	wc=$wordchars
	if [[ -z $wc ]]; then
	  zstyle -s $curcontext word-chars wc ||
	  wc=$WORDCHARS
	fi
	# This only matches if a `-' occurs somewhere after the first
	# character.
	if [[ $wc = (#b)(?*)-(*) ]]; then
	  # We need to bring any `-' to the front to avoid confusing
	  # character classes... we get away with `]' since in zsh
          # this isn't a pattern character if it's quoted.
	  wc=-$match[1]$match[2]
	fi
	# Quote the whole string so none of it is taken as pattern
	# syntax when it is used in the eval'd patterns later on.
	wc="${(q)wc}"
      fi
      # $wc has been quoted where necessary above, because we don't want
      # those characters to be considered as pattern characters later on.
      if [[ $wordstyle = *specified ]]; then
        if [[ $wordstyle != un* ]]; then
	  # The given set of characters are the word characters, nothing else
	  wordpat1="[${wc}]##"
	  # anything else is a space.
	  spacepat="[^${wc}]#"
	else
	  # The other way round.
	  wordpat1="[^${wc}]##"
	  spacepat="[${wc}]#"
    	fi
      else
        # Normal: similar, but add alphanumerics.
	wordpat1="[${wc}[:alnum:]]##"
	spacepat="[^${wc}[:alnum:]]#"
      fi
      # The same pattern is used on both sides of the cursor.
      wordpat2=$wordpat1
      ;;
esac

# The eval makes any special characters in the parameters active.
# In particular, we need the surrounding `[' s to be `real'.
# This is why we quoted the wordpats in the `shell' option, where
# they have to be treated as literal strings at this point.
#
# First match: remove <word><space> from the end of LBUFFER.  pat1 is
# what remains in front; the two parenthesised groups are read back
# from $match, which the (#b) flag fills in.
match=()
eval pat1='${LBUFFER%%(#b)('${wordpat1}')('${spacepat}')}'
word1=$match[1]
ws1=$match[2]

# Clear the backreferences before the second match.
match=()
# Build one `?' per character to skip; these sit in front of spacepat
# inside the first group, so skipped characters end up in ws2.
charskip=
repeat $skip charskip+=\?

# Second match: remove <skip+space><word><space> from the start of
# RBUFFER.  pat2 is what remains after it.
eval pat2='${RBUFFER##(#b)('${charskip}${spacepat}')('\
${wordpat2}')('${spacepat}')}'

ws2=$match[1]
word2=$match[2]
ws3=$match[3]

# The result.  matched_words is not declared local in this function, so
# the assignment is visible to the caller (which presumably declares
# it itself -- verify against callers).
matched_words=("$pat1" "$word1" "$ws1" "$ws2" "$word2" "$ws3" "$pat2")
18394: New word movement and editing widgets. 2003-03-28 12:34:07 +01:00			`# Match words by the style given below. The matching depends on the`
			`# cursor position. The matched_words array is set to the matched portions`
			`# separately. These look like:`
			`# <stuff-at-start> <word-before-cursor> <whitespace-before-cursor>`
			`# <whitespace-after-cursor> <word-after-cursor> <whitespace-after-word>`
			`# <stuff-at-end>`
			# where the cursor position is always after the third item and `after'
			# is to be interpreted as `after or on'. Some
			`# of the array elements will be empty; this depends on the style.`
			`# For example`
			`# foo bar rod stick`
			`# ^`
18472: spelling corrections 2003-04-25 13:18:50 +02:00			`# with the cursor where indicated will with typical settings produce the`
18394: New word movement and editing widgets. 2003-03-28 12:34:07 +01:00			# elements `foo ', `bar', ` ', ` ', `rod', ` ' and `stick'.
			`#`
			`# The style word-style can be set to indicate what a word is.`
			`# The three possibilities are:`
			`#`
			`# shell Words are shell words, i.e. elements of a command line.`
			`# whitespace Words are space delimited words; only space or tab characters`
			`# are considered to terminated a word.`
			`# normal (the default): the usual zle logic is applied, with all`
			`# alphanumeric characters plus any characters in $WORDCHARS`
			`# considered parts of a word. The style word-chars overrides`
			`# the parameter. (Any currently undefined value will be`
			# treated as `normal', but this should not be relied upon.)
			`# specified Similar to normal, except that only the words given`
			`# in the string (and not also alphanumeric characters)`
18472: spelling corrections 2003-04-25 13:18:50 +02:00			`# are to be considered parts of words.`
18394: New word movement and editing widgets. 2003-03-28 12:34:07 +01:00			# unspecified The negation of `specified': the characters given
			`# are those that aren't to be considered parts of a word.`
			`# They should probably include white space.`
			`#`
			# In the case of the `normal' or `(un)specified', more control on the
			# behaviour can be obtained by setting the style `word-chars' for the
			`# current context. The value is used to override $WORDCHARS locally.`
			`# Hence,`
			`# zstyle ':zle:transpose-words*' word-style normal`
			`# zstyle ':zle:transpose-words*' word-chars ''`
			`# will force bash-style word recognition, i.e only alphanumeric characters`
18472: spelling corrections 2003-04-25 13:18:50 +02:00			`# are considered parts of a word. It is up to the function which calls`
18394: New word movement and editing widgets. 2003-03-28 12:34:07 +01:00			`# match-words-by-style to set the context in the variable curcontext,`
			`# else a default context will be used (not recommended).`
			`#`
			`# You can override the use of word-chars with the style word-class.`
			`# This specifies the same information, but as a character class.`
			`# The surrounding square brackets shouldn't be given, but anything`
			`# which can appear inside is allowed. For example,`
			`# zstyle ':zle:*' word-class '-:[:alnum:]'`
			# is valid. Note the usual care with `]' , `^' and `-' must be taken if
			`# they need to appear as individual characters rather than for grouping.`
			`#`
			# The final style is `skip-chars'. This is an integer; that many
			`# characters counting the one under the cursor will be treated as`
			`# whitespace regardless and added to the front of the fourth element of`
			`# matched_words. The default is zero, i.e. the character under the cursor`
			`# will appear in <whitespace-after-cursor> if it is whitespace, else in`
			`# <word-after-cursor>. This style is mostly useful for forcing`
			`# transposition to ignore the current character.`
20612: add options to match-words-by-style widget 2004-12-09 15:44:42 +01:00			`#`
			`# The values of the styles can be overridden by options to the function:`
			`# -w <word-style>`
			`# -s <skip-chars>`
			`# -c <word-class>`
			`# -C <word-chars>`
18394: New word movement and editing widgets. 2003-03-28 12:34:07 +01:00
			`emulate -L zsh`
			`setopt extendedglob`

20612: add options to match-words-by-style widget 2004-12-09 15:44:42 +01:00			`local wordstyle spacepat wordpat1 wordpat2 opt charskip wordchars wordclass`
18394: New word movement and editing widgets. 2003-03-28 12:34:07 +01:00			`local match mbegin mend pat1 pat2 word1 word2 ws1 ws2 ws3 skip`
20613: fix inconsistencies with embedded whitespace in match-words-by-style 2004-12-09 18:26:35 +01:00			`local nwords MATCH MBEGIN MEND`
18394: New word movement and editing widgets. 2003-03-28 12:34:07 +01:00
			`if [[ -z $curcontext ]]; then`
			`local curcontext=:zle:match-words-by-style`
			`fi`

20613: fix inconsistencies with embedded whitespace in match-words-by-style 2004-12-09 18:26:35 +01:00			`while getopts "w:s:c:C:tT" opt; do`
20612: add options to match-words-by-style widget 2004-12-09 15:44:42 +01:00			`case $opt in`
			`(w)`
			`wordstyle=$OPTARG`
			`;;`

			`(s)`
			`skip=$OPTARG`
			`;;`

			`(c)`
			`wordclass=$OPTARG`
			`;;`

			`(C)`
			`wordchars=$OPTARG`
			`;;`

			`(*)`
			`return 1`
			`;;`
			`esac`
			`done`

			`[[ -z $wordstyle ]] && zstyle -s $curcontext word-style wordstyle`
			`[[ -z $skip ]] && zstyle -s $curcontext skip-chars skip`
18394: New word movement and editing widgets. 2003-03-28 12:34:07 +01:00			`[[ -z $skip ]] && skip=0`

			`case $wordstyle in`
			`(shell) local bufwords`
			`# This splits the line into words as the shell understands them.`
			`bufwords=(${(z)LBUFFER})`
20613: fix inconsistencies with embedded whitespace in match-words-by-style 2004-12-09 18:26:35 +01:00			`nwords=${#bufwords}`
18394: New word movement and editing widgets. 2003-03-28 12:34:07 +01:00			`# Work around bug: if stripping quotes failed, a bogus`
			`# space is appended. Not a good test, since this may`
			`# be a quoted space, but it's hard to get right.`
			`wordpat1=${bufwords[-1]}`
			`if [[ ${wordpat1[-1]} = ' ' ]]; then`
			`wordpat1=${(q)wordpat1[1,-2]}`
			`else`
			`wordpat1="${(q)wordpat1}"`
			`fi`

			`# Take substring of RBUFFER to skip over $skip characters`
			`# from the cursor position.`
			`bufwords=(${(z)RBUFFER[1+$skip,-1]})`
			`# Work around bug again.`
			`wordpat2=${bufwords[1]}`
			`if [[ ${wordpat2[-1]} = ' ' ]]`
			`then`
			`wordpat2=${(q)wordpat2[1,-2]}`
			`else`
			`wordpat2="${(q)wordpat2}"`
			`fi`
			`spacepat='[[:space:]]#'`
20613: fix inconsistencies with embedded whitespace in match-words-by-style 2004-12-09 18:26:35 +01:00
			`# Assume the words are at the top level, i.e. if we are inside`
			`# 'something with spaces' then we need to ignore the embedded`
			`# spaces and consider the whole word.`
			`bufwords=(${(z)BUFFER})`
			`if (( ${#bufwords[$nwords]} > ${#wordpat1} )); then`
			`# Yes, we're in the middle of a shell word.`
			`# Find out what's in front.`
			`eval pat1='${LBUFFER%%(#b)('${wordpat1}')('${spacepat}')}'`
			`# Now everything from ${#pat1}+1 is wordy`
			`wordpat1=${(q)LBUFFER[${#pat1}+1,-1]}`

			`# Likewise at the end...`
			`eval pat2='${RBUFFER##(#b)('${charskip}${spacepat}')('\`
			`${wordpat2}')('${spacepat}')}'`
			`wordpat2=${(q)RBUFFER[1,-1-${#pat2}]}`
			`fi`
18394: New word movement and editing widgets. 2003-03-28 12:34:07 +01:00			`;;`
			`(*space) spacepat='[[:space:]]#'`
			`wordpat1='[^[:space:]]##'`
			`wordpat2=$wordpat1`
			`;;`
			`(*) local wc`
			`# See if there is a character class.`
20612: add options to match-words-by-style widget 2004-12-09 15:44:42 +01:00			`wc=$wordclass`
			`if [[ -n $wc ]] \|\| zstyle -s $curcontext word-class wc; then`
			`# Treat as a character class: do minimal quoting.`
			wc=${wc//(#m)[\'\"\`\$\(\)\^]/\\$MATCH}
18394: New word movement and editing widgets. 2003-03-28 12:34:07 +01:00			`else`
20612: add options to match-words-by-style widget 2004-12-09 15:44:42 +01:00			`# See if there is a local version of $WORDCHARS.`
			`wc=$wordchars`
			`if [[ -z $wc ]]; then`
18394: New word movement and editing widgets. 2003-03-28 12:34:07 +01:00			`zstyle -s $curcontext word-chars wc \|\|`
			`wc=$WORDCHARS`
20612: add options to match-words-by-style widget 2004-12-09 15:44:42 +01:00			`fi`
			`if [[ $wc = (#b)(?)-() ]]; then`
			# We need to bring any `-' to the front to avoid confusing
			# character classes... we get away with `]' since in zsh
			`# this isn't a pattern character if it's quoted.`
			`wc=-$match[1]$match[2]`
			`fi`
			`wc="${(q)wc}"`
18394: New word movement and editing widgets. 2003-03-28 12:34:07 +01:00			`fi`
			`# Quote $wc where necessary, because we don't want those`
			`# characters to be considered as pattern characters later on.`
			`if [[ $wordstyle = *specified ]]; then`
			`if [[ $wordstyle != un* ]]; then`
			`# The given set of characters are the word characters, nothing else`
			`wordpat1="[${wc}]##"`
			`# anything else is a space.`
			`spacepat="[^${wc}]#"`
			`else`
			`# The other way round.`
			`wordpat1="[^${wc}]##"`
			`spacepat="[${wc}]#"`
			`fi`
			`else`
			`# Normal: similar, but add alphanumerics.`
			`wordpat1="[${wc}[:alnum:]]##"`
			`spacepat="[^${wc}[:alnum:]]#"`
			`fi`
			`wordpat2=$wordpat1`
			`;;`
			`esac`

			`# The eval makes any special characters in the parameters active.`
			# In particular, we need the surrounding `[' s to be `real'.
			# This is why we quoted the wordpats in the `shell' option, where
			`# they have to be treated as literal strings at this point.`
			`match=()`
			`eval pat1='${LBUFFER%%(#b)('${wordpat1}')('${spacepat}')}'`
			`word1=$match[1]`
			`ws1=$match[2]`

			`match=()`
			`charskip=`
			`repeat $skip charskip+=\?`

			`eval pat2='${RBUFFER##(#b)('${charskip}${spacepat}')('\`
			`${wordpat2}')('${spacepat}')}'`

			`ws2=$match[1]`
			`word2=$match[2]`
			`ws3=$match[3]`

			`matched_words=("$pat1" "$word1" "$ws1" "$ws2" "$word2" "$ws3" "$pat2")`