21662: new zle widget insert-unicode-char

2024-10-01 08:21:17 +02:00 · 2005-08-18 16:45:28 +00:00 · 2005-08-18 16:45:28 +00:00 · e987416cdd
commit e987416cdd
parent d551b0f483
3 changed files with 323 additions and 0 deletions
--- a/3
+++ b/3
@ -1,5 +1,8 @@
 2005-08-18  Peter Stephenson  <pws@csr.com>

+	* 21662: Doc/Zsh/contrib.yo, Functions/Zle/insert-unicode-char:
+	compose and insert a Unicode character using two-key codes.
+
 	* 21661: Src/glob.c: variable in 21655 shouldn't be static.

 2005-08-18  Peter Stephenson  <pws@pwstephenson.fsnet.co.uk>
--- a/Doc/Zsh/contrib.yo
+++ b/Doc/Zsh/contrib.yo
@ -668,6 +668,112 @@ into the command line.

 example(bindkey '^Xf' insert-files)
 )
+tindex(insert-unicode-char)
+item(tt(insert-unicode-char))(
+This function allows you to compose Unicode characters to be inserted
+into the command line.  The command is followed by two keys (there is
+no prompt), of which the first indicates the type of accent or special
+character, and the second indicates the base character.  Both input
+characters are always from the ASCII character set.  For best results
+zsh should have been built with support for multibyte characters
+(configured with tt(--enable-multibyte)).
+
+The character is converted from Unicode into the local representation and
+inserted into the command line at the cursor position.
+(The conversion is done within the shell, using whatever facilities
+the C library provides.)  With a numeric argument, the character and its
+code are previewed in the status line.
+
+The function may be run outside zle in which case it prints the character
+(together with a newline) to standard output.  Input is still read from
+keystrokes.
+
+The set of accented characters is reasonably complete up to U+0180, the
+set of special characters less so; beyond that point coverage is sparse.
+Adding new Unicode characters is easy, however.  Please send any
+additions to tt(zsh-workers@sunsite.dk).
+
+The codes for the first character are as follows:
+startsitem()
+sitem(tt(`))(
+Grave accent.
+)
+sitem(tt('))(
+Acute accent.
+)
+sitem(tt(d))(
+Double acute accent (only supported on a few letters).
+)
+sitem(tt(^))(
+Circumflex.
+)
+sitem(tt(~))(
+Tilde.
+)
+sitem(tt("))(
+Diaeresis (Umlaut).
+)
+sitem(tt(o))(
+Circle over the base character.
+)
+sitem(tt(e))(
+Ligatures ending in e or E: tt(e A) gives AE, tt(e o) gives oe, etc.
+)
+sitem(tt(j))(
+Ligatures ending in j or J: ij or IJ.
+)
+sitem(tt(c))(
+Cedilla.
+)
+sitem(tt(/))(
+Stroke through the base character.
+)
+sitem(tt(-))(
+Macron.  (A horizontal bar over the base character.)
+)
+sitem(tt(u))(
+Breve.  (A shallow dish shape over the base character.)
+)
+sitem(tt(.))(
+Dot above the base character.
+)
+sitem(tt(:))(
+A dot in the middle plane of the base character.
+)
+sitem(tt(g))(
+Ogonek.  (A little forward facing hook at the bottom right
+of the character.  The "g" stands for "Ogonek" but another
+mnemonic is that g has a squiggle below the line.)
+)
+sitem(tt(v))(
+Caron.  (A little v over the letter.)
+)
+sitem(tt(s))(
+Used only as tt(s s), a German Eszett or "scharfes S" ligature.
+)
+sitem(tt(h))(
+Icelandic (or Runic) edh (tt(h d)) or thorn (tt(h t)).
+)
+sitem(tt(m))(
+Various mathematical characters: not (tt(m \)), multiply (tt(m *)), divide
+(tt(m /)), degree (tt(m o)), +/- (tt(m +)), superscripts 1, 2, 3 (tt(m 1),
+etc.), micro (tt(m u)), quarter (tt(m q)), half (tt(m h)), three quarters
+(tt(m t)).
+)
+sitem(tt(p))(
+Various punctuation and currency characters (any non-mathematical symbol
+that is not part of a word):  soft space (tt(p _)), inverted ! (tt(p !)),
+cent (tt(p C)), pound sign (tt(p l)) (think lira, libra), currency (tt(p
+$)), yen (tt(p y)), broken bar (tt(p |)), section sign (tt(p s)), lonely
+diaeresis (tt(p ")), copyright sign (tt(p C)), Spanish feminine marker
+(tt(p f)), left guillemet (tt(p <)), soft hyphen (tt(p h)), registered
+trade mark (tt(p R)), lonely macron (tt(p -)), lonely acute (tt(p ')),
+Pilcrow (paragraph) sign (tt(p p)), middle dot (tt(p :)),
+lonely cedilla (tt(p c)), Spanish masculine marker (tt(p m)), right
+guillemet (tt(p >)), inverted ? (tt(p ?)), Euro sign (tt(p e)).
+)
+endsitem()
+)
 tindex(narrow-to-region)
 tindex(narrow-to-region-invisible)
 xitem(tt(narrow-to-region [ -p) var(pre) tt(] [ -P) var(post) tt(]))
--- a/Functions/Zle/insert-unicode-char
+++ b/Functions/Zle/insert-unicode-char
@ -0,0 +1,214 @@
+# Accented characters.  Inputs two keys: first the code for the accent, then
+# the base character being accented.  Note that all input characters are
+# ASCII.  For best results zsh should have been built with support for
+# multibyte characters (--enable-multibyte).
+#
+# Outputs the character converted from Unicode into the local representation.
+# (The conversion is done within the shell, using whatever facilities
+# the C library provides.)
+#
+# When used as a zle widget, the character is inserted at the cursor
+# position.  With a numeric argument, preview in status line; outside zle,
+# print character (and newline) to standard output.
+#
+# The set of accented characters is reasonably complete up to U+0180, the
+# set of special characters less so; beyond that point coverage is sparse.
+# Adding new Unicode characters is easy, however.  Please send any
+# additions to zsh-workers@sunsite.dk .
+#
+# Some of the accent codes are a little more obscure than others.
+# Only the base character changes for upper case: A with circle is "o A".
+#  `   Grave
+#  '   Acute
+#  d   Double acute
+#  ^   Circumflex
+#  ~   Tilde
+#  "   Diaeresis (Umlaut)
+#  o   Circle
+#  e   Ligatures ending in e or E: e A gives AE, e o gives oe, etc.
+#  j   Ligatures ending in j or J: ij or IJ
+#  c   Cedilla
+#  /   Stroke through character
+#  -   Macron.  (A horizontal bar over the letter.)
+#  u   Breve.  (A shallow dish shape over the letter.)
+#  .   Dot above
+#  :   Middle dot
+#  g   Ogonek.  (A little forward facing hook at the bottom right
+#      of the character.  The "g" stands for "Ogonek" but another
+#      mnemonic is that g has a squiggle below the line.)
+#  v   Caron.  (A little v over the letter.)
+#  s   s s = Eszett (lower case only)
+#  h   Icelandic (or Runic) edh (h d) or thorn (h t)
+#  m   Mathematical: not (m \), multiply (m *), divide (m /), degree (m o),
+#      +/- (m +), superscripts 1, 2, 3 (m 1 etc.), micro (m u), quarter (m q),
+#      half (m h), three quarters (m t)
+#  p   Punctuation (and currency etc.): soft space (p _), inverted ! (p !),
+#      cent (p C), pound sign (p l) (think lira, libra), currency (p $),
+#      yen (p y), broken bar (p |), section (p s), lonely diaeresis (p "),
+#      copyright (p C), Spanish feminine marker (p f), left guillemet (p
+#      <), soft hyphen (p h), registered trade mark (p R), lonely macron (p
+#      -), lonely acute (p '), Pilcrow (paragraph) (p p), middle dot (p :),
+#      lonely cedilla (p c), Spanish masculine marker (p m), right
+#      guillemet (p >), inverted ? (p ?), Euro sign (p e).
+#
+
+emulate -LR zsh
+setopt printeightbit extendedglob cbases
+
+local error ochar basechar accent
+
+# Error reporter: "print" for a plain call, "zle -M" when run as a widget.
+case $WIDGET in
+  ('') error=print ;;
+  (*)  error=(zle -M) ;;
+esac
+
+if (( ${+zsh_accented_chars} == 0 )); then
+  # Built once, on first use, and kept as a global: the associative array
+  # zsh_accented_chars is indexed by the accent key.  Each value is a
+  # whitespace-separated list of "character code" pairs, where the code
+  # is the character's Unicode value in hex with no base discriminator
+  # (essentially a UCS-4 index with the leading zeroes suppressed).
+  typeset -gA zsh_accented_chars
+
+  # grave
+  accent=\`
+  zsh_accented_chars[$accent]="\
+A C0 E C8 I CC O D2 U D9 a E0 e E8 i EC o F2 u F9 N 1F8 n 1F9 \
+"
+  # acute
+  accent=\'
+  zsh_accented_chars[$accent]="\
+A C1 E C9 I CD O D3 U DA Y DD a E1 e E9 i ED o F3 u FA y FD C 106 c 107 \
+L 139 l 13A N 143 n 144 R 154 r 155 S 15A s 15B Z 179 z 17A \
+"
+  # double acute
+  accent=d
+  zsh_accented_chars[$accent]="\
+O 150 o 151 U 170 u 171\
+"
+  # circumflex
+  accent=\^
+  zsh_accented_chars[$accent]="\
+A C2 E CA I CE O D4 U DB a E2 e EA i EE o F4 u FB C 108 c 109 G 11C g 11D \
+H 124 h 125 J 134 j 135 S 15C s 15D W 174 w 175 Y 176 y 177 \
+"
+  # tilde
+  accent=\~
+  zsh_accented_chars[$accent]="\
+A C3 N D1 O D5 a E3 n F1 o F5 I 128 i 129 U 168 u 169 \
+"
+  # diaeresis / Umlaut
+  accent=\"
+  zsh_accented_chars[$accent]="\
+A C4 E CB I CF O D6 U DC a E4 e EB i EF o F6 u FC y FF Y 178 \
+"
+  # ring above
+  accent=o
+  zsh_accented_chars[$accent]="\
+A C5 a E5 U 16E u 16F \
+"
+  # ligature with e or E
+  accent=e
+  zsh_accented_chars[$accent]="\
+A C6 a E6 O 152 o 153 \
+"
+  # ligature with j or J
+  accent=j
+  zsh_accented_chars[$accent]="\
+I 132 i 133\
+"
+  # cedilla
+  accent=c
+  zsh_accented_chars[$accent]="\
+C C7 c E7 G 122 g 123 K 136 k 137 L 13B l 13C N 145 n 146 R 156 r 157 \
+S 15E s 15F T 162 t 163 \
+"
+  # stroke through
+  accent=/
+  zsh_accented_chars[$accent]="\
+O D8 o F8 D 110 d 111 H 126 h 127 L 141 l 142 T 166 t 167 b 180 \
+"
+  # macron
+  accent=-
+  zsh_accented_chars[$accent]="\
+A 100 a 101 E 112 e 113 I 12A i 12B O 14C o 14D U 16A u 16B \
+"
+  # breve
+  accent=u
+  zsh_accented_chars[$accent]="\
+A 102 a 103 E 114 e 115 G 11E g 11F I 12C i 12D O 14E o 14F U 16C u 16D \
+"
+  # dot above
+  accent=.
+  zsh_accented_chars[$accent]="\
+C 10A c 10B E 116 e 117 G 120 g 121 I 130 i 131 Z 17B z 17C \
+"
+  # middle dot
+  accent=:
+  zsh_accented_chars[$accent]="\
+L 13F l 140 \
+"
+  # ogonek
+  accent=g
+  zsh_accented_chars[$accent]="\
+A 104 a 105 E 118 e 119 I 12E i 12F U 172 u 173 \
+"
+  # caron
+  accent=v
+  zsh_accented_chars[$accent]="\
+C 10C c 10D D 10E d 10F E 11A e 11B L 13D l 13E N 147 n 148 R 158 r 159 \
+S 160 s 161 T 164 t 165 Z 17D z 17E \
+"
+  # eszett
+  accent=s
+  zsh_accented_chars[$accent]="\
+s DF \
+"
+  # edh or thorn
+  accent=h
+  zsh_accented_chars[$accent]="\
+D D0 d F0 T DE t FE \
+"
+  # mathematical
+  accent=m
+  zsh_accented_chars[$accent]="\
+\\ AC o B0 * D7 / F7 + B1 2 B2 3 B3 u B5 1 B9 q BC h BD t BE\
+"
+  # punctuation and currency; NOTE: "C" occurs twice, so A9 shadows A2 (cent)
+  accent=p
+  zsh_accented_chars[$accent]="\
+_ A0 ! A1 C A2 l A3 $ A4 y A5 | A6 s A7 \" A8 C A9 f AA < AB \
+h AD R AE - AF ' B4 p B6 : B7 c B8 m BA > BB ? BF e 20AC \
+"
+fi
+
+# First key: the accent code.  Stop quietly if no key could be read.
+read -k accent || return 1
+if [[ -z $zsh_accented_chars[$accent] ]]; then
+  $error "No accented characters with accent: $accent"
+  return 1
+fi
+
+local -A charmap   # base character -> hex code for the chosen accent
+charmap=(${=zsh_accented_chars[$accent]})
+
+# Second key: the base character.  A failed read is treated like the first.
+read -k basechar || return 1
+if [[ -z $charmap[$basechar] ]]; then
+  $error "Accent $accent not available with character $basechar"
+  return 1
+fi
+
+# \U takes eight hex digits, hence the zero padding of the stored code.
+if [[ -z $WIDGET ]]; then
+  [[ -t 1 ]] && print
+  print "\U${(l.8..0.)charmap[$basechar]}"
+else
+  ochar="$(print -n "\U${(l.8..0.)charmap[$basechar]}")"
+  if (( ${+NUMERIC} )); then   # numeric argument: preview only
+    $error "Character ${(l.8..0.)charmap[$basechar]}: $ochar"
+  else
+    LBUFFER+=$ochar
+  fi
+fi