1
0
mirror of git://git.code.sf.net/p/zsh/code synced 2024-10-01 08:21:17 +02:00

21662: new zle widget insert-unicode-char

This commit is contained in:
Peter Stephenson 2005-08-18 16:45:28 +00:00
parent d551b0f483
commit e987416cdd
3 changed files with 323 additions and 0 deletions

@ -1,5 +1,8 @@
2005-08-18 Peter Stephenson <pws@csr.com>
* 21662: Doc/Zsh/contrib.yo, Functions/Zle/insert-unicode-char:
compose and insert a Unicode character using two-key codes.
* 21661: Src/glob.c: variable in 21655 shouldn't be static.
2005-08-18 Peter Stephenson <pws@pwstephenson.fsnet.co.uk>

@ -668,6 +668,112 @@ into the command line.
example(bindkey '^Xf' insert-files)
)
tindex(insert-unicode-char)
item(tt(insert-unicode-char))(
This function allows you to compose Unicode characters to be inserted
into the command line. The command is followed by two keys (there is
no prompt), of which the first indicates the type of accent or special
character, and the second indicates the base character. Both input
characters are always from the ASCII character set. For best results
zsh should have been built with support for multibyte characters
(configured with tt(--enable-multibyte)).
The character is converted from Unicode into the local representation and
inserted into the command line at the cursor position.
(The conversion is done within the shell, using whatever facilities
the C library provides.)  With a numeric argument, the character and its
code are previewed in the status line.
The function may be run outside zle in which case it prints the character
(together with a newline) to standard output. Input is still read from
keystrokes.
The set of accented characters is reasonably complete up to U+0180; the
set of special characters is less so, and only a few characters beyond that
point are supported.  Adding new Unicode characters is easy, however.
Please send any additions to tt(zsh-workers@sunsite.dk).
The codes for the first character are as follows:
startsitem()
sitem(tt(`))(
Grave accent.
)
sitem(tt('))(
Acute accent.
)
sitem(tt(d))(
Double acute accent (only supported on a few letters).
)
sitem(tt(^))(
Circumflex.
)
sitem(tt(~))(
Tilde.
)
sitem(tt("))(
Diaeresis (Umlaut).
)
sitem(tt(o))(
Circle over the base character.
)
sitem(tt(e))(
Ligatures ending in e or E: tt(e A) gives AE, tt(e o) gives oe, etc.
)
sitem(tt(j))(
Ligatures ending in j or J: ij or IJ.
)
sitem(tt(c))(
Cedilla.
)
sitem(tt(/))(
Stroke through the base character.
)
sitem(tt(-))(
Macron. (A horizontal bar over the base character.)
)
sitem(tt(u))(
Breve. (A shallow dish shape over the base character.)
)
sitem(tt(.))(
Dot above the base character.
)
sitem(tt(:))(
A dot in the middle plane of the base character.
)
sitem(tt(g))(
Ogonek. (A little forward facing hook at the bottom right
of the character. The "g" stands for "Ogonek" but another
mnemonic is that g has a squiggle below the line.)
)
sitem(tt(v))(
Caron. (A little v over the letter.)
)
sitem(tt(s))(
Used only as tt(s s), a German Eszett or "scharfes S" ligature.
)
sitem(tt(h))(
Icelandic (or Runic) edh (tt(h d)) or thorn (tt(h t)).
)
sitem(tt(m))(
Various mathematical characters: not (tt(m \)), multiply (tt(m *)), divide
(tt(m /)), degree (tt(m o)), +/- (tt(m +)), superscripts 1, 2, 3 (tt(m 1),
etc.), micro (tt(m u)), quarter (tt(m q)), half (tt(m h)), three quarters
(tt(m t)).
)
sitem(tt(p))(
Various punctuation and currency characters (any non-mathematical symbol
that is not part of a word): soft space (tt(p _)), inverted ! (tt(p !)),
cent (tt(p C)), pound sign (tt(p l)) (think lira, librum), currency (tt(p
$)), yen (tt(p y)), broken bar (tt(p |)), section sign (tt(p s)), lonely
diaeresis (tt(p ")), copyright sign (tt(p C)), Spanish feminine marker
(tt(p f)), left guillemet (tt(p <)), soft hyphen (tt(p h)), registered
trade mark (tt(p R)), lonely macron (tt(p -)), lonely acute (tt(p ')),
Pilcrow (paragraph) sign (tt(p p)), middle dot (tt(p :)),
lonely cedilla (tt(p c)), Spanish masculine marker (tt(p m)), right
guillemet (tt(p >)), inverted ? (tt(p ?)), Euro sign (tt(p e)).
)
endsitem()
)
tindex(narrow-to-region)
tindex(narrow-to-region-invisible)
xitem(tt(narrow-to-region [ -p) var(pre) tt(] [ -P) var(post) tt(]))

@ -0,0 +1,214 @@
# Accented characters. Inputs two keys: first the code for the accent, then
# the base character being accented. Note that all input characters are
# ASCII. For best results zsh should have been built with support for
# multibyte characters (--enable-multibyte).
#
# Outputs the character converted from Unicode into the local representation.
# (The conversion is done within the shell, using whatever facilities
# the C library provides.)
#
# When used as a zle widget, the character is inserted at the cursor
# position. With a numeric argument, preview in status line; outside zle,
# print character (and newline) to standard output.
#
# The set of accented characters is reasonably complete up to U+0180, the
# set of special characters less so. However, it mostly gives up at that
# point. Adding new Unicode characters is easy, however. Please send any
# additions to zsh-workers@sunsite.dk .
#
# Some of the accent codes are a little more obscure than others.
# Only the base character changes for upper case: A with circle is "o A".
# ` Grave
# ' Acute
# d Double acute
# ^ Circumflex
# ~ Tilde
# " Diaeresis (Umlaut)
# o Circle
# e Ligatures ending in e or E: e A gives AE, e o gives oe, etc.
# j Ligatures ending in j or J: ij or IJ
# c Cedilla
# / Stroke through character
# - Macron. (A horizonal bar over the letter.)
# u Breve. (A shallow dish shape over the letter.)
# . Dot above
# : Middle dot
# g Ogonek. (A little forward facing hook at the bottom right
# of the character. The "g" stands for "Ogonek" but another
# mnemonic is that g has a squiggle below the line.)
# v Caron. (A little v over the letter.)
# s s s = Eszett (lower case only)
# h Icelandic (or Runic) edh (h d) or thorn (h t)
# m Mathematical: not (m \), multiply (m *), divide (m /), degree (m o),
# +/- (m +), superscripts 1, 2, 3 (m 1 etc.), micro (m u), quarter (m q),
# half (m h), three quarters (m t)
# p Punctuation (and currency etc.): soft space (p _), inverted ! (p !),
# cent (p C), pound sign (p l) (think lira, librum), currency (p $),
# yen (p y), broken bar (p |), section (p s), lonely diaeresis (p "),
# copyright (p C), Spanish feminine marker (p f), left guillemet (p
# <), soft hyphen (p h), registered trade mark (p R), lonely macron (p
# -), lonely acute (p '), Pilcrow (paragraph) (p p), middle dot (p :),
# lonely cedilla (p c), Spanish masculine marker (p m), right
# guillemet (p >), inverted ? (p ?), Euro sign (p e).
#
emulate -LR zsh
setopt cbases extendedglob printeightbit
local accent basechar ochar error
if [[ -n $WIDGET ]]; then
error=(zle -M)
else
error=print
fi
if (( ${+zsh_accented_chars} == 0 )); then
# The associative array zsh_accent_chars is indexed by the
# accent. The values are sets of character / Unicode pairs for
# the character with the given accent. The Unicode value is
# a hex index with no base discriminator; essentially a UCS-4 index
# with the leading zeroes suppressed.
typeset -gA zsh_accented_chars
# grave
accent=\`
zsh_accented_chars[$accent]="\
A C0 E C8 I CC O D2 U D9 a E0 e E8 i EC o F2 u F9 N 1F8 n 1F9 \
"
# acute
accent=\'
zsh_accented_chars[$accent]="\
A C1 E C9 I CD O D3 U DA Y DD a E1 e E9 i EC o F3 u FA y FD C 106 c 107 \
L 139 l 13A N 143 n 144 R 154 r 155 S 15A s 15B Z 179 z 17A \
"
# double acute
accent=d
zsh_accented_chars[$accent]="\
O 150 o 151 U 170 u 171\
"
# circumflex
accent=\^
zsh_accented_chars[$accent]="\
A C2 E CA I CE O D4 U DB a E2 e EA i EE o F4 u FB C 108 c 109 G 11C g 11d \
H 124 h 125 J 134 j 135 S 15C s 15D W 174 w 175 Y 176 y 177 \
"
# tilde
accent=\~
zsh_accented_chars[$accent]="\
A C3 E CB N D1 O D5 a E3 n F1 o F5 I 128 i 129 U 168 u 169 \
"
# diaeresis / Umlaut
accent=\"
zsh_accented_chars[$accent]="\
A C4 I CF O D6 U DC a E4 e EB i EF o F6 u FC y FF Y 178 \
"
# ring above
accent=o
zsh_accented_chars[$accent]="\
A C5 a E5 U 16E u 16F \
"
# ligature with e or E
accent=e
zsh_accented_chars[$accent]="\
A C6 a E6 O 152 o 153 \
"
# ligature with j or J
accent=j
zsh_accented_chars[$accent]="\
I 132 i 133\
"
# cedilla
accent=c
zsh_accented_chars[$accent]="\
C C7 c E7 G 122 g 123 K 136 k 137 L 13B l 13C N 145 n 146 R 156 r 157 \
S 15E s 15F T 162 t 163 \
"
# stroke through
accent=/
zsh_accented_chars[$accent]="\
O D8 o F8 D 110 d 111 H 126 h 127 L 141 l 142 T 166 t 167 b 180 \
"
# macron
accent=-
zsh_accented_chars[$accent]="\
A 100 a 101 E 112 e 113 I 12a i 12b O 14C o 14D U 16A u 16B \
"
# breve
accent=u
zsh_accented_chars[$accent]="\
A 102 a 103 E 114 e 115 G 11E g 11F I 12C i 12D O 14E o 14F U 16C u 16D \
"
# dot above
accent=.
zsh_accented_chars[$accent]="\
C 10A c 10b E 116 e 117 G 120 g 121 I 130 i 131 Z 17B z 17C \
"
# middle dot
accent=:
zsh_accented_chars[$accent]="\
L 13F l 140 \
"
# ogonek
accent=g
zsh_accented_chars[$accent]="\
A 104 a 105 E 118 e 119 I 12E i 12F U 172 u 173 \
"
# caron
accent=v
zsh_accented_chars[$accent]="\
C 10C c 10D D 10E d 10F E 11A e 11B L 13D l 13E N 147 n 148 R 158 r 159 \
S 160 s 161 T 164 t 165 Z 17D z 17E \
"
# eszett
accent=s
zsh_accented_chars[$accent]="\
s DF \
"
# edh or thorn
accent=h
zsh_accented_chars[$accent]="\
D D0 d F0 t FE \
"
# mathematical
accent=m
zsh_accented_chars[$accent]="\
\\ AC o B0 * D7 / F7 + B1 2 B2 3 B3 u B5 1 B9 q BC h BD t BE\
"
# punctuation and currency
accent=p
zsh_accented_chars[$accent]="\
_ A0 ! A1 C A2 l A3 $ A4 y A5 | A6 s A7 \" A8 C A9 f AA < AB \
h AD R AE - AF ' B4 p B6 : B7 c B8 m BA > BB ? BF e 20AC \
"
fi
read -k accent || return 1
if [[ -z $zsh_accented_chars[$accent] ]]; then
$error "No accented characters with accent: $accent"
return 1
fi
local -A charmap
charmap=(${=zsh_accented_chars[$accent]})
read -k basechar
if [[ -z $charmap[$basechar] ]]; then
$error "Accent $accent not available with character $basechar"
return 1
fi
if [[ -z $WIDGET ]]; then
[[ -t 1 ]] && print
print "\U${(l.8..0.)charmap[$basechar]}"
else
ochar="$(print -n "\U${(l.8..0.)charmap[$basechar]}")"
if (( ${+NUMERIC} )); then
$error "Character ${(l.8..0.)charmap[$basechar]}: $ochar"
else
LBUFFER+=$ochar
fi
fi