mirror of
git://git.code.sf.net/p/zsh/code
synced 2024-09-28 15:01:21 +02:00
22557: turn on multibyte option by default
This commit is contained in:
parent
b9bf52d1fc
commit
09bc7ee2b5
@ -1,5 +1,10 @@
|
||||
2006-07-25 Peter Stephenson <pws@csr.com>
|
||||
|
||||
* 22557: Doc/Zsh/options.yo, Misc/globtests, Src/options.c,
|
||||
Src/pattern.c, Test/D02glob.ztst, Test/D07multibyte.ztst:
|
||||
Turn on multibyte option by default for MULTIBYTE_SUPPORT and fix
|
||||
tests and patterns.
|
||||
|
||||
* unposted: Src/pattern.c, Src/utils.c: minor typos in
|
||||
22556 found when MULTIBYTE_SUPPORT is not defined.
|
||||
|
||||
|
@ -411,19 +411,31 @@ item(tt(MARK_DIRS) (tt(-8), ksh: tt(-X)))(
|
||||
Append a trailing `tt(/)' to all directory
|
||||
names resulting from filename generation (globbing).
|
||||
)
|
||||
pindex(MULTIBYTE)
|
||||
pindex(MULTIBYTE <D>)
|
||||
cindex(characters, multibyte, in expansion and globbing)
|
||||
cindex(multibyte characters, in expansion and globbing)
|
||||
item(tt(MULTIBYTE))(
|
||||
Respect multibyte characters when found during pattern matching.
|
||||
When this option is set, characters strings are examined using the
|
||||
Respect multibyte characters when found in strings.
|
||||
When this option is set, strings are examined using the
|
||||
system library to determine how many bytes form a character, depending
|
||||
on the current locale. If the option is unset
|
||||
(or the shell was not compiled with the configuration option
|
||||
tt(MULTIBYTE_SUPPORT)) a single byte is always treated as a single
|
||||
character. The option will eventually be extended to cover expansion.
|
||||
Note, however, that it does not affect the shell's editor, which always
|
||||
uses the locale to determine multibyte characters.
|
||||
on the current locale. This affects the way characters are counted in
|
||||
pattern matching, parameter values and various delimiters.
|
||||
|
||||
The option is on by default if the shell was compiled with
|
||||
tt(MULTIBYTE_SUPPORT); otherwise it is off by default and has no effect if
|
||||
turned on.
|
||||
|
||||
If the option is off a single byte is always treated as a single
|
||||
character. This setting is designed purely for examining strings
|
||||
known to contain raw bytes or other values that may not be characters
|
||||
in the current locale. It is not necessary to unset the option merely
|
||||
because the character set for the current locale does not contain multibyte
|
||||
characters.
|
||||
|
||||
The option does not affect the shell's editor, which always uses the
|
||||
locale to determine multibyte characters. This is because
|
||||
the character set displayed by the terminal emulator is independent of
|
||||
shell settings.
|
||||
)
|
||||
pindex(NOMATCH)
|
||||
cindex(globbing, no matches)
|
||||
|
@ -182,6 +182,5 @@ f atest/path *((#s)|/)test((#e)|/)*
|
||||
f path/testy *((#s)|/)test((#e)|/)*
|
||||
f path/testy/ohyes *((#s)|/)test((#e)|/)*
|
||||
f path/atest/ohyes *((#s)|/)test((#e)|/)*
|
||||
t björn *[åäöÅÄÖ]*
|
||||
EOT
|
||||
print "$failed tests failed."
|
||||
|
@ -166,7 +166,13 @@ static struct optname optns[] = {
|
||||
{{NULL, "markdirs", 0}, MARKDIRS},
|
||||
{{NULL, "menucomplete", 0}, MENUCOMPLETE},
|
||||
{{NULL, "monitor", OPT_SPECIAL}, MONITOR},
|
||||
{{NULL, "multibyte", 0/*TBD*/}, MULTIBYTE},
|
||||
{{NULL, "multibyte",
|
||||
#ifdef MULTIBYTE_SUPPORT
|
||||
OPT_ALL
|
||||
#else
|
||||
0
|
||||
#endif
|
||||
}, MULTIBYTE},
|
||||
{{NULL, "multios", OPT_EMULATE|OPT_ZSH}, MULTIOS},
|
||||
{{NULL, "nomatch", OPT_EMULATE|OPT_NONBOURNE},NOMATCH},
|
||||
{{NULL, "notify", OPT_ZSH}, NOTIFY},
|
||||
|
@ -343,7 +343,7 @@ metacharinc(char **x)
|
||||
/* Error. Treat as single byte. */
|
||||
/* Reset the shift state for next time. */
|
||||
memset(&shiftstate, 0, sizeof(shiftstate));
|
||||
return (wchar_t) *(*x)++;
|
||||
return (wchar_t) STOUC(*(*x)++);
|
||||
}
|
||||
|
||||
#else
|
||||
@ -595,7 +595,7 @@ patcompile(char *exp, int inflags, char **endexp)
|
||||
while (oplen--) {
|
||||
if (imeta(*opnd)) {
|
||||
*dst++ = Meta;
|
||||
*dst++ = *opnd ^ 32;
|
||||
*dst++ = *opnd++ ^ 32;
|
||||
} else {
|
||||
*dst++ = *opnd++;
|
||||
}
|
||||
|
@ -6,7 +6,9 @@
|
||||
mkdir glob.tmp/dir3/subdir
|
||||
: >glob.tmp/{,{dir1,dir2}/}{a,b,c}
|
||||
|
||||
globtest () { $ZTST_testdir/../Src/zsh -f $ZTST_srcdir/../Misc/$1 }
|
||||
globtest () {
|
||||
$ZTST_testdir/../Src/zsh -f $ZTST_srcdir/../Misc/$1
|
||||
}
|
||||
|
||||
regress_absolute_path_and_core_dump() {
|
||||
local absolute_dir=$(cd glob.tmp && pwd -P)
|
||||
@ -175,7 +177,6 @@
|
||||
>1: [[ path/testy = *((#s)|/)test((#e)|/)* ]]
|
||||
>1: [[ path/testy/ohyes = *((#s)|/)test((#e)|/)* ]]
|
||||
>1: [[ path/atest/ohyes = *((#s)|/)test((#e)|/)* ]]
|
||||
>0: [[ björn = *[åäöÅÄÖ]* ]]
|
||||
>0 tests failed.
|
||||
|
||||
globtest globtests.ksh
|
||||
@ -263,6 +264,10 @@
|
||||
>0: [[ Modules = (#i)*m* ]]
|
||||
>0 tests failed.
|
||||
|
||||
(unsetopt multibyte
|
||||
[[ björn = *[åäöÅÄÖ]* ]])
|
||||
0:single byte match with top bit set
|
||||
|
||||
( regress_absolute_path_and_core_dump )
|
||||
0:exclusions regression test
|
||||
>
|
||||
|
@ -176,7 +176,7 @@
|
||||
?(eval):1: command not found: hähä=3
|
||||
|
||||
foo="Ølaf«Ødd«øpénëd«ån«àpple"
|
||||
print -l ${(s.«.)foo}
|
||||
print -l ${(s.«.)foo}
|
||||
ioh="Ἐν ἀρχῇ ἦν ὁ λόγος, καὶ ὁ λόγος ἦν πρὸς τὸν θεόν, καὶ θεὸς ἦν ὁ λόγος."
|
||||
print -l ${=ioh}
|
||||
print ${(w)#ioh}
|
||||
@ -228,3 +228,39 @@
|
||||
0:read multibyte characters
|
||||
<«»ignored
|
||||
>«»
|
||||
|
||||
# See if the system grokks first-century Greek...
|
||||
ioh="Ἐν ἀρχῇ ἦν ὁ λόγος, καὶ ὁ λόγος ἦν πρὸς τὸν θεόν, καὶ θεὸς ἦν ὁ λόγος."
|
||||
for (( i = 1; i <= ${#ioh}; i++ )); do
|
||||
# FC3 doesn't recognise ῇ (U+1FC7: Greek small letter eta with
|
||||
# perispomeni and ypogegrammeni, of course) as a lower case character.
|
||||
if [[ $ioh[i] != [[:lower:]] && $i != 7 ]]; then
|
||||
for tp in upper space punct invalid; do
|
||||
if [[ $tp = invalid || $ioh[i] = [[:${tp}:]] ]]; then
|
||||
print "$i: $tp"
|
||||
break
|
||||
fi
|
||||
done
|
||||
fi
|
||||
done
|
||||
0:isw* functions on non-ASCII wide characters
|
||||
>1: upper
|
||||
>3: space
|
||||
>8: space
|
||||
>11: space
|
||||
>13: space
|
||||
>19: punct
|
||||
>20: space
|
||||
>24: space
|
||||
>26: space
|
||||
>32: space
|
||||
>35: space
|
||||
>40: space
|
||||
>44: space
|
||||
>49: punct
|
||||
>50: space
|
||||
>54: space
|
||||
>59: space
|
||||
>62: space
|
||||
>64: space
|
||||
>70: punct
|
||||
|
Loading…
Reference in New Issue
Block a user