mirror of
git://git.code.sf.net/p/zsh/code
synced 2024-06-15 06:06:11 +02:00
23375: Phil Pennock: =~, zsh/regex etc. etc.
This commit is contained in:
parent
eb4c3d4bf2
commit
7f03c3d851
|
@ -1,3 +1,12 @@
|
||||||
|
2007-05-01 Peter Stephenson <p.w.stephenson@ntlworld.com>
|
||||||
|
|
||||||
|
* Phil Pennock + tweaks: 23375: configure.ac, Doc/Makefile.in,
|
||||||
|
Doc/Zsh/cond.yo, Doc/Zsh/mod_pcre.yo, Doc/Zsh/options.yo,
|
||||||
|
Src/cond.c, Src/options.c, Src/parse.c, Src/text.c, Src/zsh.h,
|
||||||
|
Src/Modules/pcre.c, Src/Modules/regex.c, Src/Modules/regex.mdd:
|
||||||
|
Add zsh/regex option, =~ syntax and related options
|
||||||
|
BASH_REMATCH, NO_CASE_MATCH, RE_MATCH_PCRE.
|
||||||
|
|
||||||
2007-05-01 Peter Stephenson <pws@csr.com>
|
2007-05-01 Peter Stephenson <pws@csr.com>
|
||||||
|
|
||||||
* unposted: Doc/Zsh/contrib.yo: typo in widget name.
|
* unposted: Doc/Zsh/contrib.yo: typo in widget name.
|
||||||
|
|
|
@ -61,7 +61,7 @@ Zsh/mod_computil.yo \
|
||||||
Zsh/mod_datetime.yo Zsh/mod_deltochar.yo \
|
Zsh/mod_datetime.yo Zsh/mod_deltochar.yo \
|
||||||
Zsh/mod_example.yo Zsh/mod_files.yo \
|
Zsh/mod_example.yo Zsh/mod_files.yo \
|
||||||
Zsh/mod_mapfile.yo Zsh/mod_mathfunc.yo Zsh/mod_newuser.yo \
|
Zsh/mod_mapfile.yo Zsh/mod_mathfunc.yo Zsh/mod_newuser.yo \
|
||||||
Zsh/mod_parameter.yo Zsh/mod_pcre.yo \
|
Zsh/mod_parameter.yo Zsh/mod_pcre.yo Zsh/mod_regex.yo \
|
||||||
Zsh/mod_sched.yo Zsh/mod_socket.yo \
|
Zsh/mod_sched.yo Zsh/mod_socket.yo \
|
||||||
Zsh/mod_stat.yo Zsh/mod_system.yo Zsh/mod_tcp.yo \
|
Zsh/mod_stat.yo Zsh/mod_system.yo Zsh/mod_tcp.yo \
|
||||||
Zsh/mod_termcap.yo Zsh/mod_terminfo.yo \
|
Zsh/mod_termcap.yo Zsh/mod_terminfo.yo \
|
||||||
|
|
|
@ -109,6 +109,20 @@ backward compatibility and should be considered obsolete.
|
||||||
item(var(string) tt(!=) var(pattern))(
|
item(var(string) tt(!=) var(pattern))(
|
||||||
true if var(string) does not match var(pattern).
|
true if var(string) does not match var(pattern).
|
||||||
)
|
)
|
||||||
|
item(var(string) tt(=~) var(regexp))(
|
||||||
|
true if var(string) matches the regular expression
|
||||||
|
var(regexp). If the option tt(RE_MATCH_PCRE) is set
|
||||||
|
var(regexp) is tested as a PCRE regular expression using
|
||||||
|
the tt(zsh/pcre) module, else it is tested as a POSIX
|
||||||
|
regular expression using the tt(zsh/regex) module.
|
||||||
|
If the option tt(BASH_REMATCH) is set the array
|
||||||
|
tt(BASH_REMATCH) is set to the substring that matched the pattern
|
||||||
|
followed by the substrings that matched parenthesised
|
||||||
|
subexpressions within the pattern; otherwise, the scalar parameter
|
||||||
|
tt(MATCH) is set to the substring that matched the pattern and
|
||||||
|
the array tt(match) to the substrings that matched parenthesised
|
||||||
|
subexpressions.
|
||||||
|
)
|
||||||
item(var(string1) tt(<) var(string2))(
|
item(var(string1) tt(<) var(string2))(
|
||||||
true if var(string1) comes before var(string2)
|
true if var(string1) comes before var(string2)
|
||||||
based on ASCII value of their characters.
|
based on ASCII value of their characters.
|
||||||
|
|
|
@ -22,14 +22,17 @@ Studies the previously-compiled PCRE which may result in faster
|
||||||
matching.
|
matching.
|
||||||
)
|
)
|
||||||
findex(pcre_match)
|
findex(pcre_match)
|
||||||
item(tt(pcre_match) [ tt(-a) var(arr) ] var(string))(
|
item(tt(pcre_match) [ tt(-v) var(var) ] [ tt(-a) var(arr) ] var(string))(
|
||||||
Returns successfully if tt(string) matches the previously-compiled
|
Returns successfully if tt(string) matches the previously-compiled
|
||||||
PCRE.
|
PCRE.
|
||||||
|
|
||||||
If the expression captures substrings within parentheses,
|
If the expression captures substrings within parentheses,
|
||||||
tt(pcre_match) will set the array var($match) to those
|
tt(pcre_match) will set the array var($match) to those
|
||||||
substrings, unless the tt(-a) option is given, in which
|
substrings, unless the tt(-a) option is given, in which
|
||||||
case it will set the array var(arr).
|
case it will set the array var(arr). Similarly, the variable
|
||||||
|
var(MATCH) will be set to the entire matched portion of the
|
||||||
|
string, unless the tt(-v) option is given, in which case the variable
|
||||||
|
var(var) will be set.
|
||||||
)
|
)
|
||||||
enditem()
|
enditem()
|
||||||
|
|
||||||
|
|
|
@ -319,6 +319,13 @@ will cause case-insensitive matching. For example, tt(cvs+LPAR()/+RPAR())
|
||||||
can match the directory tt(CVS) owing to the presence of the globbing flag
|
can match the directory tt(CVS) owing to the presence of the globbing flag
|
||||||
(unless the option tt(BARE_GLOB_QUAL) is unset).
|
(unless the option tt(BARE_GLOB_QUAL) is unset).
|
||||||
)
|
)
|
||||||
|
pindex(CASE_MATCH)
|
||||||
|
cindex(case-insensitive regular expression matches, option)
|
||||||
|
cindex(regular expressions, case-insensitive matching, option)
|
||||||
|
item(tt(CASE_MATCH) <D>)(
|
||||||
|
Make regular expressions using the tt(zsh/regex) module (including
|
||||||
|
matches with tt(=~)) sensitive to case.
|
||||||
|
)
|
||||||
pindex(CSH_NULL_GLOB)
|
pindex(CSH_NULL_GLOB)
|
||||||
cindex(csh, null globbing style)
|
cindex(csh, null globbing style)
|
||||||
cindex(null globbing style, csh)
|
cindex(null globbing style, csh)
|
||||||
|
@ -478,6 +485,15 @@ var(xx) is set to tt(LPAR())var(a b c)tt(RPAR()), are substituted with
|
||||||
`var(fooabar foobbar foocbar)' instead of the default
|
`var(fooabar foobbar foocbar)' instead of the default
|
||||||
`var(fooa b cbar)'.
|
`var(fooa b cbar)'.
|
||||||
)
|
)
|
||||||
|
pindex(REMATCH_PCRE)
|
||||||
|
cindex(regexp, PCRE)
|
||||||
|
cindex(PCRE, regexp)
|
||||||
|
item(tt(REMATCH_PCRE) <Z>)(
|
||||||
|
If set, regular expression matching with the tt(=~) operator will use
|
||||||
|
Perl-Compatible Regular Expressions from the PCRE library, if available.
|
||||||
|
If not set, regular expressions will use the extended regexp syntax
|
||||||
|
provided by the system libraries.
|
||||||
|
)
|
||||||
pindex(SH_GLOB)
|
pindex(SH_GLOB)
|
||||||
cindex(sh, globbing style)
|
cindex(sh, globbing style)
|
||||||
cindex(globbing style, sh)
|
cindex(globbing style, sh)
|
||||||
|
@ -1131,6 +1147,20 @@ enditem()
|
||||||
|
|
||||||
subsect(Shell Emulation)
|
subsect(Shell Emulation)
|
||||||
startitem()
|
startitem()
|
||||||
|
pindex(BASH_REMATCH)
|
||||||
|
cindex(bash, BASH_REMATCH variable)
|
||||||
|
cindex(regexp, bash BASH_REMATCH variable)
|
||||||
|
item(tt(BASH_REMATCH))(
|
||||||
|
When set, matches performed with the tt(=~) operator will set the
|
||||||
|
tt(BASH_REMATCH) array variable, instead of the default tt(MATCH) and
|
||||||
|
tt(match) variables. The first element of the tt(BASH_REMATCH) array
|
||||||
|
will contain the entire matched text and subsequent elements will contain
|
||||||
|
extracted substrings. This option makes more sense when tt(KSH_ARRAYS) is
|
||||||
|
also set, so that the entire matched portion is stored at index 0 and the
|
||||||
|
first substring is at index 1. Without this option, the tt(MATCH) variable
|
||||||
|
contains the entire matched text and the tt(match) array variable contains
|
||||||
|
substrings.
|
||||||
|
)
|
||||||
pindex(BSD_ECHO)
|
pindex(BSD_ECHO)
|
||||||
cindex(echo, BSD compatible)
|
cindex(echo, BSD compatible)
|
||||||
item(tt(BSD_ECHO) <S>)(
|
item(tt(BSD_ECHO) <S>)(
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
*
|
*
|
||||||
* This file is part of zsh, the Z shell.
|
* This file is part of zsh, the Z shell.
|
||||||
*
|
*
|
||||||
* Copyright (c) 2001, 2002, 2003, 2004 Clint Adams
|
* Copyright (c) 2001, 2002, 2003, 2004, 2007 Clint Adams
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, without written agreement and without
|
* Permission is hereby granted, without written agreement and without
|
||||||
|
@ -40,6 +40,37 @@
|
||||||
static pcre *pcre_pattern;
|
static pcre *pcre_pattern;
|
||||||
static pcre_extra *pcre_hints;
|
static pcre_extra *pcre_hints;
|
||||||
|
|
||||||
|
/**/
|
||||||
|
static int
|
||||||
|
zpcre_utf8_enabled(void)
|
||||||
|
{
|
||||||
|
#if defined(MULTIBYTE_SUPPORT) && defined(HAVE_NL_LANGINFO) && defined(CODESET)
|
||||||
|
static int have_utf8_pcre = -1;
|
||||||
|
|
||||||
|
/* value can toggle based on MULTIBYTE, so don't
|
||||||
|
* be too eager with caching */
|
||||||
|
if (have_utf8_pcre < -1)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (!isset(MULTIBYTE))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if ((have_utf8_pcre == -1) &&
|
||||||
|
(!strcmp(nl_langinfo(CODESET), "UTF-8"))) {
|
||||||
|
|
||||||
|
if (pcre_config(PCRE_CONFIG_UTF8, &have_utf8_pcre))
|
||||||
|
have_utf8_pcre = -2; /* erk, failed to ask */
|
||||||
|
}
|
||||||
|
|
||||||
|
if (have_utf8_pcre < 0)
|
||||||
|
return 0;
|
||||||
|
return have_utf8_pcre;
|
||||||
|
|
||||||
|
#else
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
/**/
|
/**/
|
||||||
static int
|
static int
|
||||||
bin_pcre_compile(char *nam, char **args, Options ops, UNUSED(int func))
|
bin_pcre_compile(char *nam, char **args, Options ops, UNUSED(int func))
|
||||||
|
@ -52,8 +83,14 @@ bin_pcre_compile(char *nam, char **args, Options ops, UNUSED(int func))
|
||||||
if(OPT_ISSET(ops,'m')) pcre_opts |= PCRE_MULTILINE;
|
if(OPT_ISSET(ops,'m')) pcre_opts |= PCRE_MULTILINE;
|
||||||
if(OPT_ISSET(ops,'x')) pcre_opts |= PCRE_EXTENDED;
|
if(OPT_ISSET(ops,'x')) pcre_opts |= PCRE_EXTENDED;
|
||||||
|
|
||||||
|
if (zpcre_utf8_enabled())
|
||||||
|
pcre_opts |= PCRE_UTF8;
|
||||||
|
|
||||||
pcre_hints = NULL; /* Is this necessary? */
|
pcre_hints = NULL; /* Is this necessary? */
|
||||||
|
|
||||||
|
if (pcre_pattern)
|
||||||
|
pcre_free(pcre_pattern);
|
||||||
|
|
||||||
pcre_pattern = pcre_compile(*args, pcre_opts, &pcre_error, &pcre_errptr, NULL);
|
pcre_pattern = pcre_compile(*args, pcre_opts, &pcre_error, &pcre_errptr, NULL);
|
||||||
|
|
||||||
if (pcre_pattern == NULL)
|
if (pcre_pattern == NULL)
|
||||||
|
@ -100,37 +137,52 @@ bin_pcre_study(char *nam, UNUSED(char **args), UNUSED(Options ops), UNUSED(int f
|
||||||
|
|
||||||
/**/
|
/**/
|
||||||
static int
|
static int
|
||||||
zpcre_get_substrings(char *arg, int *ovec, int ret, char *receptacle)
|
zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar, char *substravar, int matchedinarr)
|
||||||
{
|
{
|
||||||
char **captures, **matches;
|
char **captures, **match_all, **matches;
|
||||||
|
int capture_start = 1;
|
||||||
|
|
||||||
if(!pcre_get_substring_list(arg, ovec, ret, (const char ***)&captures)) {
|
if (matchedinarr)
|
||||||
|
capture_start = 0;
|
||||||
matches = zarrdup(&captures[1]); /* first one would be entire string */
|
if (matchvar == NULL)
|
||||||
if (receptacle == NULL)
|
matchvar = "MATCH";
|
||||||
setaparam("match", matches);
|
if (substravar == NULL)
|
||||||
else
|
substravar = "match";
|
||||||
setaparam(receptacle, matches);
|
|
||||||
|
|
||||||
pcre_free_substring_list((const char **)captures);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
/* captures[0] will be entire matched string, [1] first substring */
|
||||||
|
if(!pcre_get_substring_list(arg, ovec, ret, (const char ***)&captures)) {
|
||||||
|
match_all = ztrdup(captures[0]);
|
||||||
|
setsparam(matchvar, match_all);
|
||||||
|
matches = zarrdup(&captures[capture_start]);
|
||||||
|
setaparam(substravar, matches);
|
||||||
|
pcre_free_substring_list((const char **)captures);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**/
|
/**/
|
||||||
static int
|
static int
|
||||||
bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func))
|
bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func))
|
||||||
{
|
{
|
||||||
int ret, capcount, *ovec, ovecsize;
|
int ret, capcount, *ovec, ovecsize, c;
|
||||||
|
char *matched_portion = NULL;
|
||||||
char *receptacle = NULL;
|
char *receptacle = NULL;
|
||||||
|
int return_value = 1;
|
||||||
|
|
||||||
|
if (pcre_pattern == NULL) {
|
||||||
|
zwarnnam(nam, "no pattern has been compiled");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
if(OPT_ISSET(ops,'a')) {
|
if(OPT_HASARG(ops,c='a')) {
|
||||||
receptacle = *args++;
|
receptacle = OPT_ARG(ops,c);
|
||||||
if(!*args) {
|
}
|
||||||
zwarnnam(nam, "not enough arguments");
|
if(OPT_HASARG(ops,c='v')) {
|
||||||
return 1;
|
matched_portion = OPT_ARG(ops,c);
|
||||||
}
|
}
|
||||||
|
if(!*args) {
|
||||||
|
zwarnnam(nam, "not enough arguments");
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((ret = pcre_fullinfo(pcre_pattern, pcre_hints, PCRE_INFO_CAPTURECOUNT, &capcount)))
|
if ((ret = pcre_fullinfo(pcre_pattern, pcre_hints, PCRE_INFO_CAPTURECOUNT, &capcount)))
|
||||||
|
@ -144,18 +196,20 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func))
|
||||||
|
|
||||||
ret = pcre_exec(pcre_pattern, pcre_hints, *args, strlen(*args), 0, 0, ovec, ovecsize);
|
ret = pcre_exec(pcre_pattern, pcre_hints, *args, strlen(*args), 0, 0, ovec, ovecsize);
|
||||||
|
|
||||||
if (ret==0) return 0;
|
if (ret==0) return_value = 0;
|
||||||
else if (ret==PCRE_ERROR_NOMATCH) return 1; /* no match */
|
else if (ret==PCRE_ERROR_NOMATCH) /* no match */;
|
||||||
else if (ret>0) {
|
else if (ret>0) {
|
||||||
zpcre_get_substrings(*args, ovec, ret, receptacle);
|
zpcre_get_substrings(*args, ovec, ret, matched_portion, receptacle, 0);
|
||||||
return 0;
|
return_value = 0;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
zwarnnam(nam, "error in pcre_exec");
|
zwarnnam(nam, "error in pcre_exec");
|
||||||
return 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 1;
|
if (ovec)
|
||||||
|
zfree(ovec, ovecsize*sizeof(int));
|
||||||
|
|
||||||
|
return return_value;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**/
|
/**/
|
||||||
|
@ -164,33 +218,63 @@ cond_pcre_match(char **a, int id)
|
||||||
{
|
{
|
||||||
pcre *pcre_pat;
|
pcre *pcre_pat;
|
||||||
const char *pcre_err;
|
const char *pcre_err;
|
||||||
char *lhstr, *rhre;
|
char *lhstr, *rhre, *avar=NULL;
|
||||||
int r = 0, pcre_opts = 0, pcre_errptr, capcnt, *ov, ovsize;
|
int r = 0, pcre_opts = 0, pcre_errptr, capcnt, *ov, ovsize;
|
||||||
|
int return_value = 0;
|
||||||
|
|
||||||
|
if (zpcre_utf8_enabled())
|
||||||
|
pcre_opts |= PCRE_UTF8;
|
||||||
|
|
||||||
lhstr = cond_str(a,0,0);
|
lhstr = cond_str(a,0,0);
|
||||||
rhre = cond_str(a,1,0);
|
rhre = cond_str(a,1,0);
|
||||||
|
pcre_pat = ov = NULL;
|
||||||
|
|
||||||
|
if (isset(BASHREMATCH))
|
||||||
|
avar="BASH_REMATCH";
|
||||||
|
|
||||||
switch(id) {
|
switch(id) {
|
||||||
case CPCRE_PLAIN:
|
case CPCRE_PLAIN:
|
||||||
pcre_pat = pcre_compile(rhre, pcre_opts, &pcre_err, &pcre_errptr, NULL);
|
pcre_pat = pcre_compile(rhre, pcre_opts, &pcre_err, &pcre_errptr, NULL);
|
||||||
pcre_fullinfo(pcre_pat, NULL, PCRE_INFO_CAPTURECOUNT, &capcnt);
|
if (pcre_pat == NULL) {
|
||||||
ovsize = (capcnt+1)*3;
|
zwarn("failed to compile regexp /%s/: %s", rhre, pcre_err);
|
||||||
ov = zalloc(ovsize*sizeof(int));
|
break;
|
||||||
r = pcre_exec(pcre_pat, NULL, lhstr, strlen(lhstr), 0, 0, ov, ovsize);
|
}
|
||||||
if (r==0) return 1;
|
pcre_fullinfo(pcre_pat, NULL, PCRE_INFO_CAPTURECOUNT, &capcnt);
|
||||||
|
ovsize = (capcnt+1)*3;
|
||||||
|
ov = zalloc(ovsize*sizeof(int));
|
||||||
|
r = pcre_exec(pcre_pat, NULL, lhstr, strlen(lhstr), 0, 0, ov, ovsize);
|
||||||
|
/* r < 0 => error; r==0 match but not enough size in ov
|
||||||
|
* r > 0 => (r-1) substrings found; r==1 => no substrings
|
||||||
|
*/
|
||||||
|
if (r==0) {
|
||||||
|
zwarn("reportable zsh problem: pcre_exec() returned 0");
|
||||||
|
return_value = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
else if (r==PCRE_ERROR_NOMATCH) return 0; /* no match */
|
else if (r==PCRE_ERROR_NOMATCH) return 0; /* no match */
|
||||||
|
else if (r<0) {
|
||||||
|
zwarn("pcre_exec() error: %d", r);
|
||||||
|
break;
|
||||||
|
}
|
||||||
else if (r>0) {
|
else if (r>0) {
|
||||||
zpcre_get_substrings(lhstr, ov, r, NULL);
|
zpcre_get_substrings(lhstr, ov, r, NULL, avar, isset(BASHREMATCH));
|
||||||
return 1;
|
return_value = 1;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
if (pcre_pat)
|
||||||
|
pcre_free(pcre_pat);
|
||||||
|
if (ov)
|
||||||
|
zfree(ov, ovsize*sizeof(int));
|
||||||
|
|
||||||
|
return return_value;
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct conddef cotab[] = {
|
static struct conddef cotab[] = {
|
||||||
CONDDEF("pcre-match", CONDF_INFIX, cond_pcre_match, 0, 0, CPCRE_PLAIN)
|
CONDDEF("pcre-match", CONDF_INFIX, cond_pcre_match, 0, 0, CPCRE_PLAIN)
|
||||||
|
/* CONDDEF can register =~ but it won't be found */
|
||||||
};
|
};
|
||||||
|
|
||||||
/**/
|
/**/
|
||||||
|
@ -206,7 +290,7 @@ static struct conddef cotab[] = {
|
||||||
static struct builtin bintab[] = {
|
static struct builtin bintab[] = {
|
||||||
BUILTIN("pcre_compile", 0, bin_pcre_compile, 1, 1, 0, "aimx", NULL),
|
BUILTIN("pcre_compile", 0, bin_pcre_compile, 1, 1, 0, "aimx", NULL),
|
||||||
BUILTIN("pcre_study", 0, bin_pcre_study, 0, 0, 0, NULL, NULL),
|
BUILTIN("pcre_study", 0, bin_pcre_study, 0, 0, 0, NULL, NULL),
|
||||||
BUILTIN("pcre_match", 0, bin_pcre_match, 1, 2, 0, "a", NULL)
|
BUILTIN("pcre_match", 0, bin_pcre_match, 1, 1, 0, "a:v:", NULL)
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,161 @@
|
||||||
|
/*
|
||||||
|
* regex.c
|
||||||
|
*
|
||||||
|
* This file is part of zsh, the Z shell.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2007 Phil Pennock
|
||||||
|
* All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, without written agreement and without
|
||||||
|
* license or royalty fees, to use, copy, modify, and distribute this
|
||||||
|
* software and to distribute modified versions of this software for any
|
||||||
|
* purpose, provided that the above copyright notice and the following
|
||||||
|
* two paragraphs appear in all copies of this software.
|
||||||
|
*
|
||||||
|
* In no event shall Phil Pennock or the Zsh Development Group be liable
|
||||||
|
* to any party for direct, indirect, special, incidental, or consequential
|
||||||
|
* damages arising out of the use of this software and its documentation,
|
||||||
|
* even if Phil Pennock and the Zsh Development Group have been advised of
|
||||||
|
* the possibility of such damage.
|
||||||
|
*
|
||||||
|
* Phil Pennock and the Zsh Development Group specifically disclaim any
|
||||||
|
* warranties, including, but not limited to, the implied warranties of
|
||||||
|
* merchantability and fitness for a particular purpose. The software
|
||||||
|
* provided hereunder is on an "as is" basis, and Phil Pennock and the
|
||||||
|
* Zsh Development Group have no obligation to provide maintenance,
|
||||||
|
* support, updates, enhancements, or modifications.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "regex.mdh"
|
||||||
|
#include "regex.pro"
|
||||||
|
|
||||||
|
#include <regex.h>
|
||||||
|
|
||||||
|
/* we default to a vaguely modern syntax and set of capabilities */
|
||||||
|
#define ZREGEX_EXTENDED 0
|
||||||
|
/* if you want Basic syntax, make it an alternative option */
|
||||||
|
|
||||||
|
static void
|
||||||
|
zregex_regerrwarn(int r, regex_t *re, char *msg)
|
||||||
|
{
|
||||||
|
char *errbuf;
|
||||||
|
size_t errbufsz;
|
||||||
|
|
||||||
|
errbufsz = regerror(r, re, NULL, 0);
|
||||||
|
errbuf = zalloc(errbufsz*sizeof(char));
|
||||||
|
regerror(r, re, errbuf, errbufsz);
|
||||||
|
zwarn("%s: %s", msg, errbuf);
|
||||||
|
zfree(errbuf, errbufsz);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**/
|
||||||
|
static int
|
||||||
|
zcond_regex_match(char **a, int id)
|
||||||
|
{
|
||||||
|
regex_t re;
|
||||||
|
regmatch_t *m, *matches = NULL;
|
||||||
|
size_t matchessz;
|
||||||
|
char *lhstr, *rhre, *s, **arr, **x;
|
||||||
|
int r, n, return_value, rcflags, reflags, nelem, start;
|
||||||
|
|
||||||
|
lhstr = cond_str(a,0,0);
|
||||||
|
rhre = cond_str(a,1,0);
|
||||||
|
rcflags = reflags = 0;
|
||||||
|
return_value = 0; /* 1 => matched successfully */
|
||||||
|
|
||||||
|
switch(id) {
|
||||||
|
case ZREGEX_EXTENDED:
|
||||||
|
rcflags |= REG_EXTENDED;
|
||||||
|
if (!isset(CASEMATCH))
|
||||||
|
rcflags |= REG_ICASE;
|
||||||
|
r = regcomp(&re, rhre, rcflags);
|
||||||
|
if (r) {
|
||||||
|
zregex_regerrwarn(r, &re, "failed to compile regex");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* re.re_nsub is number of parenthesized groups, we also need
|
||||||
|
* 1 for the 0 offset, which is the entire matched portion
|
||||||
|
*/
|
||||||
|
if (re.re_nsub < 0) {
|
||||||
|
zwarn("INTERNAL ERROR: regcomp() returned "
|
||||||
|
"negative subpattern count %d", re.re_nsub);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
matchessz = (re.re_nsub + 1) * sizeof(regmatch_t);
|
||||||
|
matches = zalloc(matchessz);
|
||||||
|
r = regexec(&re, lhstr, re.re_nsub+1, matches, reflags);
|
||||||
|
if (r == REG_NOMATCH) /**/;
|
||||||
|
else if (r == 0) {
|
||||||
|
return_value = 1;
|
||||||
|
if (isset(BASHREMATCH)) {
|
||||||
|
start = 0;
|
||||||
|
nelem = re.re_nsub + 1;
|
||||||
|
} else {
|
||||||
|
start = 1;
|
||||||
|
nelem = re.re_nsub;
|
||||||
|
}
|
||||||
|
arr = NULL; /* bogus gcc warning of used uninitialised */
|
||||||
|
/* entire matched portion + re_nsub substrings + NULL */
|
||||||
|
if (nelem) {
|
||||||
|
arr = x = (char **) zalloc(sizeof(char *) * (nelem + 1));
|
||||||
|
for (m = matches + start, n = start; n <= re.re_nsub; ++n, ++m, ++x) {
|
||||||
|
*x = ztrduppfx(lhstr + m->rm_so, m->rm_eo - m->rm_so);
|
||||||
|
}
|
||||||
|
*x = NULL;
|
||||||
|
}
|
||||||
|
if (isset(BASHREMATCH)) {
|
||||||
|
setaparam("BASH_REMATCH", arr);
|
||||||
|
} else {
|
||||||
|
m = matches;
|
||||||
|
s = ztrduppfx(lhstr + m->rm_so, m->rm_eo - m->rm_so);
|
||||||
|
setsparam("MATCH", s);
|
||||||
|
if (nelem)
|
||||||
|
setaparam("match", arr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else zregex_regerrwarn(r, &re, "regex matching error");
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
DPUTS(1, "bad regex option");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (matches)
|
||||||
|
zfree(matches, matchessz);
|
||||||
|
regfree(&re);
|
||||||
|
return return_value;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct conddef cotab[] = {
|
||||||
|
CONDDEF("regex-match", CONDF_INFIX, zcond_regex_match, 0, 0, ZREGEX_EXTENDED)
|
||||||
|
};
|
||||||
|
|
||||||
|
/**/
|
||||||
|
int
|
||||||
|
setup_(UNUSED(Module m))
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**/
|
||||||
|
int
|
||||||
|
boot_(Module m)
|
||||||
|
{
|
||||||
|
return !addconddefs(m->nam, cotab, sizeof(cotab)/sizeof(*cotab));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**/
|
||||||
|
int
|
||||||
|
cleanup_(Module m)
|
||||||
|
{
|
||||||
|
deleteconddefs(m->nam, cotab, sizeof(cotab)/sizeof(*cotab));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**/
|
||||||
|
int
|
||||||
|
finish_(UNUSED(Module m))
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
|
@ -0,0 +1,10 @@
|
||||||
|
name=zsh/regex
|
||||||
|
link=`if test x$ac_cv_func_regcomp = xyes && \
|
||||||
|
test x$ac_cv_func_regexec = xyes && \
|
||||||
|
test x$ac_cv_func_regerror = xyes && \
|
||||||
|
test x$ac_cv_func_regfree = xyes; then echo dynamic; else echo no; fi`
|
||||||
|
load=no
|
||||||
|
|
||||||
|
autobins=""
|
||||||
|
|
||||||
|
objects="regex.o"
|
26
Src/cond.c
26
Src/cond.c
|
@ -34,7 +34,7 @@ int tracingcond;
|
||||||
|
|
||||||
static char *condstr[COND_MOD] = {
|
static char *condstr[COND_MOD] = {
|
||||||
"!", "&&", "||", "==", "!=", "<", ">", "-nt", "-ot", "-ef", "-eq",
|
"!", "&&", "||", "==", "!=", "<", ">", "-nt", "-ot", "-ef", "-eq",
|
||||||
"-ne", "-lt", "-gt", "-le", "-ge"
|
"-ne", "-lt", "-gt", "-le", "-ge", "=~"
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -53,14 +53,14 @@ int
|
||||||
evalcond(Estate state, char *fromtest)
|
evalcond(Estate state, char *fromtest)
|
||||||
{
|
{
|
||||||
struct stat *st;
|
struct stat *st;
|
||||||
char *left, *right;
|
char *left, *right, *overridename, overridebuf[13];
|
||||||
Wordcode pcode;
|
Wordcode pcode;
|
||||||
wordcode code;
|
wordcode code;
|
||||||
int ctype, htok = 0, ret;
|
int ctype, htok = 0, ret;
|
||||||
|
|
||||||
rec:
|
rec:
|
||||||
|
|
||||||
left = right = NULL;
|
left = right = overridename = NULL;
|
||||||
pcode = state->pc++;
|
pcode = state->pc++;
|
||||||
code = *pcode;
|
code = *pcode;
|
||||||
ctype = WC_COND_TYPE(code);
|
ctype = WC_COND_TYPE(code);
|
||||||
|
@ -92,13 +92,28 @@ evalcond(Estate state, char *fromtest)
|
||||||
state->pc = pcode + (WC_COND_SKIP(code) + 1);
|
state->pc = pcode + (WC_COND_SKIP(code) + 1);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
case COND_REGEX:
|
||||||
|
{
|
||||||
|
char *modname = isset(REMATCHPCRE) ? "zsh/pcre" : "zsh/regex";
|
||||||
|
if (!load_module_silence(modname, 1)) {
|
||||||
|
zwarnnam(fromtest, "%s not available for regex",
|
||||||
|
modname);
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
sprintf(overridename = overridebuf, "-%s-match", modname+4);
|
||||||
|
ctype = COND_MODI;
|
||||||
|
}
|
||||||
|
/*FALLTHROUGH*/
|
||||||
case COND_MOD:
|
case COND_MOD:
|
||||||
case COND_MODI:
|
case COND_MODI:
|
||||||
{
|
{
|
||||||
Conddef cd;
|
Conddef cd;
|
||||||
char *name = ecgetstr(state, EC_NODUP, NULL), **strs;
|
char *name = overridename;
|
||||||
|
char **strs;
|
||||||
int l = WC_COND_SKIP(code);
|
int l = WC_COND_SKIP(code);
|
||||||
|
|
||||||
|
if (name == NULL)
|
||||||
|
name = ecgetstr(state, EC_NODUP, NULL);
|
||||||
if (ctype == COND_MOD)
|
if (ctype == COND_MOD)
|
||||||
strs = ecgetarr(state, l, EC_DUP, NULL);
|
strs = ecgetarr(state, l, EC_DUP, NULL);
|
||||||
else {
|
else {
|
||||||
|
@ -139,7 +154,8 @@ evalcond(Estate state, char *fromtest)
|
||||||
return !cd->handler(strs, cd->condid);
|
return !cd->handler(strs, cd->condid);
|
||||||
} else {
|
} else {
|
||||||
zwarnnam(fromtest,
|
zwarnnam(fromtest,
|
||||||
"unrecognized condition: `%s'", name);
|
"unrecognized condition: `%s'",
|
||||||
|
name ? name : "<null>");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* module not found, error */
|
/* module not found, error */
|
||||||
|
|
|
@ -88,11 +88,13 @@ static struct optname optns[] = {
|
||||||
{{NULL, "banghist", OPT_NONBOURNE}, BANGHIST},
|
{{NULL, "banghist", OPT_NONBOURNE}, BANGHIST},
|
||||||
{{NULL, "bareglobqual", OPT_EMULATE|OPT_ZSH}, BAREGLOBQUAL},
|
{{NULL, "bareglobqual", OPT_EMULATE|OPT_ZSH}, BAREGLOBQUAL},
|
||||||
{{NULL, "bashautolist", 0}, BASHAUTOLIST},
|
{{NULL, "bashautolist", 0}, BASHAUTOLIST},
|
||||||
|
{{NULL, "bashrematch", 0}, BASHREMATCH},
|
||||||
{{NULL, "beep", OPT_ALL}, BEEP},
|
{{NULL, "beep", OPT_ALL}, BEEP},
|
||||||
{{NULL, "bgnice", OPT_EMULATE|OPT_NONBOURNE},BGNICE},
|
{{NULL, "bgnice", OPT_EMULATE|OPT_NONBOURNE},BGNICE},
|
||||||
{{NULL, "braceccl", OPT_EMULATE}, BRACECCL},
|
{{NULL, "braceccl", OPT_EMULATE}, BRACECCL},
|
||||||
{{NULL, "bsdecho", OPT_EMULATE|OPT_SH}, BSDECHO},
|
{{NULL, "bsdecho", OPT_EMULATE|OPT_SH}, BSDECHO},
|
||||||
{{NULL, "caseglob", OPT_ALL}, CASEGLOB},
|
{{NULL, "caseglob", OPT_ALL}, CASEGLOB},
|
||||||
|
{{NULL, "casematch", OPT_ALL}, CASEMATCH},
|
||||||
{{NULL, "cbases", 0}, CBASES},
|
{{NULL, "cbases", 0}, CBASES},
|
||||||
{{NULL, "cdablevars", OPT_EMULATE}, CDABLEVARS},
|
{{NULL, "cdablevars", OPT_EMULATE}, CDABLEVARS},
|
||||||
{{NULL, "chasedots", OPT_EMULATE}, CHASEDOTS},
|
{{NULL, "chasedots", OPT_EMULATE}, CHASEDOTS},
|
||||||
|
@ -201,6 +203,7 @@ static struct optname optns[] = {
|
||||||
{{NULL, "rcquotes", OPT_EMULATE}, RCQUOTES},
|
{{NULL, "rcquotes", OPT_EMULATE}, RCQUOTES},
|
||||||
{{NULL, "rcs", OPT_ALL}, RCS},
|
{{NULL, "rcs", OPT_ALL}, RCS},
|
||||||
{{NULL, "recexact", 0}, RECEXACT},
|
{{NULL, "recexact", 0}, RECEXACT},
|
||||||
|
{{NULL, "rematchpcre", 0}, REMATCHPCRE},
|
||||||
{{NULL, "restricted", OPT_SPECIAL}, RESTRICTED},
|
{{NULL, "restricted", OPT_SPECIAL}, RESTRICTED},
|
||||||
{{NULL, "rmstarsilent", OPT_BOURNE}, RMSTARSILENT},
|
{{NULL, "rmstarsilent", OPT_BOURNE}, RMSTARSILENT},
|
||||||
{{NULL, "rmstarwait", 0}, RMSTARWAIT},
|
{{NULL, "rmstarwait", 0}, RMSTARWAIT},
|
||||||
|
|
|
@ -2124,6 +2124,12 @@ par_cond_triple(char *a, char *b, char *c)
|
||||||
ecstr(a);
|
ecstr(a);
|
||||||
ecstr(c);
|
ecstr(c);
|
||||||
ecadd(ecnpats++);
|
ecadd(ecnpats++);
|
||||||
|
} else if ((b[0] == Equals || b[0] == '=') &&
|
||||||
|
(b[1] == '~' || b[1] == Tilde) && ~b[2]) {
|
||||||
|
ecadd(WCB_COND(COND_REGEX, 0));
|
||||||
|
ecstr(a);
|
||||||
|
ecstr(c);
|
||||||
|
ecadd(ecnpats++);
|
||||||
} else if (b[0] == '-') {
|
} else if (b[0] == '-') {
|
||||||
if ((t0 = get_cond_num(b + 1)) > -1) {
|
if ((t0 = get_cond_num(b + 1)) > -1) {
|
||||||
ecadd(WCB_COND(t0 + COND_NT, 0));
|
ecadd(WCB_COND(t0 + COND_NT, 0));
|
||||||
|
|
|
@ -640,7 +640,7 @@ gettext2(Estate state)
|
||||||
{
|
{
|
||||||
static char *c1[] = {
|
static char *c1[] = {
|
||||||
"=", "!=", "<", ">", "-nt", "-ot", "-ef", "-eq",
|
"=", "!=", "<", ">", "-nt", "-ot", "-ef", "-eq",
|
||||||
"-ne", "-lt", "-gt", "-le", "-ge"
|
"-ne", "-lt", "-gt", "-le", "-ge", "=~"
|
||||||
};
|
};
|
||||||
|
|
||||||
int ctype;
|
int ctype;
|
||||||
|
@ -724,7 +724,7 @@ gettext2(Estate state)
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
if (ctype <= COND_GE) {
|
if (ctype < COND_MOD) {
|
||||||
/* Binary test: `a = b' etc. */
|
/* Binary test: `a = b' etc. */
|
||||||
taddstr(ecgetstr(state, EC_NODUP, NULL));
|
taddstr(ecgetstr(state, EC_NODUP, NULL));
|
||||||
taddstr(" ");
|
taddstr(" ");
|
||||||
|
|
|
@ -519,8 +519,9 @@ struct timedfn {
|
||||||
#define COND_GT 13
|
#define COND_GT 13
|
||||||
#define COND_LE 14
|
#define COND_LE 14
|
||||||
#define COND_GE 15
|
#define COND_GE 15
|
||||||
#define COND_MOD 16
|
#define COND_REGEX 16
|
||||||
#define COND_MODI 17
|
#define COND_MOD 17
|
||||||
|
#define COND_MODI 18
|
||||||
|
|
||||||
typedef int (*CondHandler) _((char **, int));
|
typedef int (*CondHandler) _((char **, int));
|
||||||
|
|
||||||
|
@ -1588,11 +1589,13 @@ enum {
|
||||||
BANGHIST,
|
BANGHIST,
|
||||||
BAREGLOBQUAL,
|
BAREGLOBQUAL,
|
||||||
BASHAUTOLIST,
|
BASHAUTOLIST,
|
||||||
|
BASHREMATCH,
|
||||||
BEEP,
|
BEEP,
|
||||||
BGNICE,
|
BGNICE,
|
||||||
BRACECCL,
|
BRACECCL,
|
||||||
BSDECHO,
|
BSDECHO,
|
||||||
CASEGLOB,
|
CASEGLOB,
|
||||||
|
CASEMATCH,
|
||||||
CBASES,
|
CBASES,
|
||||||
CDABLEVARS,
|
CDABLEVARS,
|
||||||
CHASEDOTS,
|
CHASEDOTS,
|
||||||
|
@ -1695,6 +1698,7 @@ enum {
|
||||||
RCQUOTES,
|
RCQUOTES,
|
||||||
RCS,
|
RCS,
|
||||||
RECEXACT,
|
RECEXACT,
|
||||||
|
REMATCHPCRE,
|
||||||
RESTRICTED,
|
RESTRICTED,
|
||||||
RMSTARSILENT,
|
RMSTARSILENT,
|
||||||
RMSTARWAIT,
|
RMSTARWAIT,
|
||||||
|
|
|
@ -1135,7 +1135,8 @@ AC_CHECK_FUNCS(strftime strptime mktime timelocal \
|
||||||
erand48 open_memstream \
|
erand48 open_memstream \
|
||||||
wctomb iconv \
|
wctomb iconv \
|
||||||
grantpt unlockpt ptsname \
|
grantpt unlockpt ptsname \
|
||||||
htons ntohs)
|
htons ntohs \
|
||||||
|
regcomp regexec regerror regfree)
|
||||||
AC_FUNC_STRCOLL
|
AC_FUNC_STRCOLL
|
||||||
|
|
||||||
if test x$enable_cap = xyes; then
|
if test x$enable_cap = xyes; then
|
||||||
|
|
Loading…
Reference in New Issue