mirror of
git://git.code.sf.net/p/zsh/code
synced 2024-06-09 16:56:04 +02:00
51728: assign pcre named capture groups to a hash
This commit is contained in:
parent
b62e911341
commit
f3f371deb3
|
@ -1,5 +1,8 @@
|
||||||
2023-05-13 Oliver Kiddle <opk@zsh.org>
|
2023-05-13 Oliver Kiddle <opk@zsh.org>
|
||||||
|
|
||||||
|
* 51728: Doc/Zsh/mod_pcre.yo, Src/Modules/pcre.c,
|
||||||
|
Test/V07pcre.ztst: assign pcre named capture groups to a hash
|
||||||
|
|
||||||
* 51723: Src/Modules/pcre.c, Test/V07pcre.ztst, configure.ac:
|
* 51723: Src/Modules/pcre.c, Test/V07pcre.ztst, configure.ac:
|
||||||
migrate pcre module to pcre2
|
migrate pcre module to pcre2
|
||||||
|
|
||||||
|
|
|
@ -20,12 +20,12 @@ including those that indicate newline.
|
||||||
)
|
)
|
||||||
findex(pcre_study)
|
findex(pcre_study)
|
||||||
item(tt(pcre_study))(
|
item(tt(pcre_study))(
|
||||||
Studies the previously-compiled PCRE which may result in faster
|
Requests JIT compilation for the previously-compiled PCRE which
|
||||||
matching.
|
may result in faster matching.
|
||||||
)
|
)
|
||||||
findex(pcre_match)
|
findex(pcre_match)
|
||||||
item(tt(pcre_match) [ tt(-v) var(var) ] [ tt(-a) var(arr) ] \
|
item(tt(pcre_match) [ tt(-v) var(var) ] [ tt(-a) var(arr) ] \
|
||||||
[ tt(-n) var(offset) ] [ tt(-b) ] var(string))(
|
[ tt(-A) var(assoc) ] [ tt(-n) var(offset) ] [ tt(-b) ] var(string))(
|
||||||
Returns successfully if tt(string) matches the previously-compiled
|
Returns successfully if tt(string) matches the previously-compiled
|
||||||
PCRE.
|
PCRE.
|
||||||
|
|
||||||
|
@ -36,7 +36,9 @@ substrings, unless the tt(-a) option is given, in which
|
||||||
case it will set the array var(arr). Similarly, the variable
|
case it will set the array var(arr). Similarly, the variable
|
||||||
tt(MATCH) will be set to the entire matched portion of the
|
tt(MATCH) will be set to the entire matched portion of the
|
||||||
string, unless the tt(-v) option is given, in which case the variable
|
string, unless the tt(-v) option is given, in which case the variable
|
||||||
var(var) will be set.
|
var(var) will be set. Furthermore, any named captures will
|
||||||
|
be stored in the associative array tt(.pcre.match) unless an
|
||||||
|
alternative is given with tt(-A).
|
||||||
No variables are altered if there is no successful match.
|
No variables are altered if there is no successful match.
|
||||||
A tt(-n) option starts searching for a match from the
|
A tt(-n) option starts searching for a match from the
|
||||||
byte var(offset) position in var(string). If the tt(-b) option is given,
|
byte var(offset) position in var(string). If the tt(-b) option is given,
|
||||||
|
|
|
@ -129,14 +129,17 @@ bin_pcre_study(char *nam, UNUSED(char **args), UNUSED(Options ops), UNUSED(int f
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
zpcre_get_substrings(char *arg, pcre2_match_data *mdata, int captured_count,
|
zpcre_get_substrings(pcre2_code *pat, char *arg, pcre2_match_data *mdata,
|
||||||
char *matchvar, char *substravar, int want_offset_pair,
|
int captured_count, char *matchvar, char *substravar, char *namedassoc,
|
||||||
int matchedinarr, int want_begin_end)
|
int want_offset_pair, int matchedinarr, int want_begin_end)
|
||||||
{
|
{
|
||||||
PCRE2_SIZE *ovec;
|
PCRE2_SIZE *ovec;
|
||||||
char *match_all, **matches;
|
char *match_all, **matches;
|
||||||
char offset_all[50];
|
char offset_all[50];
|
||||||
int capture_start = 1;
|
int capture_start = 1;
|
||||||
|
int vec_off;
|
||||||
|
PCRE2_SPTR ntable; /* table of named captures */
|
||||||
|
uint32_t ncount, nsize;
|
||||||
|
|
||||||
if (matchedinarr) {
|
if (matchedinarr) {
|
||||||
/* bash-style ovec[0] entire-matched string in the array */
|
/* bash-style ovec[0] entire-matched string in the array */
|
||||||
|
@ -174,7 +177,7 @@ zpcre_get_substrings(char *arg, pcre2_match_data *mdata, int captured_count,
|
||||||
if (substravar &&
|
if (substravar &&
|
||||||
(!want_begin_end || nelem)) {
|
(!want_begin_end || nelem)) {
|
||||||
char **x;
|
char **x;
|
||||||
int vec_off, i;
|
int i;
|
||||||
matches = x = (char **) zalloc(sizeof(char *) * (captured_count+1-capture_start));
|
matches = x = (char **) zalloc(sizeof(char *) * (captured_count+1-capture_start));
|
||||||
for (i = capture_start; i < captured_count; i++) {
|
for (i = capture_start; i < captured_count; i++) {
|
||||||
vec_off = 2*i;
|
vec_off = 2*i;
|
||||||
|
@ -184,6 +187,23 @@ zpcre_get_substrings(char *arg, pcre2_match_data *mdata, int captured_count,
|
||||||
setaparam(substravar, matches);
|
setaparam(substravar, matches);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!pcre2_pattern_info(pat, PCRE2_INFO_NAMECOUNT, &ncount) && ncount
|
||||||
|
&& !pcre2_pattern_info(pat, PCRE2_INFO_NAMEENTRYSIZE, &nsize)
|
||||||
|
&& !pcre2_pattern_info(pat, PCRE2_INFO_NAMETABLE, &ntable))
|
||||||
|
{
|
||||||
|
char **hash, **hashptr;
|
||||||
|
uint32_t nidx;
|
||||||
|
hashptr = hash = (char **)zshcalloc((ncount+1)*2*sizeof(char *));
|
||||||
|
for (nidx = 0; nidx < ncount; nidx++) {
|
||||||
|
vec_off = (ntable[nsize * nidx] << 9) + 2 * ntable[nsize * nidx + 1];
|
||||||
|
/* would metafy the key but pcre limits characters in the name */
|
||||||
|
*hashptr++ = ztrdup((char *) ntable + nsize * nidx + 2);
|
||||||
|
*hashptr++ = metafy(arg + ovec[vec_off],
|
||||||
|
ovec[vec_off+1]-ovec[vec_off], META_DUP);
|
||||||
|
}
|
||||||
|
sethparam(namedassoc, hash);
|
||||||
|
}
|
||||||
|
|
||||||
if (want_begin_end) {
|
if (want_begin_end) {
|
||||||
/*
|
/*
|
||||||
* cond-infix rather than builtin; also not bash; so we set a bunch
|
* cond-infix rather than builtin; also not bash; so we set a bunch
|
||||||
|
@ -286,6 +306,7 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func))
|
||||||
char *matched_portion = NULL;
|
char *matched_portion = NULL;
|
||||||
char *plaintext = NULL;
|
char *plaintext = NULL;
|
||||||
char *receptacle = NULL;
|
char *receptacle = NULL;
|
||||||
|
char *named = ".pcre.match";
|
||||||
int return_value = 1;
|
int return_value = 1;
|
||||||
/* The subject length and offset start are both int values in pcre_exec */
|
/* The subject length and offset start are both int values in pcre_exec */
|
||||||
int subject_len;
|
int subject_len;
|
||||||
|
@ -305,6 +326,9 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func))
|
||||||
if(OPT_HASARG(ops,c='v')) {
|
if(OPT_HASARG(ops,c='v')) {
|
||||||
matched_portion = OPT_ARG(ops,c);
|
matched_portion = OPT_ARG(ops,c);
|
||||||
}
|
}
|
||||||
|
if (OPT_HASARG(ops, c='A')) {
|
||||||
|
named = OPT_ARG(ops, c);
|
||||||
|
}
|
||||||
if(OPT_HASARG(ops,c='n')) { /* The offset position to start the search, in bytes. */
|
if(OPT_HASARG(ops,c='n')) { /* The offset position to start the search, in bytes. */
|
||||||
if ((offset_start = getposint(OPT_ARG(ops,c), nam)) < 0)
|
if ((offset_start = getposint(OPT_ARG(ops,c), nam)) < 0)
|
||||||
return 1;
|
return 1;
|
||||||
|
@ -326,8 +350,8 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func))
|
||||||
if (ret==0) return_value = 0;
|
if (ret==0) return_value = 0;
|
||||||
else if (ret == PCRE2_ERROR_NOMATCH) /* no match */;
|
else if (ret == PCRE2_ERROR_NOMATCH) /* no match */;
|
||||||
else if (ret>0) {
|
else if (ret>0) {
|
||||||
zpcre_get_substrings(plaintext, pcre_mdata, ret, matched_portion, receptacle,
|
zpcre_get_substrings(pcre_pattern, plaintext, pcre_mdata, ret, matched_portion,
|
||||||
want_offset_pair, 0, 0);
|
receptacle, named, want_offset_pair, 0, 0);
|
||||||
return_value = 0;
|
return_value = 0;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -405,9 +429,8 @@ cond_pcre_match(char **a, int id)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
else if (r>0) {
|
else if (r>0) {
|
||||||
zpcre_get_substrings(lhstr_plain, pcre_mdata, r, svar, avar, 0,
|
zpcre_get_substrings(pcre_pat, lhstr_plain, pcre_mdata, r, svar, avar,
|
||||||
isset(BASHREMATCH),
|
".pcre.match", 0, isset(BASHREMATCH), !isset(BASHREMATCH));
|
||||||
!isset(BASHREMATCH));
|
|
||||||
return_value = 1;
|
return_value = 1;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -443,7 +466,7 @@ static struct conddef cotab[] = {
|
||||||
|
|
||||||
static struct builtin bintab[] = {
|
static struct builtin bintab[] = {
|
||||||
BUILTIN("pcre_compile", 0, bin_pcre_compile, 1, 1, 0, "aimxs", NULL),
|
BUILTIN("pcre_compile", 0, bin_pcre_compile, 1, 1, 0, "aimxs", NULL),
|
||||||
BUILTIN("pcre_match", 0, bin_pcre_match, 1, 1, 0, "a:v:n:b", NULL),
|
BUILTIN("pcre_match", 0, bin_pcre_match, 1, 1, 0, "A:a:v:n:b", NULL),
|
||||||
BUILTIN("pcre_study", 0, bin_pcre_study, 0, 0, 0, NULL, NULL)
|
BUILTIN("pcre_study", 0, bin_pcre_study, 0, 0, 0, NULL, NULL)
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -182,3 +182,17 @@
|
||||||
[[ abc =~ 'a(d*)bc' ]] && print "$#MATCH; $#match; ${#match[1]}"
|
[[ abc =~ 'a(d*)bc' ]] && print "$#MATCH; $#match; ${#match[1]}"
|
||||||
0:empty capture
|
0:empty capture
|
||||||
>3; 1; 0
|
>3; 1; 0
|
||||||
|
|
||||||
|
[[ category/name-12345 =~ '(?x)^
|
||||||
|
(?<category> [^/]* ) /
|
||||||
|
(?<package>
|
||||||
|
(?<name> \w+ ) -
|
||||||
|
(?<version> \d+ ))$' ]]
|
||||||
|
typeset -p1 .pcre.match
|
||||||
|
0:named captures
|
||||||
|
>typeset -g -A .pcre.match=(
|
||||||
|
> [category]=category
|
||||||
|
> [name]=name
|
||||||
|
> [package]=name-12345
|
||||||
|
> [version]=12345
|
||||||
|
>)
|
||||||
|
|
Loading…
Reference in New Issue