40 Star 0 Fork 11

openKylin/perl

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
dquote.c 18.30 KB
一键复制 编辑 原始数据 按行查看 历史
xinjiahao 提交于 2024-03-10 12:24 . New upstream version 5.36.0
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566
/* dquote.c
*
* This file contains functions that are related to
* parsing double-quotish expressions.
*
*/
#include "EXTERN.h"
#define PERL_IN_DQUOTE_C
#include "perl.h"
/* XXX Add documentation after final interface and behavior is decided */
bool
Perl_grok_bslash_c(pTHX_ const char source,
U8 * result,
const char** message,
U32 * packed_warn)
{
PERL_ARGS_ASSERT_GROK_BSLASH_C;
/* This returns TRUE if the \c? sequence is valid; FALSE otherwise. If it
* is valid, the sequence evaluates to a single character, which will be
* stored into *result.
*
* source is the character immediately after a '\c' sequence.
* result points to a char variable into which this function will store
* what the sequence evaluates to, if valid; unchanged otherwise.
* message A pointer to any warning or error message will be stored into
* this pointer; NULL if none.
* packed_warn if NULL on input asks that this routine display any warning
* messages. Otherwise, if the function found a warning, the
* packed warning categories will be stored into *packed_warn (and
* the corresponding message text into *message); 0 if none.
*/
*message = NULL;
if (packed_warn) *packed_warn = 0;
if (! isPRINT_A(source)) {
*message = "Character following \"\\c\" must be printable ASCII";
return FALSE;
}
if (source == '{') {
const char control = toCTRL('{');
if (isPRINT_A(control)) {
/* diag_listed_as: Use "%s" instead of "%s" */
*message = Perl_form(aTHX_ "Use \"%c\" instead of \"\\c{\"", control);
}
else {
*message = "Sequence \"\\c{\" invalid";
}
return FALSE;
}
*result = toCTRL(source);
if (isPRINT_A(*result) && ckWARN(WARN_SYNTAX)) {
U8 clearer[3];
U8 i = 0;
char format[] = "\"\\c%c\" is more clearly written simply as \"%s\"";
if (! isWORDCHAR(*result)) {
clearer[i++] = '\\';
}
clearer[i++] = *result;
clearer[i++] = '\0';
if (packed_warn) {
*message = Perl_form(aTHX_ format, source, clearer);
*packed_warn = packWARN(WARN_SYNTAX);
}
else {
Perl_warner(aTHX_ packWARN(WARN_SYNTAX), format, source, clearer);
}
}
return TRUE;
}
const char *
Perl_form_alien_digit_msg(pTHX_
const U8 which, /* 8 or 16 */
const STRLEN valids_len, /* length of input before first bad char */
const char * const first_bad, /* Ptr to that bad char */
const char * const send, /* End of input string */
const bool UTF, /* Is it in UTF-8? */
const bool braced) /* Is it enclosed in {} */
{
/* Generate a mortal SV containing an appropriate warning message about
* alien characters found in an octal or hex constant given by the inputs,
* and return a pointer to that SV's string. The message looks like:
*
* Non-hex character '?' terminates \x early. Resolved as "\x{...}"
*
*/
/* The usual worst case scenario: 2 chars to display per byte, plus \x{}
* (leading zeros could take up more space, and the scalar will
* automatically grow if necessary). Space for NUL is added by the newSV()
* function */
SV * display_char = newSV(2 * UTF8_MAXBYTES + 4);
SV * message_sv = sv_newmortal();
char symbol;
PERL_ARGS_ASSERT_FORM_ALIEN_DIGIT_MSG;
assert(which == 8 || which == 16);
/* Calculate the display form of the character */
if ( UVCHR_IS_INVARIANT(*first_bad)
|| (UTF && isUTF8_CHAR((U8 *) first_bad, (U8 *) send)))
{
pv_uni_display(display_char, (U8 *) first_bad, UTF8SKIP(first_bad),
(STRLEN) -1, UNI_DISPLAY_QQ);
}
else { /* Is not UTF-8, or is illegal UTF-8. Show just the one byte */
/* It also isn't a UTF-8 invariant character, so no display shortcuts
* are available. Use \\x{...} */
Perl_sv_setpvf(aTHX_ display_char, "\\x{%02x}", *first_bad);
}
/* Ready to start building the message */
sv_setpvs(message_sv, "Non-");
if (which == 8) {
sv_catpvs(message_sv, "octal");
if (braced) {
symbol = 'o';
}
else {
symbol = '0'; /* \008, for example */
}
}
else {
sv_catpvs(message_sv, "hex");
symbol = 'x';
}
sv_catpvs(message_sv, " character ");
if (isPRINT(*first_bad)) {
sv_catpvs(message_sv, "'");
}
sv_catsv(message_sv, display_char);
if (isPRINT(*first_bad)) {
sv_catpvs(message_sv, "'");
}
Perl_sv_catpvf(aTHX_ message_sv, " terminates \\%c early. Resolved as "
"\"\\%c", symbol, symbol);
if (braced) {
sv_catpvs(message_sv, "{");
}
/* Octal constants have an extra leading 0, but \0 already includes that */
if (symbol == 'o' && valids_len < 3) {
sv_catpvs(message_sv, "0");
}
if (valids_len == 0) { /* No legal digits at all */
sv_catpvs(message_sv, "00");
}
else if (valids_len == 1) { /* Just one is legal */
sv_catpvs(message_sv, "0");
}
sv_catpvn(message_sv, first_bad - valids_len, valids_len);
if (braced) {
sv_catpvs(message_sv, "}");
}
else {
sv_catsv(message_sv, display_char);
}
sv_catpvs(message_sv, "\"");
SvREFCNT_dec_NN(display_char);
return SvPVX_const(message_sv);
}
const char *
Perl_form_cp_too_large_msg(pTHX_
const U8 which, /* 8 or 16 */
const char * string, /* NULL, or the text that is supposed to
represent a code point */
const Size_t len, /* length of 'string' if not NULL; else 0 */
const UV cp) /* 0 if 'string' not NULL; else the too-large
code point */
{
/* Generate a mortal SV containing an appropriate warning message about
* code points that are too large for this system, given by the inputs,
* and return a pointer to that SV's string. Either the text of the string
* to be converted to a code point is input, or a code point itself. The
* former is needed to accurately represent something that overflows.
*
* The message looks like:
*
* Use of code point %s is not allowed; the permissible max is %s
*
*/
SV * message_sv = sv_newmortal();
const char * format;
const char * prefix;
PERL_ARGS_ASSERT_FORM_CP_TOO_LARGE_MSG;
assert(which == 8 || which == 16);
/* One but not both must be non-zero */
assert((string != NULL) ^ (cp != 0));
assert((string == NULL) || len);
if (which == 8) {
format = "%" UVof;
prefix = "0";
}
else {
format = "%" UVXf;
prefix = "0x";
}
Perl_sv_setpvf(aTHX_ message_sv, "Use of code point %s", prefix);
if (string) {
Perl_sv_catpvf(aTHX_ message_sv, "%.*s", (int) len, string);
}
else {
Perl_sv_catpvf(aTHX_ message_sv, format, cp);
}
Perl_sv_catpvf(aTHX_ message_sv, " is not allowed; the permissible max is %s", prefix);
Perl_sv_catpvf(aTHX_ message_sv, format, MAX_LEGAL_CP);
return SvPVX_const(message_sv);
}
bool
Perl_grok_bslash_o(pTHX_ char **s, const char * const send, UV *uv,
const char** message,
U32 * packed_warn,
const bool strict,
const bool allow_UV_MAX,
const bool UTF)
{
/* Documentation to be supplied when interface nailed down finally
* This returns FALSE if there is an error the caller should probably die
* from; otherwise TRUE.
* s is the address of a pointer to a string. **s is 'o', and the
* previous character was a backslash. At exit, *s will be advanced
* to the byte just after those absorbed by this function. Hence the
* caller can continue parsing from there. In the case of an error
* when this function returns FALSE, continuing to parse is not an
* option, this routine has generally positioned *s to point just to
* the right of the first bad spot, so that a message that has a "<--"
* to mark the spot will be correctly positioned.
* send - 1 gives a limit in *s that this function is not permitted to
* look beyond. That is, the function may look at bytes only in the
* range *s..send-1
* uv points to a UV that will hold the output value, valid only if the
* return from the function is TRUE; may be changed from the input
* value even when FALSE is returned.
* message A pointer to any warning or error message will be stored into
* this pointer; NULL if none.
* packed_warn if NULL on input asks that this routine display any warning
* messages. Otherwise, if the function found a warning, the packed
* warning categories will be stored into *packed_warn (and the
* corresponding message text into *message); 0 if none.
* strict is true if this should fail instead of warn if there are
* non-octal digits within the braces
* allow_UV_MAX is true if this shouldn't fail if the input code point is
* UV_MAX, which is normally illegal, reserved for internal use.
* UTF is true iff the string *s is encoded in UTF-8.
*/
char * e;
char * rbrace;
STRLEN numbers_len;
STRLEN trailing_blanks_len = 0;
I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
| PERL_SCAN_DISALLOW_PREFIX
| PERL_SCAN_SILENT_NON_PORTABLE
| PERL_SCAN_SILENT_ILLDIGIT
| PERL_SCAN_SILENT_OVERFLOW;
PERL_ARGS_ASSERT_GROK_BSLASH_O;
assert(*(*s - 1) == '\\');
assert(* *s == 'o');
*message = NULL;
if (packed_warn) *packed_warn = 0;
(*s)++;
if (send <= *s || **s != '{') {
*message = "Missing braces on \\o{}";
return FALSE;
}
rbrace = (char *) memchr(*s, '}', send - *s);
if (!rbrace) {
(*s)++; /* Move past the '{' */
/* Position beyond the legal digits and blanks */
while (*s < send && isBLANK(**s)) {
(*s)++;
}
while (*s < send && isOCTAL(**s)) {
(*s)++;
}
*message = "Missing right brace on \\o{}";
return FALSE;
}
/* Point to expected first digit (could be first byte of utf8 sequence if
* not a digit) */
(*s)++;
while (isBLANK(**s)) {
(*s)++;
}
e = rbrace;
while (*s < e && isBLANK(*(e - 1))) {
e--;
}
numbers_len = e - *s;
if (numbers_len == 0) {
(*s)++; /* Move past the '}' */
*message = "Empty \\o{}";
return FALSE;
}
*uv = grok_oct(*s, &numbers_len, &flags, NULL);
if (UNLIKELY( (flags & PERL_SCAN_GREATER_THAN_UV_MAX)
|| (! allow_UV_MAX && *uv == UV_MAX)))
{
*message = form_cp_too_large_msg(8, *s, numbers_len, 0);
*s = rbrace + 1;
return FALSE;
}
while (isBLANK(**s)) {
trailing_blanks_len++;
(*s)++;
}
/* Note that if has non-octal, will ignore everything starting with that up
* to the '}' */
if (numbers_len + trailing_blanks_len != (STRLEN) (e - *s)) {
*s += numbers_len;
if (strict) {
*s += (UTF) ? UTF8_SAFE_SKIP(*s, send) : 1;
*message = "Non-octal character";
return FALSE;
}
if (ckWARN(WARN_DIGIT)) {
const char * failure = form_alien_digit_msg(8, numbers_len, *s, send,
UTF, TRUE);
if (packed_warn) {
*message = failure;
*packed_warn = packWARN(WARN_DIGIT);
}
else {
Perl_warner(aTHX_ packWARN(WARN_DIGIT), "%s", failure);
}
}
}
/* Return past the '}' */
*s = rbrace + 1;
return TRUE;
}
bool
Perl_grok_bslash_x(pTHX_ char ** s, const char * const send, UV *uv,
const char** message,
U32 * packed_warn,
const bool strict,
const bool allow_UV_MAX,
const bool UTF)
{
/* Documentation to be supplied when interface nailed down finally
* This returns FALSE if there is an error the caller should probably die
* from; otherwise TRUE.
* It guarantees that the returned codepoint, *uv, when expressed as
* utf8 bytes, would fit within the skipped "\x{...}" bytes.
*
* On input:
* s is the address of a pointer to a string. **s is 'x', and the
* previous character was a backslash. At exit, *s will be advanced
* to the byte just after those absorbed by this function. Hence the
* caller can continue parsing from there. In the case of an error,
* this routine has generally positioned *s to point just to the right
* of the first bad spot, so that a message that has a "<--" to mark
* the spot will be correctly positioned.
* send - 1 gives a limit in *s that this function is not permitted to
* look beyond. That is, the function may look at bytes only in the
* range *s..send-1
* uv points to a UV that will hold the output value, valid only if the
* return from the function is TRUE; may be changed from the input
* value even when FALSE is returned.
* message A pointer to any warning or error message will be stored into
* this pointer; NULL if none.
* packed_warn if NULL on input asks that this routine display any warning
* messages. Otherwise, if the function found a warning, the packed
* warning categories will be stored into *packed_warn (and the
* corresponding message text into *message); 0 if none.
* strict is true if anything out of the ordinary should cause this to
* fail instead of warn or be silent. For example, it requires
* exactly 2 digits following the \x (when there are no braces).
* 3 digits could be a mistake, so is forbidden in this mode.
* allow_UV_MAX is true if this shouldn't fail if the input code point is
* UV_MAX, which is normally illegal, reserved for internal use.
* UTF is true iff the string *s is encoded in UTF-8.
*/
char* e;
char * rbrace;
STRLEN numbers_len;
STRLEN trailing_blanks_len = 0;
I32 flags = PERL_SCAN_DISALLOW_PREFIX
| PERL_SCAN_SILENT_ILLDIGIT
| PERL_SCAN_NOTIFY_ILLDIGIT
| PERL_SCAN_SILENT_NON_PORTABLE
| PERL_SCAN_SILENT_OVERFLOW;
PERL_ARGS_ASSERT_GROK_BSLASH_X;
assert(*(*s - 1) == '\\');
assert(* *s == 'x');
*message = NULL;
if (packed_warn) *packed_warn = 0;
(*s)++;
if (send <= *s) {
if (strict) {
*message = "Empty \\x";
return FALSE;
}
/* Sadly, to preserve backcompat, an empty \x at the end of string is
* interpreted as a NUL */
*uv = 0;
return TRUE;
}
if (**s != '{') {
numbers_len = (strict) ? 3 : 2;
*uv = grok_hex(*s, &numbers_len, &flags, NULL);
*s += numbers_len;
if (numbers_len != 2 && (strict || (flags & PERL_SCAN_NOTIFY_ILLDIGIT))) {
if (numbers_len == 3) { /* numbers_len 3 only happens with strict */
*message = "Use \\x{...} for more than two hex characters";
return FALSE;
}
else if (strict) {
*s += (UTF) ? UTF8_SAFE_SKIP(*s, send) : 1;
*message = "Non-hex character";
return FALSE;
}
else if (ckWARN(WARN_DIGIT)) {
const char * failure = form_alien_digit_msg(16, numbers_len, *s,
send, UTF, FALSE);
if (! packed_warn) {
Perl_warner(aTHX_ packWARN(WARN_DIGIT), "%s", failure);
}
else {
*message = failure;
*packed_warn = packWARN(WARN_DIGIT);
}
}
}
return TRUE;
}
rbrace = (char *) memchr(*s, '}', send - *s);
if (!rbrace) {
(*s)++; /* Move past the '{' */
/* Position beyond legal blanks and digits */
while (*s < send && isBLANK(**s)) {
(*s)++;
}
while (*s < send && isXDIGIT(**s)) {
(*s)++;
}
*message = "Missing right brace on \\x{}";
return FALSE;
}
(*s)++; /* Point to expected first digit (could be first byte of utf8
sequence if not a digit) */
while (isBLANK(**s)) {
(*s)++;
}
e = rbrace;
while (*s < e && isBLANK(*(e - 1))) {
e--;
}
numbers_len = e - *s;
if (numbers_len == 0) {
if (strict) {
(*s)++; /* Move past the } */
*message = "Empty \\x{}";
return FALSE;
}
*s = rbrace + 1;
*uv = 0;
return TRUE;
}
flags |= PERL_SCAN_ALLOW_UNDERSCORES;
*uv = grok_hex(*s, &numbers_len, &flags, NULL);
if (UNLIKELY( (flags & PERL_SCAN_GREATER_THAN_UV_MAX)
|| (! allow_UV_MAX && *uv == UV_MAX)))
{
*message = form_cp_too_large_msg(16, *s, numbers_len, 0);
*s = e + 1;
return FALSE;
}
while (isBLANK(**s)) {
trailing_blanks_len++;
(*s)++;
}
if (numbers_len + trailing_blanks_len != (STRLEN) (e - *s)) {
*s += numbers_len;
if (strict) {
*s += (UTF) ? UTF8_SAFE_SKIP(*s, send) : 1;
*message = "Non-hex character";
return FALSE;
}
if (ckWARN(WARN_DIGIT)) {
const char * failure = form_alien_digit_msg(16, numbers_len, *s,
send, UTF, TRUE);
if (! packed_warn) {
Perl_warner(aTHX_ packWARN(WARN_DIGIT), "%s", failure);
}
else {
*message = failure;
*packed_warn = packWARN(WARN_DIGIT);
}
}
}
/* Return past the '}' */
*s = rbrace + 1;
return TRUE;
}
/*
* ex: set ts=8 sts=4 sw=4 et:
*/
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/openkylin/perl.git
git@gitee.com:openkylin/perl.git
openkylin
perl
perl
upstream

搜索帮助