diff options
Diffstat (limited to 'Parser/lexer/lexer.c')
-rw-r--r-- | Parser/lexer/lexer.c | 76 |
1 files changed, 58 insertions, 18 deletions
diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c index 98adf7447c5..0a078dd5941 100644 --- a/Parser/lexer/lexer.c +++ b/Parser/lexer/lexer.c @@ -405,6 +405,51 @@ tok_continuation_line(struct tok_state *tok) { } static int +maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok, + int saw_b, int saw_r, int saw_u, + int saw_f, int saw_t) { + // Supported: rb, rf, rt (in any order) + // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order) + +#define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2) \ + do { \ + (void)_PyTokenizer_syntaxerror_known_range( \ + tok, (int)(tok->start + 1 - tok->line_start), \ + (int)(tok->cur - tok->line_start), \ + "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible"); \ + return -1; \ + } while (0) + + if (saw_u && saw_b) { + RETURN_SYNTAX_ERROR("u", "b"); + } + if (saw_u && saw_r) { + RETURN_SYNTAX_ERROR("u", "r"); + } + if (saw_u && saw_f) { + RETURN_SYNTAX_ERROR("u", "f"); + } + if (saw_u && saw_t) { + RETURN_SYNTAX_ERROR("u", "t"); + } + + if (saw_b && saw_f) { + RETURN_SYNTAX_ERROR("b", "f"); + } + if (saw_b && saw_t) { + RETURN_SYNTAX_ERROR("b", "t"); + } + + if (saw_f && saw_t) { + RETURN_SYNTAX_ERROR("f", "t"); + } + +#undef RETURN_SYNTAX_ERROR + + return 0; +} + +static int tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token) { int c; @@ -648,22 +693,22 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t /* Process the various legal combinations of b"", r"", u"", and f"". */ int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0; while (1) { - if (!(saw_b || saw_u || saw_f) && (c == 'b' || c == 'B')) + if (!saw_b && (c == 'b' || c == 'B')) { saw_b = 1; + } /* Since this is a backwards compatibility support literal we don't want to support it in arbitrary order like byte literals. */ - else if (!(saw_b || saw_u || saw_r || saw_f || saw_t) - && (c == 'u'|| c == 'U')) { + else if (!saw_u && (c == 'u'|| c == 'U')) { saw_u = 1; } /* ur"" and ru"" are not supported */ - else if (!(saw_r || saw_u) && (c == 'r' || c == 'R')) { + else if (!saw_r && (c == 'r' || c == 'R')) { saw_r = 1; } - else if (!(saw_f || saw_b || saw_u) && (c == 'f' || c == 'F')) { + else if (!saw_f && (c == 'f' || c == 'F')) { saw_f = 1; } - else if (!(saw_t || saw_u) && (c == 't' || c == 'T')) { + else if (!saw_t && (c == 't' || c == 'T')) { saw_t = 1; } else { @@ -671,17 +716,11 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t } c = tok_nextc(tok); if (c == '"' || c == '\'') { - if (saw_b && saw_t) { - return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range( - tok, (int)(tok->start + 1 - tok->line_start), - (int)(tok->cur - tok->line_start), - "can't use 'b' and 't' string prefixes together")); - } - if (saw_f && saw_t) { - return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range( - tok, (int)(tok->start + 1 - tok->line_start), - (int)(tok->cur - tok->line_start), - "can't use 'f' and 't' string prefixes together")); + // Raise error on incompatible string prefixes: + int status = maybe_raise_syntax_error_for_string_prefixes( + tok, saw_b, saw_r, saw_u, saw_f, saw_t); + if (status < 0) { + return MAKE_TOKEN(ERRORTOKEN); } // Handle valid f or t string creation: @@ -1382,7 +1421,8 @@ f_string_middle: return MAKE_TOKEN( _PyTokenizer_syntaxerror( tok, - "f-string: newlines are not allowed in format specifiers for single quoted f-strings" + "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings", + TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok) ) ); } |