about summary refs log tree commit diffstats homepage
path: root/Parser/lexer/lexer.c
diff options
context:
space:
mode:
Diffstat (limited to 'Parser/lexer/lexer.c')
-rw-r--r--  Parser/lexer/lexer.c  76
1 file changed, 58 insertions, 18 deletions
diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c
index 98adf7447c5..0a078dd5941 100644
--- a/Parser/lexer/lexer.c
+++ b/Parser/lexer/lexer.c
@@ -405,6 +405,51 @@ tok_continuation_line(struct tok_state *tok) {
}
static int
+// Check whether the string-literal prefix letters just consumed form a
+// legal combination.  Returns 0 when the combination is allowed; otherwise
+// reports a SyntaxError whose range covers the prefix characters and
+// returns -1.
+maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
+ int saw_b, int saw_r, int saw_u,
+ int saw_f, int saw_t) {
+ // Supported: rb, rf, rt (in any order)
+ // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order)
+
+// Emit a ranged syntax error (from one past the token start up to the
+// current cursor) naming the two clashing prefixes, then return -1.
+#define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2) \
+ do { \
+ (void)_PyTokenizer_syntaxerror_known_range( \
+ tok, (int)(tok->start + 1 - tok->line_start), \
+ (int)(tok->cur - tok->line_start), \
+ "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible"); \
+ return -1; \
+ } while (0)
+
+ // 'u' is a backwards-compatibility prefix and combines with nothing.
+ if (saw_u && saw_b) {
+ RETURN_SYNTAX_ERROR("u", "b");
+ }
+ if (saw_u && saw_r) {
+ RETURN_SYNTAX_ERROR("u", "r");
+ }
+ if (saw_u && saw_f) {
+ RETURN_SYNTAX_ERROR("u", "f");
+ }
+ if (saw_u && saw_t) {
+ RETURN_SYNTAX_ERROR("u", "t");
+ }
+
+ // 'b' cannot combine with 'f' or 't'.
+ if (saw_b && saw_f) {
+ RETURN_SYNTAX_ERROR("b", "f");
+ }
+ if (saw_b && saw_t) {
+ RETURN_SYNTAX_ERROR("b", "t");
+ }
+
+ // 'f' and 't' are mutually exclusive.
+ if (saw_f && saw_t) {
+ RETURN_SYNTAX_ERROR("f", "t");
+ }
+
+#undef RETURN_SYNTAX_ERROR
+
+ return 0;
+}
+
+static int
tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
{
int c;
@@ -648,22 +693,22 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
/* Process the various legal combinations of b"", r"", u"", and f"". */
int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0;
while (1) {
- if (!(saw_b || saw_u || saw_f) && (c == 'b' || c == 'B'))
+ if (!saw_b && (c == 'b' || c == 'B')) {
saw_b = 1;
+ }
/* Since this is a backwards compatibility support literal we don't
want to support it in arbitrary order like byte literals. */
- else if (!(saw_b || saw_u || saw_r || saw_f || saw_t)
- && (c == 'u'|| c == 'U')) {
+ else if (!saw_u && (c == 'u'|| c == 'U')) {
saw_u = 1;
}
/* ur"" and ru"" are not supported */
- else if (!(saw_r || saw_u) && (c == 'r' || c == 'R')) {
+ else if (!saw_r && (c == 'r' || c == 'R')) {
saw_r = 1;
}
- else if (!(saw_f || saw_b || saw_u) && (c == 'f' || c == 'F')) {
+ else if (!saw_f && (c == 'f' || c == 'F')) {
saw_f = 1;
}
- else if (!(saw_t || saw_u) && (c == 't' || c == 'T')) {
+ else if (!saw_t && (c == 't' || c == 'T')) {
saw_t = 1;
}
else {
@@ -671,17 +716,11 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
}
c = tok_nextc(tok);
if (c == '"' || c == '\'') {
- if (saw_b && saw_t) {
- return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
- tok, (int)(tok->start + 1 - tok->line_start),
- (int)(tok->cur - tok->line_start),
- "can't use 'b' and 't' string prefixes together"));
- }
- if (saw_f && saw_t) {
- return MAKE_TOKEN(_PyTokenizer_syntaxerror_known_range(
- tok, (int)(tok->start + 1 - tok->line_start),
- (int)(tok->cur - tok->line_start),
- "can't use 'f' and 't' string prefixes together"));
+ // Raise error on incompatible string prefixes:
+ int status = maybe_raise_syntax_error_for_string_prefixes(
+ tok, saw_b, saw_r, saw_u, saw_f, saw_t);
+ if (status < 0) {
+ return MAKE_TOKEN(ERRORTOKEN);
}
// Handle valid f or t string creation:
@@ -1382,7 +1421,8 @@ f_string_middle:
return MAKE_TOKEN(
_PyTokenizer_syntaxerror(
tok,
- "f-string: newlines are not allowed in format specifiers for single quoted f-strings"
+ "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings",
+ TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok)
)
);
}