Diffstat (limited to 'Parser/lexer')
-rw-r--r--   Parser/lexer/buffer.c |   8
-rw-r--r--   Parser/lexer/lexer.c  | 203
-rw-r--r--   Parser/lexer/lexer.h  |   2
-rw-r--r--   Parser/lexer/state.c  |   2
-rw-r--r--   Parser/lexer/state.h  |  25
5 files changed, 163 insertions(+), 77 deletions(-)
diff --git a/Parser/lexer/buffer.c b/Parser/lexer/buffer.c
index f6502bf8f7f..63aa1ea2ad4 100644
--- a/Parser/lexer/buffer.c
+++ b/Parser/lexer/buffer.c
@@ -13,8 +13,8 @@ _PyLexer_remember_fstring_buffers(struct tok_state *tok)
for (index = tok->tok_mode_stack_index; index >= 0; --index) {
mode = &(tok->tok_mode_stack[index]);
- mode->f_string_start_offset = mode->f_string_start - tok->buf;
- mode->f_string_multi_line_start_offset = mode->f_string_multi_line_start - tok->buf;
+ mode->start_offset = mode->start - tok->buf;
+ mode->multi_line_start_offset = mode->multi_line_start - tok->buf;
}
}
@@ -27,8 +27,8 @@ _PyLexer_restore_fstring_buffers(struct tok_state *tok)
for (index = tok->tok_mode_stack_index; index >= 0; --index) {
mode = &(tok->tok_mode_stack[index]);
- mode->f_string_start = tok->buf + mode->f_string_start_offset;
- mode->f_string_multi_line_start = tok->buf + mode->f_string_multi_line_start_offset;
+ mode->start = tok->buf + mode->start_offset;
+ mode->multi_line_start = tok->buf + mode->multi_line_start_offset;
}
}
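
Aside from the field renames, the pattern in buffer.c is unchanged: because tok->buf can be reallocated while an f/t-string is being lexed, the mode's interior pointers are saved as offsets relative to the buffer start and rebased afterwards. A minimal standalone sketch of that save/restore dance (not CPython code, names invented for illustration):

/* Sketch: save an interior pointer as an offset before the buffer may
 * move, then rebase it onto the (possibly new) buffer address. */
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void) {
    char *buf = malloc(16);
    strcpy(buf, "t'abc'");
    const char *start = buf + 2;            /* interior pointer into buf */

    ptrdiff_t start_offset = start - buf;   /* remember: pointer -> offset */
    buf = realloc(buf, 4096);               /* buffer contents may move */
    start = buf + start_offset;             /* restore: offset -> pointer */

    printf("%c\n", *start);                 /* still prints 'a' */
    free(buf);
    return 0;
}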
diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c
index e81867a5fad..0a078dd5941 100644
--- a/Parser/lexer/lexer.c
+++ b/Parser/lexer/lexer.c
@@ -38,6 +38,9 @@ static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
#endif
+#define FTSTRING_MIDDLE(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_MIDDLE : FSTRING_MIDDLE)
+#define FTSTRING_END(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_END : FSTRING_END)
+#define TOK_GET_STRING_PREFIX(tok) (TOK_GET_MODE(tok)->string_kind == TSTRING ? 't' : 'f')
#define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
_PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))
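
The three new macros centralize the f-string/t-string split: the first two pick the token type emitted for string middles and ends, the third picks the prefix character used in error messages. A standalone sketch of the dispatch; token values and the struct layout here are simplified assumptions, not CPython's:

/* Sketch only: demonstrates how the ternary macros select token kinds. */
#include <stdio.h>

enum string_kind_t { FSTRING, TSTRING };
enum { FSTRING_MIDDLE, FSTRING_END, TSTRING_MIDDLE, TSTRING_END };

typedef struct { enum string_kind_t string_kind; } tokenizer_mode;

#define FTSTRING_MIDDLE(m) ((m)->string_kind == TSTRING ? TSTRING_MIDDLE : FSTRING_MIDDLE)
#define FTSTRING_END(m)    ((m)->string_kind == TSTRING ? TSTRING_END : FSTRING_END)

int main(void) {
    tokenizer_mode f = {FSTRING}, t = {TSTRING};
    printf("middle for f-string mode: %d\n", FTSTRING_MIDDLE(&f)); /* FSTRING_MIDDLE */
    printf("end for t-string mode:    %d\n", FTSTRING_END(&t));    /* TSTRING_END */
    return 0;
}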
@@ -108,12 +111,12 @@ tok_backup(struct tok_state *tok, int c)
}
static int
-set_fstring_expr(struct tok_state* tok, struct token *token, char c) {
+set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
assert(token != NULL);
assert(c == '}' || c == ':' || c == '!');
tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
- if (!tok_mode->f_string_debug || token->metadata) {
+ if (!(tok_mode->in_debug || tok_mode->string_kind == TSTRING) || token->metadata) {
return 0;
}
PyObject *res = NULL;
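
The reworked condition widens when the expression text is recorded: previously only for f-string '=' debug specs, now also for every t-string interpolation, which template objects expose at runtime. A reduced sketch of that gate; the function and parameter names are illustrative, not from the patch:

/* Sketch of the gating logic: record the expression when debugging ('=')
 * or whenever the literal is a t-string, unless metadata is already set. */
#include <stdbool.h>
#include <stdio.h>

enum string_kind_t { FSTRING, TSTRING };

static bool should_record_expr(enum string_kind_t kind, bool in_debug,
                               bool metadata_already_set) {
    if (metadata_already_set)
        return false;
    return in_debug || kind == TSTRING;
}

int main(void) {
    printf("%d\n", should_record_expr(FSTRING, false, false)); /* 0 */
    printf("%d\n", should_record_expr(FSTRING, true,  false)); /* 1: f"{x=}" */
    printf("%d\n", should_record_expr(TSTRING, false, false)); /* 1: t"{x}" */
    return 0;
}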
@@ -173,7 +176,7 @@ set_fstring_expr(struct tok_state* tok, struct token *token, char c) {
}
int
-_PyLexer_update_fstring_expr(struct tok_state *tok, char cur)
+_PyLexer_update_ftstring_expr(struct tok_state *tok, char cur)
{
assert(tok->cur != NULL);
@@ -402,6 +405,51 @@ tok_continuation_line(struct tok_state *tok) {
}
static int
+maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
+ int saw_b, int saw_r, int saw_u,
+ int saw_f, int saw_t) {
+ // Supported: rb, rf, rt (in any order)
+ // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order)
+
+#define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2) \
+ do { \
+ (void)_PyTokenizer_syntaxerror_known_range( \
+ tok, (int)(tok->start + 1 - tok->line_start), \
+ (int)(tok->cur - tok->line_start), \
+ "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible"); \
+ return -1; \
+ } while (0)
+
+ if (saw_u && saw_b) {
+ RETURN_SYNTAX_ERROR("u", "b");
+ }
+ if (saw_u && saw_r) {
+ RETURN_SYNTAX_ERROR("u", "r");
+ }
+ if (saw_u && saw_f) {
+ RETURN_SYNTAX_ERROR("u", "f");
+ }
+ if (saw_u && saw_t) {
+ RETURN_SYNTAX_ERROR("u", "t");
+ }
+
+ if (saw_b && saw_f) {
+ RETURN_SYNTAX_ERROR("b", "f");
+ }
+ if (saw_b && saw_t) {
+ RETURN_SYNTAX_ERROR("b", "t");
+ }
+
+ if (saw_f && saw_t) {
+ RETURN_SYNTAX_ERROR("f", "t");
+ }
+
+#undef RETURN_SYNTAX_ERROR
+
+ return 0;
+}
+
+static int
tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
{
int c;
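
The helper added above reduces prefix validation to pairwise exclusions: 'u' combines with nothing, 'b' excludes 'f' and 't', and 'f' and 't' exclude each other, leaving 'r' as the only prefix that mixes freely. A standalone sketch of the same exclusion table, with error reporting reduced to a return code and no tok_state:

/* Sketch (not the patch's helper): same exclusions, simplified reporting. */
#include <stdio.h>

static int check_prefixes(int saw_b, int saw_r, int saw_u, int saw_f, int saw_t) {
    if (saw_u && (saw_b || saw_r || saw_f || saw_t)) return -1; /* u + anything */
    if (saw_b && (saw_f || saw_t)) return -1;                   /* bf, bt */
    if (saw_f && saw_t) return -1;                              /* ft */
    return 0;                                                   /* rb, rf, rt ok */
}

int main(void) {
    printf("rt: %s\n", check_prefixes(0, 1, 0, 0, 1) ? "error" : "ok"); /* ok */
    printf("ut: %s\n", check_prefixes(0, 0, 1, 0, 1) ? "error" : "ok"); /* error */
    printf("ft: %s\n", check_prefixes(0, 0, 0, 1, 1) ? "error" : "ok"); /* error */
    return 0;
}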
@@ -643,29 +691,40 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
nonascii = 0;
if (is_potential_identifier_start(c)) {
/* Process the various legal combinations of b"", r"", u"", and f"". */
- int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0;
+ int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0;
while (1) {
- if (!(saw_b || saw_u || saw_f) && (c == 'b' || c == 'B'))
+ if (!saw_b && (c == 'b' || c == 'B')) {
saw_b = 1;
+ }
/* Since this is a backwards compatibility support literal we don't
want to support it in arbitrary order like byte literals. */
- else if (!(saw_b || saw_u || saw_r || saw_f)
- && (c == 'u'|| c == 'U')) {
+ else if (!saw_u && (c == 'u'|| c == 'U')) {
saw_u = 1;
}
/* ur"" and ru"" are not supported */
- else if (!(saw_r || saw_u) && (c == 'r' || c == 'R')) {
+ else if (!saw_r && (c == 'r' || c == 'R')) {
saw_r = 1;
}
- else if (!(saw_f || saw_b || saw_u) && (c == 'f' || c == 'F')) {
+ else if (!saw_f && (c == 'f' || c == 'F')) {
saw_f = 1;
}
+ else if (!saw_t && (c == 't' || c == 'T')) {
+ saw_t = 1;
+ }
else {
break;
}
c = tok_nextc(tok);
if (c == '"' || c == '\'') {
- if (saw_f) {
+ // Raise error on incompatible string prefixes:
+ int status = maybe_raise_syntax_error_for_string_prefixes(
+ tok, saw_b, saw_r, saw_u, saw_f, saw_t);
+ if (status < 0) {
+ return MAKE_TOKEN(ERRORTOKEN);
+ }
+
+ // Handle valid f or t string creation:
+ if (saw_f || saw_t) {
goto f_string_quote;
}
goto letter_quote;
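
The rewritten loop accepts each prefix letter at most once, in any order, and defers compatibility checking to the helper once the opening quote is seen. A sketch of that scan over a plain string, with tok_nextc replaced by a cursor (a simplified standalone form, not the patch's code):

/* Sketch of the relaxed prefix scan: flags are set independently; validity
 * is checked only after the opening quote is reached. */
#include <ctype.h>
#include <stdio.h>

static void scan_prefix(const char *s, int *b, int *r, int *u, int *f, int *t) {
    for (; *s; s++) {
        int c = tolower((unsigned char)*s);
        if      (c == 'b' && !*b) *b = 1;
        else if (c == 'u' && !*u) *u = 1;
        else if (c == 'r' && !*r) *r = 1;
        else if (c == 'f' && !*f) *f = 1;
        else if (c == 't' && !*t) *t = 1;
        else break; /* repeated letter or not a prefix: stop */
    }
}

int main(void) {
    int b = 0, r = 0, u = 0, f = 0, t = 0;
    scan_prefix("Rt", &b, &r, &u, &f, &t); /* as in Rt"{x}" */
    printf("raw=%d template=%d\n", r, t);  /* raw=1 template=1 */
    return 0;
}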
@@ -939,7 +998,9 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
}
f_string_quote:
- if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r') && (c == '\'' || c == '"'))) {
+ if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r' || Py_TOLOWER(*tok->start) == 't')
+ && (c == '\'' || c == '"'))) {
+
int quote = c;
int quote_size = 1; /* 1 or 3 */
@@ -971,39 +1032,49 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
p_start = tok->start;
p_end = tok->cur;
if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) {
- return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested f-strings"));
+ return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested f-strings or t-strings"));
}
tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
the_current_tok->kind = TOK_FSTRING_MODE;
- the_current_tok->f_string_quote = quote;
- the_current_tok->f_string_quote_size = quote_size;
- the_current_tok->f_string_start = tok->start;
- the_current_tok->f_string_multi_line_start = tok->line_start;
- the_current_tok->f_string_line_start = tok->lineno;
- the_current_tok->f_string_start_offset = -1;
- the_current_tok->f_string_multi_line_start_offset = -1;
+ the_current_tok->quote = quote;
+ the_current_tok->quote_size = quote_size;
+ the_current_tok->start = tok->start;
+ the_current_tok->multi_line_start = tok->line_start;
+ the_current_tok->first_line = tok->lineno;
+ the_current_tok->start_offset = -1;
+ the_current_tok->multi_line_start_offset = -1;
the_current_tok->last_expr_buffer = NULL;
the_current_tok->last_expr_size = 0;
the_current_tok->last_expr_end = -1;
the_current_tok->in_format_spec = 0;
- the_current_tok->f_string_debug = 0;
+ the_current_tok->in_debug = 0;
+ enum string_kind_t string_kind = FSTRING;
switch (*tok->start) {
+ case 'T':
+ case 't':
+ the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
+ string_kind = TSTRING;
+ break;
case 'F':
case 'f':
- the_current_tok->f_string_raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
+ the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
break;
case 'R':
case 'r':
- the_current_tok->f_string_raw = 1;
+ the_current_tok->raw = 1;
+ if (Py_TOLOWER(*(tok->start + 1)) == 't') {
+ string_kind = TSTRING;
+ }
break;
default:
Py_UNREACHABLE();
}
+ the_current_tok->string_kind = string_kind;
the_current_tok->curly_bracket_depth = 0;
the_current_tok->curly_bracket_expr_start_depth = -1;
- return MAKE_TOKEN(FSTRING_START);
+ return string_kind == TSTRING ? MAKE_TOKEN(TSTRING_START) : MAKE_TOKEN(FSTRING_START);
}
letter_quote:
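
The mode-stack setup in the hunk above derives both new fields from the first two prefix characters: 't'/'T' selects TSTRING, 'f'/'F' keeps FSTRING, and a leading 'r'/'R' sets raw while checking the second character for 't'. A standalone sketch mirroring that switch (the helper name and signature are invented for illustration):

/* Sketch of the classification done on *tok->start; simplified, standalone. */
#include <ctype.h>
#include <stdio.h>

enum string_kind_t { FSTRING, TSTRING };

static void classify(const char *start, enum string_kind_t *kind, int *raw) {
    *kind = FSTRING;
    *raw = 0;
    switch (tolower((unsigned char)start[0])) {
        case 't':
            *kind = TSTRING;
            *raw = tolower((unsigned char)start[1]) == 'r';  /* tr"..." */
            break;
        case 'f':
            *raw = tolower((unsigned char)start[1]) == 'r';  /* fr"..." */
            break;
        case 'r':
            *raw = 1;
            if (tolower((unsigned char)start[1]) == 't')     /* rt"..." */
                *kind = TSTRING;
            break;
    }
}

int main(void) {
    enum string_kind_t kind;
    int raw;
    classify("rt\"", &kind, &raw);
    printf("kind=%s raw=%d\n", kind == TSTRING ? "TSTRING" : "FSTRING", raw);
    return 0;
}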
@@ -1063,9 +1134,10 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
* and if it is, then this must be a missing '}' token
* so raise the proper error */
tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
- if (the_current_tok->f_string_quote == quote &&
- the_current_tok->f_string_quote_size == quote_size) {
- return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "f-string: expecting '}'", start));
+ if (the_current_tok->quote == quote &&
+ the_current_tok->quote_size == quote_size) {
+ return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
+ "%c-string: expecting '}'", TOK_GET_STRING_PREFIX(tok)));
}
}
@@ -1136,12 +1208,12 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
int cursor = current_tok->curly_bracket_depth - (c != '{');
int in_format_spec = current_tok->in_format_spec;
int cursor_in_format_with_debug =
- cursor == 1 && (current_tok->f_string_debug || in_format_spec);
+ cursor == 1 && (current_tok->in_debug || in_format_spec);
int cursor_valid = cursor == 0 || cursor_in_format_with_debug;
- if ((cursor_valid) && !_PyLexer_update_fstring_expr(tok, c)) {
+ if ((cursor_valid) && !_PyLexer_update_ftstring_expr(tok, c)) {
return MAKE_TOKEN(ENDMARKER);
}
- if ((cursor_valid) && c != '{' && set_fstring_expr(tok, token, c)) {
+ if ((cursor_valid) && c != '{' && set_ftstring_expr(tok, token, c)) {
return MAKE_TOKEN(ERRORTOKEN);
}
@@ -1194,7 +1266,8 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
case ']':
case '}':
if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') {
- return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "f-string: single '}' is not allowed"));
+ return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
+ "%c-string: single '}' is not allowed", TOK_GET_STRING_PREFIX(tok)));
}
if (!tok->tok_extra_tokens && !tok->level) {
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "unmatched '%c'", c));
@@ -1214,7 +1287,8 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
assert(current_tok->curly_bracket_depth >= 0);
int previous_bracket = current_tok->curly_bracket_depth - 1;
if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
- return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "f-string: unmatched '%c'", c));
+ return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
+ "%c-string: unmatched '%c'", TOK_GET_STRING_PREFIX(tok), c));
}
}
if (tok->parenlinenostack[tok->level] != tok->lineno) {
@@ -1235,13 +1309,14 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
if (INSIDE_FSTRING(tok)) {
current_tok->curly_bracket_depth--;
if (current_tok->curly_bracket_depth < 0) {
- return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "f-string: unmatched '%c'", c));
+ return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "%c-string: unmatched '%c'",
+ TOK_GET_STRING_PREFIX(tok), c));
}
if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
current_tok->curly_bracket_expr_start_depth--;
current_tok->kind = TOK_FSTRING_MODE;
current_tok->in_format_spec = 0;
- current_tok->f_string_debug = 0;
+ current_tok->in_debug = 0;
}
}
break;
@@ -1254,7 +1329,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
}
if( c == '=' && INSIDE_FSTRING_EXPR(current_tok)) {
- current_tok->f_string_debug = 1;
+ current_tok->in_debug = 1;
}
/* Punctuation character */
@@ -1285,7 +1360,8 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
if (peek1 != '{') {
current_tok->curly_bracket_expr_start_depth++;
if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
- return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "f-string: expressions nested too deeply"));
+ return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
+ "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
}
TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
return tok_get_normal_mode(tok, current_tok, token);
@@ -1296,9 +1372,9 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
}
// Check if we are at the end of the string
- for (int i = 0; i < current_tok->f_string_quote_size; i++) {
+ for (int i = 0; i < current_tok->quote_size; i++) {
int quote = tok_nextc(tok);
- if (quote != current_tok->f_string_quote) {
+ if (quote != current_tok->quote) {
tok_backup(tok, quote);
goto f_string_middle;
}
@@ -1314,14 +1390,14 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
p_start = tok->start;
p_end = tok->cur;
tok->tok_mode_stack_index--;
- return MAKE_TOKEN(FSTRING_END);
+ return MAKE_TOKEN(FTSTRING_END(current_tok));
f_string_middle:
// TODO: This is a bit of a hack, but it works for now. We need to find a better way to handle
// this.
tok->multi_line_start = tok->line_start;
- while (end_quote_size != current_tok->f_string_quote_size) {
+ while (end_quote_size != current_tok->quote_size) {
int c = tok_nextc(tok);
if (tok->done == E_ERROR || tok->done == E_DECODE) {
return MAKE_TOKEN(ERRORTOKEN);
@@ -1332,7 +1408,7 @@ f_string_middle:
INSIDE_FSTRING_EXPR(current_tok)
);
- if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) {
+ if (c == EOF || (current_tok->quote_size == 1 && c == '\n')) {
if (tok->decoding_erred) {
return MAKE_TOKEN(ERRORTOKEN);
}
@@ -1341,11 +1417,12 @@ f_string_middle:
// it means that the format spec ends here and we should
// return to the regular mode.
if (in_format_spec && c == '\n') {
- if (current_tok->f_string_quote_size == 1) {
+ if (current_tok->quote_size == 1) {
return MAKE_TOKEN(
_PyTokenizer_syntaxerror(
tok,
- "f-string: newlines are not allowed in format specifiers for single quoted f-strings"
+ "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings",
+ TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok)
)
);
}
@@ -1354,25 +1431,26 @@ f_string_middle:
current_tok->in_format_spec = 0;
p_start = tok->start;
p_end = tok->cur;
- return MAKE_TOKEN(FSTRING_MIDDLE);
+ return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
}
assert(tok->multi_line_start != NULL);
// shift the tok_state's location into
// the start of string, and report the error
// from the initial quote character
- tok->cur = (char *)current_tok->f_string_start;
+ tok->cur = (char *)current_tok->start;
tok->cur++;
- tok->line_start = current_tok->f_string_multi_line_start;
+ tok->line_start = current_tok->multi_line_start;
int start = tok->lineno;
tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
- tok->lineno = the_current_tok->f_string_line_start;
+ tok->lineno = the_current_tok->first_line;
- if (current_tok->f_string_quote_size == 3) {
+ if (current_tok->quote_size == 3) {
_PyTokenizer_syntaxerror(tok,
- "unterminated triple-quoted f-string literal"
- " (detected at line %d)", start);
+ "unterminated triple-quoted %c-string literal"
+ " (detected at line %d)",
+ TOK_GET_STRING_PREFIX(tok), start);
if (c != '\n') {
tok->done = E_EOFS;
}
@@ -1380,12 +1458,12 @@ f_string_middle:
}
else {
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
- "unterminated f-string literal (detected at"
- " line %d)", start));
+ "unterminated %c-string literal (detected at"
+ " line %d)", TOK_GET_STRING_PREFIX(tok), start));
}
}
- if (c == current_tok->f_string_quote) {
+ if (c == current_tok->quote) {
end_quote_size += 1;
continue;
} else {
@@ -1393,7 +1471,7 @@ f_string_middle:
}
if (c == '{') {
- if (!_PyLexer_update_fstring_expr(tok, c)) {
+ if (!_PyLexer_update_ftstring_expr(tok, c)) {
return MAKE_TOKEN(ENDMARKER);
}
int peek = tok_nextc(tok);
@@ -1402,7 +1480,8 @@ f_string_middle:
tok_backup(tok, c);
current_tok->curly_bracket_expr_start_depth++;
if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
- return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "f-string: expressions nested too deeply"));
+ return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
+ "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
}
TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
current_tok->in_format_spec = 0;
@@ -1412,12 +1491,12 @@ f_string_middle:
p_start = tok->start;
p_end = tok->cur - 1;
}
- return MAKE_TOKEN(FSTRING_MIDDLE);
+ return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
} else if (c == '}') {
if (unicode_escape) {
p_start = tok->start;
p_end = tok->cur;
- return MAKE_TOKEN(FSTRING_MIDDLE);
+ return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
}
int peek = tok_nextc(tok);
@@ -1437,7 +1516,7 @@ f_string_middle:
p_start = tok->start;
p_end = tok->cur;
}
- return MAKE_TOKEN(FSTRING_MIDDLE);
+ return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
} else if (c == '\\') {
int peek = tok_nextc(tok);
if (peek == '\r') {
@@ -1447,7 +1526,7 @@ f_string_middle:
// brace. We have to restore and return the control back
// to the loop for the next iteration.
if (peek == '{' || peek == '}') {
- if (!current_tok->f_string_raw) {
+ if (!current_tok->raw) {
if (_PyTokenizer_warn_invalid_escape_sequence(tok, peek)) {
return MAKE_TOKEN(ERRORTOKEN);
}
@@ -1456,7 +1535,7 @@ f_string_middle:
continue;
}
- if (!current_tok->f_string_raw) {
+ if (!current_tok->raw) {
if (peek == 'N') {
/* Handle named unicode escapes (\N{BULLET}) */
peek = tok_nextc(tok);
@@ -1474,12 +1553,12 @@ f_string_middle:
// Backup the f-string quotes to emit a final FSTRING_MIDDLE and
// add the quotes to the FSTRING_END in the next tokenizer iteration.
- for (int i = 0; i < current_tok->f_string_quote_size; i++) {
- tok_backup(tok, current_tok->f_string_quote);
+ for (int i = 0; i < current_tok->quote_size; i++) {
+ tok_backup(tok, current_tok->quote);
}
p_start = tok->start;
p_end = tok->cur;
- return MAKE_TOKEN(FSTRING_MIDDLE);
+ return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
}
static int
diff --git a/Parser/lexer/lexer.h b/Parser/lexer/lexer.h
index 7f21bf56bba..1d97ac57b74 100644
--- a/Parser/lexer/lexer.h
+++ b/Parser/lexer/lexer.h
@@ -3,7 +3,7 @@
#include "state.h"
-int _PyLexer_update_fstring_expr(struct tok_state *tok, char cur);
+int _PyLexer_update_ftstring_expr(struct tok_state *tok, char cur);
int _PyTokenizer_Get(struct tok_state *, struct token *);
diff --git a/Parser/lexer/state.c b/Parser/lexer/state.c
index 1665debea30..2de9004fe08 100644
--- a/Parser/lexer/state.c
+++ b/Parser/lexer/state.c
@@ -54,7 +54,7 @@ _PyTokenizer_tok_new(void)
tok->tok_extra_tokens = 0;
tok->comment_newline = 0;
tok->implicit_newline = 0;
- tok->tok_mode_stack[0] = (tokenizer_mode){.kind =TOK_REGULAR_MODE, .f_string_quote='\0', .f_string_quote_size = 0, .f_string_debug=0};
+ tok->tok_mode_stack[0] = (tokenizer_mode){.kind =TOK_REGULAR_MODE, .quote='\0', .quote_size = 0, .in_debug=0};
tok->tok_mode_stack_index = 0;
#ifdef Py_DEBUG
tok->debug = _Py_GetConfig()->parser_debug;
diff --git a/Parser/lexer/state.h b/Parser/lexer/state.h
index 9ed3babfdbf..5e8cac7249b 100644
--- a/Parser/lexer/state.h
+++ b/Parser/lexer/state.h
@@ -36,6 +36,11 @@ enum tokenizer_mode_kind_t {
TOK_FSTRING_MODE,
};
+enum string_kind_t {
+ FSTRING,
+ TSTRING,
+};
+
#define MAX_EXPR_NESTING 3
typedef struct _tokenizer_mode {
@@ -44,21 +49,23 @@ typedef struct _tokenizer_mode {
int curly_bracket_depth;
int curly_bracket_expr_start_depth;
- char f_string_quote;
- int f_string_quote_size;
- int f_string_raw;
- const char* f_string_start;
- const char* f_string_multi_line_start;
- int f_string_line_start;
+ char quote;
+ int quote_size;
+ int raw;
+ const char* start;
+ const char* multi_line_start;
+ int first_line;
- Py_ssize_t f_string_start_offset;
- Py_ssize_t f_string_multi_line_start_offset;
+ Py_ssize_t start_offset;
+ Py_ssize_t multi_line_start_offset;
Py_ssize_t last_expr_size;
Py_ssize_t last_expr_end;
char* last_expr_buffer;
- int f_string_debug;
+ int in_debug;
int in_format_spec;
+
+ enum string_kind_t string_kind;
} tokenizer_mode;
/* Tokenizer state */