Diffstat (limited to 'Parser/lexer')
-rw-r--r--   Parser/lexer/buffer.c |   8
-rw-r--r--   Parser/lexer/lexer.c  | 203
-rw-r--r--   Parser/lexer/lexer.h  |   2
-rw-r--r--   Parser/lexer/state.c  |   2
-rw-r--r--   Parser/lexer/state.h  |  25
5 files changed, 163 insertions(+), 77 deletions(-)
diff --git a/Parser/lexer/buffer.c b/Parser/lexer/buffer.c
index f6502bf8f7f..63aa1ea2ad4 100644
--- a/Parser/lexer/buffer.c
+++ b/Parser/lexer/buffer.c
@@ -13,8 +13,8 @@ _PyLexer_remember_fstring_buffers(struct tok_state *tok)
for (index = tok->tok_mode_stack_index; index >= 0; --index) {
mode = &(tok->tok_mode_stack[index]);
- mode->f_string_start_offset = mode->f_string_start - tok->buf;
- mode->f_string_multi_line_start_offset = mode->f_string_multi_line_start - tok->buf;
+ mode->start_offset = mode->start - tok->buf;
+ mode->multi_line_start_offset = mode->multi_line_start - tok->buf;
}
}
@@ -27,8 +27,8 @@ _PyLexer_restore_fstring_buffers(struct tok_state *tok)
for (index = tok->tok_mode_stack_index; index >= 0; --index) {
mode = &(tok->tok_mode_stack[index]);
- mode->f_string_start = tok->buf + mode->f_string_start_offset;
- mode->f_string_multi_line_start = tok->buf + mode->f_string_multi_line_start_offset;
+ mode->start = tok->buf + mode->start_offset;
+ mode->multi_line_start = tok->buf + mode->multi_line_start_offset;
}
}
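
Aside from the field renames, the pattern in buffer.c is unchanged: because tok->buf can be reallocated while an f/t-string is being lexed, the mode's interior pointers are saved as offsets relative to the buffer start and rebased afterwards. A minimal standalone sketch of that save/restore dance (not CPython code, names invented for illustration):

/* Sketch: save an interior pointer as an offset before the buffer may
 * move, then rebase it onto the (possibly new) buffer address. */
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void) {
    char *buf = malloc(16);
    strcpy(buf, "t'abc'");
    const char *start = buf + 2;            /* interior pointer into buf */

    ptrdiff_t start_offset = start - buf;   /* remember: pointer -> offset */
    buf = realloc(buf, 4096);               /* buffer contents may move */
    start = buf + start_offset;             /* restore: offset -> pointer */

    printf("%c\n", *start);                 /* still prints 'a' */
    free(buf);
    return 0;
}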
diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c
index e81867a5fad..0a078dd5941 100644
--- a/Parser/lexer/lexer.c
+++ b/Parser/lexer/lexer.c
@@ -38,6 +38,9 @@ static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
#endif
+#define FTSTRING_MIDDLE(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_MIDDLE : FSTRING_MIDDLE)
+#define FTSTRING_END(tok_mode) (tok_mode->string_kind == TSTRING ? TSTRING_END : FSTRING_END)
+#define TOK_GET_STRING_PREFIX(tok) (TOK_GET_MODE(tok)->string_kind == TSTRING ? 't' : 'f')
#define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
_PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))
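
The three new macros centralize the f-string/t-string split: the first two pick the token type emitted for string middles and ends, the third picks the prefix character used in error messages. A standalone sketch of the dispatch; token values and the struct layout here are simplified assumptions, not CPython's:

/* Sketch only: demonstrates how the ternary macros select token kinds. */
#include <stdio.h>

enum string_kind_t { FSTRING, TSTRING };
enum { FSTRING_MIDDLE, FSTRING_END, TSTRING_MIDDLE, TSTRING_END };

typedef struct { enum string_kind_t string_kind; } tokenizer_mode;

#define FTSTRING_MIDDLE(m) ((m)->string_kind == TSTRING ? TSTRING_MIDDLE : FSTRING_MIDDLE)
#define FTSTRING_END(m)    ((m)->string_kind == TSTRING ? TSTRING_END : FSTRING_END)

int main(void) {
    tokenizer_mode f = {FSTRING}, t = {TSTRING};
    printf("middle for f-string mode: %d\n", FTSTRING_MIDDLE(&f)); /* FSTRING_MIDDLE */
    printf("end for t-string mode:    %d\n", FTSTRING_END(&t));    /* TSTRING_END */
    return 0;
}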
@@ -108,12 +111,12 @@ tok_backup(struct tok_state *tok, int c)
}
static int
-set_fstring_expr(struct tok_state* tok, struct token *token, char c) {
+set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
assert(token != NULL);
assert(c == '}' || c == ':' || c == '!');
tokenizer_mode *tok_mode = TOK_GET_MODE(tok);
- if (!tok_mode->f_string_debug || token->metadata) {
+ if (!(tok_mode->in_debug || tok_mode->string_kind == TSTRING) || token->metadata) {
return 0;
}
PyObject *res = NULL;
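
The reworked condition widens when the expression text is recorded: previously only for f-string '=' debug specs, now also for every t-string interpolation, which template objects expose at runtime. A reduced sketch of that gate; the function and parameter names are illustrative, not from the patch:

/* Sketch of the gating logic: record the expression when debugging ('=')
 * or whenever the literal is a t-string, unless metadata is already set. */
#include <stdbool.h>
#include <stdio.h>

enum string_kind_t { FSTRING, TSTRING };

static bool should_record_expr(enum string_kind_t kind, bool in_debug,
                               bool metadata_already_set) {
    if (metadata_already_set)
        return false;
    return in_debug || kind == TSTRING;
}

int main(void) {
    printf("%d\n", should_record_expr(FSTRING, false, false)); /* 0 */
    printf("%d\n", should_record_expr(FSTRING, true,  false)); /* 1: f"{x=}" */
    printf("%d\n", should_record_expr(TSTRING, false, false)); /* 1: t"{x}" */
    return 0;
}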
@@ -173,7 +176,7 @@ set_fstring_expr(struct tok_state* tok, struct token *token, char c) {
}
int
-_PyLexer_update_fstring_expr(struct tok_state *tok, char cur)
+_PyLexer_update_ftstring_expr(struct tok_state *tok, char cur)
{
assert(tok->cur != NULL);
@@ -402,6 +405,51 @@ tok_continuation_line(struct tok_state *tok) {
}
static int
+maybe_raise_syntax_error_for_string_prefixes(struct tok_state *tok,
+ int saw_b, int saw_r, int saw_u,
+ int saw_f, int saw_t) {
+ // Supported: rb, rf, rt (in any order)
+ // Unsupported: ub, ur, uf, ut, bf, bt, ft (in any order)
+
+#define RETURN_SYNTAX_ERROR(PREFIX1, PREFIX2) \
+ do { \
+ (void)_PyTokenizer_syntaxerror_known_range( \
+ tok, (int)(tok->start + 1 - tok->line_start), \
+ (int)(tok->cur - tok->line_start), \
+ "'" PREFIX1 "' and '" PREFIX2 "' prefixes are incompatible"); \
+ return -1; \
+ } while (0)
+
+ if (saw_u && saw_b) {
+ RETURN_SYNTAX_ERROR("u", "b");
+ }
+ if (saw_u && saw_r) {
+ RETURN_SYNTAX_ERROR("u", "r");
+ }
+ if (saw_u && saw_f) {
+ RETURN_SYNTAX_ERROR("u", "f");
+ }
+ if (saw_u && saw_t) {
+ RETURN_SYNTAX_ERROR("u", "t");
+ }
+
+ if (saw_b && saw_f) {
+ RETURN_SYNTAX_ERROR("b", "f");
+ }
+ if (saw_b && saw_t) {
+ RETURN_SYNTAX_ERROR("b", "t");
+ }
+
+ if (saw_f && saw_t) {
+ RETURN_SYNTAX_ERROR("f", "t");
+ }
+
+#undef RETURN_SYNTAX_ERROR
+
+ return 0;
+}
+
+static int
tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct token *token)
{
int c;
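
The helper added above reduces prefix validation to pairwise exclusions: 'u' combines with nothing, 'b' excludes 'f' and 't', and 'f' and 't' exclude each other, leaving 'r' as the only prefix that mixes freely. A standalone sketch of the same exclusion table, with error reporting reduced to a return code and no tok_state:

/* Sketch (not the patch's helper): same exclusions, simplified reporting. */
#include <stdio.h>

static int check_prefixes(int saw_b, int saw_r, int saw_u, int saw_f, int saw_t) {
    if (saw_u && (saw_b || saw_r || saw_f || saw_t)) return -1; /* u + anything */
    if (saw_b && (saw_f || saw_t)) return -1;                   /* bf, bt */
    if (saw_f && saw_t) return -1;                              /* ft */
    return 0;                                                   /* rb, rf, rt ok */
}

int main(void) {
    printf("rt: %s\n", check_prefixes(0, 1, 0, 0, 1) ? "error" : "ok"); /* ok */
    printf("ut: %s\n", check_prefixes(0, 0, 1, 0, 1) ? "error" : "ok"); /* error */
    printf("ft: %s\n", check_prefixes(0, 0, 0, 1, 1) ? "error" : "ok"); /* error */
    return 0;
}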
@@ -643,29 +691,40 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
nonascii = 0;
if (is_potential_identifier_start(c)) {
/* Process the various legal combinations of b"", r"", u"", and f"". */
- int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0;
+ int saw_b = 0, saw_r = 0, saw_u = 0, saw_f = 0, saw_t = 0;
while (1) {
- if (!(saw_b || saw_u || saw_f) && (c == 'b' || c == 'B'))
+ if (!saw_b && (c == 'b' || c == 'B')) {
saw_b = 1;
+ }
/* Since this is a backwards compatibility support literal we don't
want to support it in arbitrary order like byte literals. */
- else if (!(saw_b || saw_u || saw_r || saw_f)
- && (c == 'u'|| c == 'U')) {
+ else if (!saw_u && (c == 'u'|| c == 'U')) {
saw_u = 1;
}
/* ur"" and ru"" are not supported */
- else if (!(saw_r || saw_u) && (c == 'r' || c == 'R')) {
+ else if (!saw_r && (c == 'r' || c == 'R')) {
saw_r = 1;
}
- else if (!(saw_f || saw_b || saw_u) && (c == 'f' || c == 'F')) {
+ else if (!saw_f && (c == 'f' || c == 'F')) {
saw_f = 1;
}
+ else if (!saw_t && (c == 't' || c == 'T')) {
+ saw_t = 1;
+ }
else {
break;
}
c = tok_nextc(tok);
if (c == '"' || c == '\'') {
- if (saw_f) {
+ // Raise error on incompatible string prefixes:
+ int status = maybe_raise_syntax_error_for_string_prefixes(
+ tok, saw_b, saw_r, saw_u, saw_f, saw_t);
+ if (status < 0) {
+ return MAKE_TOKEN(ERRORTOKEN);
+ }
+
+ // Handle valid f or t string creation:
+ if (saw_f || saw_t) {
goto f_string_quote;
}
goto letter_quote;
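
The rewritten loop accepts each prefix letter at most once, in any order, and defers compatibility checking to the helper once the opening quote is seen. A sketch of that scan over a plain string, with tok_nextc replaced by a cursor (a simplified standalone form, not the patch's code):

/* Sketch of the relaxed prefix scan: flags are set independently; validity
 * is checked only after the opening quote is reached. */
#include <ctype.h>
#include <stdio.h>

static void scan_prefix(const char *s, int *b, int *r, int *u, int *f, int *t) {
    for (; *s; s++) {
        int c = tolower((unsigned char)*s);
        if      (c == 'b' && !*b) *b = 1;
        else if (c == 'u' && !*u) *u = 1;
        else if (c == 'r' && !*r) *r = 1;
        else if (c == 'f' && !*f) *f = 1;
        else if (c == 't' && !*t) *t = 1;
        else break; /* repeated letter or not a prefix: stop */
    }
}

int main(void) {
    int b = 0, r = 0, u = 0, f = 0, t = 0;
    scan_prefix("Rt", &b, &r, &u, &f, &t); /* as in Rt"{x}" */
    printf("raw=%d template=%d\n", r, t);  /* raw=1 template=1 */
    return 0;
}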
@@ -939,7 +998,9 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
}
f_string_quote:
- if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r') && (c == '\'' || c == '"'))) {
+ if (((Py_TOLOWER(*tok->start) == 'f' || Py_TOLOWER(*tok->start) == 'r' || Py_TOLOWER(*tok->start) == 't')
+ && (c == '\'' || c == '"'))) {
+
int quote = c;
int quote_size = 1; /* 1 or 3 */
@@ -971,39 +1032,49 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
p_start = tok->start;
p_end = tok->cur;
if (tok->tok_mode_stack_index + 1 >= MAXFSTRINGLEVEL) {
- return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested f-strings"));
+ return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "too many nested f-strings or t-strings"));
}
tokenizer_mode *the_current_tok = TOK_NEXT_MODE(tok);
the_current_tok->kind = TOK_FSTRING_MODE;
- the_current_tok->f_string_quote = quote;
- the_current_tok->f_string_quote_size = quote_size;
- the_current_tok->f_string_start = tok->start;
- the_current_tok->f_string_multi_line_start = tok->line_start;
- the_current_tok->f_string_line_start = tok->lineno;
- the_current_tok->f_string_start_offset = -1;
- the_current_tok->f_string_multi_line_start_offset = -1;
+ the_current_tok->quote = quote;
+ the_current_tok->quote_size = quote_size;
+ the_current_tok->start = tok->start;
+ the_current_tok->multi_line_start = tok->line_start;
+ the_current_tok->first_line = tok->lineno;
+ the_current_tok->start_offset = -1;
+ the_current_tok->multi_line_start_offset = -1;
the_current_tok->last_expr_buffer = NULL;
the_current_tok->last_expr_size = 0;
the_current_tok->last_expr_end = -1;
the_current_tok->in_format_spec = 0;
- the_current_tok->f_string_debug = 0;
+ the_current_tok->in_debug = 0;
+ enum string_kind_t string_kind = FSTRING;
switch (*tok->start) {
+ case 'T':
+ case 't':
+ the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
+ string_kind = TSTRING;
+ break;
case 'F':
case 'f':
- the_current_tok->f_string_raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
+ the_current_tok->raw = Py_TOLOWER(*(tok->start + 1)) == 'r';
break;
case 'R':
case 'r':
- the_current_tok->f_string_raw = 1;
+ the_current_tok->raw = 1;
+ if (Py_TOLOWER(*(tok->start + 1)) == 't') {
+ string_kind = TSTRING;
+ }
break;
default:
Py_UNREACHABLE();
}
+ the_current_tok->string_kind = string_kind;
the_current_tok->curly_bracket_depth = 0;
the_current_tok->curly_bracket_expr_start_depth = -1;
- return MAKE_TOKEN(FSTRING_START);
+ return string_kind == TSTRING ? MAKE_TOKEN(TSTRING_START) : MAKE_TOKEN(FSTRING_START);
}
letter_quote:
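
The mode-stack setup in the hunk above derives both new fields from the first two prefix characters: 't'/'T' selects TSTRING, 'f'/'F' keeps FSTRING, and a leading 'r'/'R' sets raw while checking the second character for 't'. A standalone sketch mirroring that switch (the helper name and signature are invented for illustration):

/* Sketch of the classification done on *tok->start; simplified, standalone. */
#include <ctype.h>
#include <stdio.h>

enum string_kind_t { FSTRING, TSTRING };

static void classify(const char *start, enum string_kind_t *kind, int *raw) {
    *kind = FSTRING;
    *raw = 0;
    switch (tolower((unsigned char)start[0])) {
        case 't':
            *kind = TSTRING;
            *raw = tolower((unsigned char)start[1]) == 'r';  /* tr"..." */
            break;
        case 'f':
            *raw = tolower((unsigned char)start[1]) == 'r';  /* fr"..." */
            break;
        case 'r':
            *raw = 1;
            if (tolower((unsigned char)start[1]) == 't')     /* rt"..." */
                *kind = TSTRING;
            break;
    }
}

int main(void) {
    enum string_kind_t kind;
    int raw;
    classify("rt\"", &kind, &raw);
    printf("kind=%s raw=%d\n", kind == TSTRING ? "TSTRING" : "FSTRING", raw);
    return 0;
}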
@@ -1063,9 +1134,10 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
* and if it is, then this must be a missing '}' token
* so raise the proper error */
tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
- if (the_current_tok->f_string_quote == quote &&
- the_current_tok->f_string_quote_size == quote_size) {
- return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "f-string: expecting '}'", start));
+ if (the_current_tok->quote == quote &&
+ the_current_tok->quote_size == quote_size) {
+ return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
+ "%c-string: expecting '}'", TOK_GET_STRING_PREFIX(tok)));
}
}
@@ -1136,12 +1208,12 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
int cursor = current_tok->curly_bracket_depth - (c != '{');
int in_format_spec = current_tok->in_format_spec;
int cursor_in_format_with_debug =
- cursor == 1 && (current_tok->f_string_debug || in_format_spec);
+ cursor == 1 && (current_tok->in_debug || in_format_spec);
int cursor_valid = cursor == 0 || cursor_in_format_with_debug;
- if ((cursor_valid) && !_PyLexer_update_fstring_expr(tok, c)) {
+ if ((cursor_valid) && !_PyLexer_update_ftstring_expr(tok, c)) {
return MAKE_TOKEN(ENDMARKER);
}
- if ((cursor_valid) && c != '{' && set_fstring_expr(tok, token, c)) {
+ if ((cursor_valid) && c != '{' && set_ftstring_expr(tok, token, c)) {
return MAKE_TOKEN(ERRORTOKEN);
}
@@ -1194,7 +1266,8 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
case ']':
case '}':
if (INSIDE_FSTRING(tok) && !current_tok->curly_bracket_depth && c == '}') {
- return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "f-string: single '}' is not allowed"));
+ return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
+ "%c-string: single '}' is not allowed", TOK_GET_STRING_PREFIX(tok)));
}
if (!tok->tok_extra_tokens && !tok->level) {
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "unmatched '%c'", c));
@@ -1214,7 +1287,8 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
assert(current_tok->curly_bracket_depth >= 0);
int previous_bracket = current_tok->curly_bracket_depth - 1;
if (previous_bracket == current_tok->curly_bracket_expr_start_depth) {
- return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "f-string: unmatched '%c'", c));
+ return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
+ "%c-string: unmatched '%c'", TOK_GET_STRING_PREFIX(tok), c));
}
}
if (tok->parenlinenostack[tok->level] != tok->lineno) {
@@ -1235,13 +1309,14 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
if (INSIDE_FSTRING(tok)) {
current_tok->curly_bracket_depth--;
if (current_tok->curly_bracket_depth < 0) {
- return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "f-string: unmatched '%c'", c));
+ return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "%c-string: unmatched '%c'",
+ TOK_GET_STRING_PREFIX(tok), c));
}
if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) {
current_tok->curly_bracket_expr_start_depth--;
current_tok->kind = TOK_FSTRING_MODE;
current_tok->in_format_spec = 0;
- current_tok->f_string_debug = 0;
+ current_tok->in_debug = 0;
}
}
break;
@@ -1254,7 +1329,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
}
if( c == '=' && INSIDE_FSTRING_EXPR(current_tok)) {
- current_tok->f_string_debug = 1;
+ current_tok->in_debug = 1;
}
/* Punctuation character */
@@ -1285,7 +1360,8 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
if (peek1 != '{') {
current_tok->curly_bracket_expr_start_depth++;
if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
- return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "f-string: expressions nested too deeply"));
+ return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
+ "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
}
TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
return tok_get_normal_mode(tok, current_tok, token);
@@ -1296,9 +1372,9 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
}
// Check if we are at the end of the string
- for (int i = 0; i < current_tok->f_string_quote_size; i++) {
+ for (int i = 0; i < current_tok->quote_size; i++) {
int quote = tok_nextc(tok);
- if (quote != current_tok->f_string_quote) {
+ if (quote != current_tok->quote) {
tok_backup(tok, quote);
goto f_string_middle;
}
@@ -1314,14 +1390,14 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct
p_start = tok->start;
p_end = tok->cur;
tok->tok_mode_stack_index--;
- return MAKE_TOKEN(FSTRING_END);
+ return MAKE_TOKEN(FTSTRING_END(current_tok));
f_string_middle:
// TODO: This is a bit of a hack, but it works for now. We need to find a better way to handle
// this.
tok->multi_line_start = tok->line_start;
- while (end_quote_size != current_tok->f_string_quote_size) {
+ while (end_quote_size != current_tok->quote_size) {
int c = tok_nextc(tok);
if (tok->done == E_ERROR || tok->done == E_DECODE) {
return MAKE_TOKEN(ERRORTOKEN);
@@ -1332,7 +1408,7 @@ f_string_middle:
INSIDE_FSTRING_EXPR(current_tok)
);
- if (c == EOF || (current_tok->f_string_quote_size == 1 && c == '\n')) {
+ if (c == EOF || (current_tok->quote_size == 1 && c == '\n')) {
if (tok->decoding_erred) {
return MAKE_TOKEN(ERRORTOKEN);
}
@@ -1341,11 +1417,12 @@ f_string_middle:
// it means that the format spec ends here and we should
// return to the regular mode.
if (in_format_spec && c == '\n') {
- if (current_tok->f_string_quote_size == 1) {
+ if (current_tok->quote_size == 1) {
return MAKE_TOKEN(
_PyTokenizer_syntaxerror(
tok,
- "f-string: newlines are not allowed in format specifiers for single quoted f-strings"
+ "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings",
+ TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok)
)
);
}
@@ -1354,25 +1431,26 @@ f_string_middle:
current_tok->in_format_spec = 0;
p_start = tok->start;
p_end = tok->cur;
- return MAKE_TOKEN(FSTRING_MIDDLE);
+ return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
}
assert(tok->multi_line_start != NULL);
// shift the tok_state's location into
// the start of string, and report the error
// from the initial quote character
- tok->cur = (char *)current_tok->f_string_start;
+ tok->cur = (char *)current_tok->start;
tok->cur++;
- tok->line_start = current_tok->f_string_multi_line_start;
+ tok->line_start = current_tok->multi_line_start;
int start = tok->lineno;
tokenizer_mode *the_current_tok = TOK_GET_MODE(tok);
- tok->lineno = the_current_tok->f_string_line_start;
+ tok->lineno = the_current_tok->first_line;
- if (current_tok->f_string_quote_size == 3) {
+ if (current_tok->quote_size == 3) {
_PyTokenizer_syntaxerror(tok,
- "unterminated triple-quoted f-string literal"
- " (detected at line %d)", start);
+ "unterminated triple-quoted %c-string literal"
+ " (detected at line %d)",
+ TOK_GET_STRING_PREFIX(tok), start);
if (c != '\n') {
tok->done = E_EOFS;
}
@@ -1380,12 +1458,12 @@ f_string_middle:
}
else {
return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
- "unterminated f-string literal (detected at"
- " line %d)", start));
+ "unterminated %c-string literal (detected at"
+ " line %d)", TOK_GET_STRING_PREFIX(tok), start));
}
}
- if (c == current_tok->f_string_quote) {
+ if (c == current_tok->quote) {
end_quote_size += 1;
continue;
} else {
@@ -1393,7 +1471,7 @@ f_string_middle:
}
if (c == '{') {
- if (!_PyLexer_update_fstring_expr(tok, c)) {
+ if (!_PyLexer_update_ftstring_expr(tok, c)) {
return MAKE_TOKEN(ENDMARKER);
}
int peek = tok_nextc(tok);
@@ -1402,7 +1480,8 @@ f_string_middle:
tok_backup(tok, c);
current_tok->curly_bracket_expr_start_depth++;
if (current_tok->curly_bracket_expr_start_depth >= MAX_EXPR_NESTING) {
- return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "f-string: expressions nested too deeply"));
+ return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok,
+ "%c-string: expressions nested too deeply", TOK_GET_STRING_PREFIX(tok)));
}
TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE;
current_tok->in_format_spec = 0;
@@ -1412,12 +1491,12 @@ f_string_middle:
p_start = tok->start;
p_end = tok->cur - 1;
}
- return MAKE_TOKEN(FSTRING_MIDDLE);
+ return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
} else if (c == '}') {
if (unicode_escape) {
p_start = tok->start;
p_end = tok->cur;
- return MAKE_TOKEN(FSTRING_MIDDLE);
+ return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
}
int peek = tok_nextc(tok);
@@ -1437,7 +1516,7 @@ f_string_middle:
p_start = tok->start;
p_end = tok->cur;
}
- return MAKE_TOKEN(FSTRING_MIDDLE);
+ return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
} else if (c == '\\') {
int peek = tok_nextc(tok);
if (peek == '\r') {
@@ -1447,7 +1526,7 @@ f_string_middle:
// brace. We have to restore and return the control back
// to the loop for the next iteration.
if (peek == '{' || peek == '}') {
- if (!current_tok->f_string_raw) {
+ if (!current_tok->raw) {
if (_PyTokenizer_warn_invalid_escape_sequence(tok, peek)) {
return MAKE_TOKEN(ERRORTOKEN);
}
@@ -1456,7 +1535,7 @@ f_string_middle:
continue;
}
- if (!current_tok->f_string_raw) {
+ if (!current_tok->raw) {
if (peek == 'N') {
/* Handle named unicode escapes (\N{BULLET}) */
peek = tok_nextc(tok);
@@ -1474,12 +1553,12 @@ f_string_middle:
// Backup the f-string quotes to emit a final FSTRING_MIDDLE and
// add the quotes to the FSTRING_END in the next tokenizer iteration.
- for (int i = 0; i < current_tok->f_string_quote_size; i++) {
- tok_backup(tok, current_tok->f_string_quote);
+ for (int i = 0; i < current_tok->quote_size; i++) {
+ tok_backup(tok, current_tok->quote);
}
p_start = tok->start;
p_end = tok->cur;
- return MAKE_TOKEN(FSTRING_MIDDLE);
+ return MAKE_TOKEN(FTSTRING_MIDDLE(current_tok));
}
static int
diff --git a/Parser/lexer/lexer.h b/Parser/lexer/lexer.h
index 7f21bf56bba..1d97ac57b74 100644
--- a/Parser/lexer/lexer.h
+++ b/Parser/lexer/lexer.h
@@ -3,7 +3,7 @@
#include "state.h"
-int _PyLexer_update_fstring_expr(struct tok_state *tok, char cur);
+int _PyLexer_update_ftstring_expr(struct tok_state *tok, char cur);
int _PyTokenizer_Get(struct tok_state *, struct token *);
diff --git a/Parser/lexer/state.c b/Parser/lexer/state.c
index 1665debea30..2de9004fe08 100644
--- a/Parser/lexer/state.c
+++ b/Parser/lexer/state.c
@@ -54,7 +54,7 @@ _PyTokenizer_tok_new(void)
tok->tok_extra_tokens = 0;
tok->comment_newline = 0;
tok->implicit_newline = 0;
- tok->tok_mode_stack[0] = (tokenizer_mode){.kind =TOK_REGULAR_MODE, .f_string_quote='\0', .f_string_quote_size = 0, .f_string_debug=0};
+ tok->tok_mode_stack[0] = (tokenizer_mode){.kind =TOK_REGULAR_MODE, .quote='\0', .quote_size = 0, .in_debug=0};
tok->tok_mode_stack_index = 0;
#ifdef Py_DEBUG
tok->debug = _Py_GetConfig()->parser_debug;
diff --git a/Parser/lexer/state.h b/Parser/lexer/state.h
index 9ed3babfdbf..5e8cac7249b 100644
--- a/Parser/lexer/state.h
+++ b/Parser/lexer/state.h
@@ -36,6 +36,11 @@ enum tokenizer_mode_kind_t {
TOK_FSTRING_MODE,
};
+enum string_kind_t {
+ FSTRING,
+ TSTRING,
+};
+
#define MAX_EXPR_NESTING 3
typedef struct _tokenizer_mode {
@@ -44,21 +49,23 @@ typedef struct _tokenizer_mode {
int curly_bracket_depth;
int curly_bracket_expr_start_depth;
- char f_string_quote;
- int f_string_quote_size;
- int f_string_raw;
- const char* f_string_start;
- const char* f_string_multi_line_start;
- int f_string_line_start;
+ char quote;
+ int quote_size;
+ int raw;
+ const char* start;
+ const char* multi_line_start;
+ int first_line;
- Py_ssize_t f_string_start_offset;
- Py_ssize_t f_string_multi_line_start_offset;
+ Py_ssize_t start_offset;
+ Py_ssize_t multi_line_start_offset;
Py_ssize_t last_expr_size;
Py_ssize_t last_expr_end;
char* last_expr_buffer;
- int f_string_debug;
+ int in_debug;
int in_format_spec;
+
+ enum string_kind_t string_kind;
} tokenizer_mode;
/* Tokenizer state */