From 8638838c04735283a6f8e90792dc7b38cf177da2 Mon Sep 17 00:00:00 2001
From: Dhruv Manilawala <dhruvmanila@gmail.com>
Date: Tue, 3 Oct 2023 22:40:11 +0530
Subject: [PATCH 1/5] Allow multi-line f-string with format spec

---
 crates/ruff_python_parser/src/lexer.rs        |  32 +++-
 ...s__fstring_with_multiline_format_spec.snap | 175 ++++++++++++++++++
 2 files changed, 203 insertions(+), 4 deletions(-)
 create mode 100644 crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap

diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs
index 3ba20e78e88e8..463e92a208ca8 100644
--- a/crates/ruff_python_parser/src/lexer.rs
+++ b/crates/ruff_python_parser/src/lexer.rs
@@ -566,6 +566,9 @@ impl<'source> Lexer<'source> {
         // Tracks the last offset of token value that has been written to `normalized`.
         let mut last_offset = self.offset();
 
+        // The format spec status isn't going to change for the duration of the loop.
+        let in_format_spec = fstring.is_in_format_spec(self.nesting);
+
         let mut in_named_unicode = false;
 
         loop {
@@ -585,6 +588,13 @@ impl<'source> Lexer<'source> {
                     });
                 }
                 '\n' | '\r' if !fstring.is_triple_quoted() => {
+                    // If we encounter a newline while we're in a format spec, then
+                    // we stop here and let the lexer emit the newline token.
+                    //
+                    // Relevant discussion: https://github.com/python/cpython/issues/110259
+                    if in_format_spec {
+                        break;
+                    }
                     return Err(LexicalError {
                         error: LexicalErrorType::FStringError(FStringErrorType::UnterminatedString),
                         location: self.offset(),
@@ -620,7 +630,7 @@ impl<'source> Lexer<'source> {
                     }
                 }
                 '{' => {
-                    if self.cursor.second() == '{' && !fstring.is_in_format_spec(self.nesting) {
+                    if self.cursor.second() == '{' && !in_format_spec {
                         self.cursor.bump();
                         normalized
                             .push_str(&self.source[TextRange::new(last_offset, self.offset())]);
@@ -634,9 +644,7 @@ impl<'source> Lexer<'source> {
                     if in_named_unicode {
                         in_named_unicode = false;
                         self.cursor.bump();
-                    } else if self.cursor.second() == '}'
-                        && !fstring.is_in_format_spec(self.nesting)
-                    {
+                    } else if self.cursor.second() == '}' && !in_format_spec {
                         self.cursor.bump();
                         normalized
                             .push_str(&self.source[TextRange::new(last_offset, self.offset())]);
@@ -2051,6 +2059,22 @@ def f(arg=%timeit a = b):
         assert_debug_snapshot!(lex_source(source));
     }
 
+    #[test]
+    fn test_fstring_with_multiline_format_spec() {
+        let source = r"f'''__{
+    x:d
+}__'''
+f'''__{
+    x:a
+        b
+          c
+}__'''
+f'__{
+    x:d
+}__'";
+        assert_debug_snapshot!(lex_source(source));
+    }
+
     #[test]
     fn test_fstring_conversion() {
         let source = r#"f"{x!s} {x=!r} {x:.3f!r} {{x!r}}""#;
diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap
new file mode 100644
index 0000000000000..c88d685d40a68
--- /dev/null
+++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap
@@ -0,0 +1,175 @@
+---
+source: crates/ruff_python_parser/src/lexer.rs
+expression: lex_source(source)
+---
+[
+    (
+        FStringStart,
+        0..4,
+    ),
+    (
+        FStringMiddle {
+            value: "__",
+            is_raw: false,
+        },
+        4..6,
+    ),
+    (
+        Lbrace,
+        6..7,
+    ),
+    (
+        NonLogicalNewline,
+        7..8,
+    ),
+    (
+        Name {
+            name: "x",
+        },
+        12..13,
+    ),
+    (
+        Colon,
+        13..14,
+    ),
+    (
+        FStringMiddle {
+            value: "d\n",
+            is_raw: false,
+        },
+        14..16,
+    ),
+    (
+        Rbrace,
+        16..17,
+    ),
+    (
+        FStringMiddle {
+            value: "__",
+            is_raw: false,
+        },
+        17..19,
+    ),
+    (
+        FStringEnd,
+        19..22,
+    ),
+    (
+        Newline,
+        22..23,
+    ),
+    (
+        FStringStart,
+        23..27,
+    ),
+    (
+        FStringMiddle {
+            value: "__",
+            is_raw: false,
+        },
+        27..29,
+    ),
+    (
+        Lbrace,
+        29..30,
+    ),
+    (
+        NonLogicalNewline,
+        30..31,
+    ),
+    (
+        Name {
+            name: "x",
+        },
+        35..36,
+    ),
+    (
+        Colon,
+        36..37,
+    ),
+    (
+        FStringMiddle {
+            value: "a\n        b\n          c\n",
+            is_raw: false,
+        },
+        37..61,
+    ),
+    (
+        Rbrace,
+        61..62,
+    ),
+    (
+        FStringMiddle {
+            value: "__",
+            is_raw: false,
+        },
+        62..64,
+    ),
+    (
+        FStringEnd,
+        64..67,
+    ),
+    (
+        Newline,
+        67..68,
+    ),
+    (
+        FStringStart,
+        68..70,
+    ),
+    (
+        FStringMiddle {
+            value: "__",
+            is_raw: false,
+        },
+        70..72,
+    ),
+    (
+        Lbrace,
+        72..73,
+    ),
+    (
+        NonLogicalNewline,
+        73..74,
+    ),
+    (
+        Name {
+            name: "x",
+        },
+        78..79,
+    ),
+    (
+        Colon,
+        79..80,
+    ),
+    (
+        FStringMiddle {
+            value: "d",
+            is_raw: false,
+        },
+        80..81,
+    ),
+    (
+        NonLogicalNewline,
+        81..82,
+    ),
+    (
+        Rbrace,
+        82..83,
+    ),
+    (
+        FStringMiddle {
+            value: "__",
+            is_raw: false,
+        },
+        83..85,
+    ),
+    (
+        FStringEnd,
+        85..86,
+    ),
+    (
+        Newline,
+        86..86,
+    ),
+]

From 0f6ee6292741253e127d8e2b1ac65b45e320857e Mon Sep 17 00:00:00 2001
From: Dhruv Manilawala <dhruvmanila@gmail.com>
Date: Thu, 5 Oct 2023 21:59:05 +0530
Subject: [PATCH 2/5] End format spec when newline in single-quoted format spec

---
 crates/ruff_python_parser/src/lexer.rs        | 15 +++-
 ...s__fstring_with_multiline_format_spec.snap | 71 ++++++++++++++++++-
 2 files changed, 84 insertions(+), 2 deletions(-)

diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs
index 463e92a208ca8..448a3e7b34681 100644
--- a/crates/ruff_python_parser/src/lexer.rs
+++ b/crates/ruff_python_parser/src/lexer.rs
@@ -1202,6 +1202,9 @@ impl<'source> Lexer<'source> {
                         self.state = State::AfterNewline;
                         Tok::Newline
                     } else {
+                        if let Some(fstring) = self.fstrings.current_mut() {
+                            fstring.try_end_format_spec(self.nesting);
+                        }
                         Tok::NonLogicalNewline
                     },
                     self.token_range(),
@@ -1215,6 +1218,9 @@ impl<'source> Lexer<'source> {
                         self.state = State::AfterNewline;
                         Tok::Newline
                     } else {
+                        if let Some(fstring) = self.fstrings.current_mut() {
+                            fstring.try_end_format_spec(self.nesting);
+                        }
                         Tok::NonLogicalNewline
                     },
                     self.token_range(),
@@ -2061,6 +2067,8 @@ def f(arg=%timeit a = b):
 
     #[test]
     fn test_fstring_with_multiline_format_spec() {
+        // The last f-string is invalid syntactically but we should still lex it.
+        // Note that the `b` is a `Name` token and not a `FStringMiddle` token.
         let source = r"f'''__{
     x:d
 }__'''
@@ -2071,7 +2079,12 @@ f'''__{
 }__'''
 f'__{
     x:d
-}__'";
+}__'
+f'__{
+    x:a
+        b
+}__'
+";
         assert_debug_snapshot!(lex_source(source));
     }
 
diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap
index c88d685d40a68..6cab3fb5a5bda 100644
--- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap
+++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap
@@ -170,6 +170,75 @@ expression: lex_source(source)
     ),
     (
         Newline,
-        86..86,
+        86..87,
+    ),
+    (
+        FStringStart,
+        87..89,
+    ),
+    (
+        FStringMiddle {
+            value: "__",
+            is_raw: false,
+        },
+        89..91,
+    ),
+    (
+        Lbrace,
+        91..92,
+    ),
+    (
+        NonLogicalNewline,
+        92..93,
+    ),
+    (
+        Name {
+            name: "x",
+        },
+        97..98,
+    ),
+    (
+        Colon,
+        98..99,
+    ),
+    (
+        FStringMiddle {
+            value: "a",
+            is_raw: false,
+        },
+        99..100,
+    ),
+    (
+        NonLogicalNewline,
+        100..101,
+    ),
+    (
+        Name {
+            name: "b",
+        },
+        109..110,
+    ),
+    (
+        NonLogicalNewline,
+        110..111,
+    ),
+    (
+        Rbrace,
+        111..112,
+    ),
+    (
+        FStringMiddle {
+            value: "__",
+            is_raw: false,
+        },
+        112..114,
+    ),
+    (
+        FStringEnd,
+        114..115,
+    ),
+    (
+        Newline,
+        115..116,
     ),
 ]

From 637862cba58bc76112569b1963e5a1eaf92fd71b Mon Sep 17 00:00:00 2001
From: Dhruv Manilawala <dhruvmanila@gmail.com>
Date: Thu, 5 Oct 2023 22:28:07 +0530
Subject: [PATCH 3/5] Implement `State` solution

---
 crates/ruff_python_parser/src/lexer.rs | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs
index 448a3e7b34681..935dd3a059ffd 100644
--- a/crates/ruff_python_parser/src/lexer.rs
+++ b/crates/ruff_python_parser/src/lexer.rs
@@ -767,13 +767,17 @@ impl<'source> Lexer<'source> {
     // This is the main entry point. Call this function to retrieve the next token.
     // This function is used by the iterator implementation.
     pub fn next_token(&mut self) -> LexResult {
-        if let Some(fstring) = self.fstrings.current() {
-            if !fstring.is_in_expression(self.nesting) {
+        if let Some(fstring) = self.fstrings.current_mut() {
+            if self.state.is_after_non_logical_newline() && fstring.is_in_format_spec(self.nesting)
+            {
+                fstring.try_end_format_spec(self.nesting);
+            } else if !fstring.is_in_expression(self.nesting) {
                 match self.lex_fstring_middle_or_end() {
                     Ok(Some(tok)) => {
                         if tok == Tok::FStringEnd {
                             self.fstrings.pop();
                         }
+                        self.state = State::Other;
                         return Ok((tok, self.token_range()));
                     }
                     Err(e) => {
@@ -1202,9 +1206,7 @@ impl<'source> Lexer<'source> {
                         self.state = State::AfterNewline;
                         Tok::Newline
                     } else {
-                        if let Some(fstring) = self.fstrings.current_mut() {
-                            fstring.try_end_format_spec(self.nesting);
-                        }
+                        self.state = State::AfterNonLogicalNewline;
                         Tok::NonLogicalNewline
                     },
                     self.token_range(),
@@ -1218,9 +1220,7 @@ impl<'source> Lexer<'source> {
                         self.state = State::AfterNewline;
                         Tok::Newline
                     } else {
-                        if let Some(fstring) = self.fstrings.current_mut() {
-                            fstring.try_end_format_spec(self.nesting);
-                        }
+                        self.state = State::AfterNonLogicalNewline;
                         Tok::NonLogicalNewline
                     },
                     self.token_range(),
@@ -1400,6 +1400,9 @@ enum State {
     /// The lexer is at the start of a new logical line but **after** the indentation
     NonEmptyLogicalLine,
 
+    /// Lexer is right after a `NonLogicalNewline` token.
+    AfterNonLogicalNewline,
+
     /// Lexer is right after an equal token
     AfterEqual,
 
@@ -1416,6 +1419,10 @@ impl State {
         matches!(self, State::AfterNewline | State::NonEmptyLogicalLine)
     }
 
+    const fn is_after_non_logical_newline(self) -> bool {
+        matches!(self, State::AfterNonLogicalNewline)
+    }
+
     const fn is_after_equal(self) -> bool {
         matches!(self, State::AfterEqual)
     }

From 21975ac8dcae564a3a61741fcb4f5e0c3df9690b Mon Sep 17 00:00:00 2001
From: Dhruv Manilawala <dhruvmanila@gmail.com>
Date: Thu, 5 Oct 2023 22:37:02 +0530
Subject: [PATCH 4/5] Revert "Implement `State` solution"

This reverts commit 637862cba58bc76112569b1963e5a1eaf92fd71b.

Guess this doesn't work at all :)
---
 crates/ruff_python_parser/src/lexer.rs | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs
index 935dd3a059ffd..448a3e7b34681 100644
--- a/crates/ruff_python_parser/src/lexer.rs
+++ b/crates/ruff_python_parser/src/lexer.rs
@@ -767,17 +767,13 @@ impl<'source> Lexer<'source> {
     // This is the main entry point. Call this function to retrieve the next token.
     // This function is used by the iterator implementation.
     pub fn next_token(&mut self) -> LexResult {
-        if let Some(fstring) = self.fstrings.current_mut() {
-            if self.state.is_after_non_logical_newline() && fstring.is_in_format_spec(self.nesting)
-            {
-                fstring.try_end_format_spec(self.nesting);
-            } else if !fstring.is_in_expression(self.nesting) {
+        if let Some(fstring) = self.fstrings.current() {
+            if !fstring.is_in_expression(self.nesting) {
                 match self.lex_fstring_middle_or_end() {
                     Ok(Some(tok)) => {
                         if tok == Tok::FStringEnd {
                             self.fstrings.pop();
                         }
-                        self.state = State::Other;
                         return Ok((tok, self.token_range()));
                     }
                     Err(e) => {
@@ -1206,7 +1202,9 @@ impl<'source> Lexer<'source> {
                         self.state = State::AfterNewline;
                         Tok::Newline
                     } else {
-                        self.state = State::AfterNonLogicalNewline;
+                        if let Some(fstring) = self.fstrings.current_mut() {
+                            fstring.try_end_format_spec(self.nesting);
+                        }
                         Tok::NonLogicalNewline
                     },
                     self.token_range(),
@@ -1220,7 +1218,9 @@ impl<'source> Lexer<'source> {
                         self.state = State::AfterNewline;
                         Tok::Newline
                     } else {
-                        self.state = State::AfterNonLogicalNewline;
+                        if let Some(fstring) = self.fstrings.current_mut() {
+                            fstring.try_end_format_spec(self.nesting);
+                        }
                         Tok::NonLogicalNewline
                     },
                     self.token_range(),
@@ -1400,9 +1400,6 @@ enum State {
     /// The lexer is at the start of a new logical line but **after** the indentation
     NonEmptyLogicalLine,
 
-    /// Lexer is right after a `NonLogicalNewline` token.
-    AfterNonLogicalNewline,
-
     /// Lexer is right after an equal token
     AfterEqual,
 
@@ -1419,10 +1416,6 @@ impl State {
         matches!(self, State::AfterNewline | State::NonEmptyLogicalLine)
     }
 
-    const fn is_after_non_logical_newline(self) -> bool {
-        matches!(self, State::AfterNonLogicalNewline)
-    }
-
     const fn is_after_equal(self) -> bool {
         matches!(self, State::AfterEqual)
     }

From 9e6ae10e769e2041df7449b2ef2231148bc318af Mon Sep 17 00:00:00 2001
From: Dhruv Manilawala <dhruvmanila@gmail.com>
Date: Thu, 5 Oct 2023 22:42:16 +0530
Subject: [PATCH 5/5] Add parser test case

---
 crates/ruff_python_parser/src/parser.rs       |  5 ++
 ...ython_parser__parser__tests__fstrings.snap | 49 +++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/crates/ruff_python_parser/src/parser.rs b/crates/ruff_python_parser/src/parser.rs
index 5b7943d56889c..ae6d9a53a2f94 100644
--- a/crates/ruff_python_parser/src/parser.rs
+++ b/crates/ruff_python_parser/src/parser.rs
@@ -1290,6 +1290,11 @@ match foo:
 
 f"\{foo}\{bar:\}"
 f"\\{{foo\\}}"
+f"""{
+    foo:x
+        y
+        z
+}"""
 "#
             .trim(),
             "<test>",
diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__parser__tests__fstrings.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__parser__tests__fstrings.snap
index c897a798b5d76..0188a187e7aed 100644
--- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__parser__tests__fstrings.snap
+++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__parser__tests__fstrings.snap
@@ -845,4 +845,53 @@ expression: parse_ast
             ),
         },
     ),
+    Expr(
+        StmtExpr {
+            range: 304..344,
+            value: FString(
+                ExprFString {
+                    range: 304..344,
+                    values: [
+                        FormattedValue(
+                            ExprFormattedValue {
+                                range: 308..341,
+                                value: Name(
+                                    ExprName {
+                                        range: 314..317,
+                                        id: "foo",
+                                        ctx: Load,
+                                    },
+                                ),
+                                debug_text: None,
+                                conversion: None,
+                                format_spec: Some(
+                                    FString(
+                                        ExprFString {
+                                            range: 318..340,
+                                            values: [
+                                                Constant(
+                                                    ExprConstant {
+                                                        range: 318..340,
+                                                        value: Str(
+                                                            StringConstant {
+                                                                value: "x\n        y\n        z\n",
+                                                                unicode: false,
+                                                                implicit_concatenated: false,
+                                                            },
+                                                        ),
+                                                    },
+                                                ),
+                                            ],
+                                            implicit_concatenated: false,
+                                        },
+                                    ),
+                                ),
+                            },
+                        ),
+                    ],
+                    implicit_concatenated: false,
+                },
+            ),
+        },
+    ),
 ]