From 8638838c04735283a6f8e90792dc7b38cf177da2 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Tue, 3 Oct 2023 22:40:11 +0530 Subject: [PATCH 1/5] Allow multi-line f-string with format spec --- crates/ruff_python_parser/src/lexer.rs | 32 +++- ...s__fstring_with_multiline_format_spec.snap | 175 ++++++++++++++++++ 2 files changed, 203 insertions(+), 4 deletions(-) create mode 100644 crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index 3ba20e78e88e8..463e92a208ca8 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -566,6 +566,9 @@ impl<'source> Lexer<'source> { // Tracks the last offset of token value that has been written to `normalized`. let mut last_offset = self.offset(); + // This isn't going to change for the duration of the loop. + let in_format_spec = fstring.is_in_format_spec(self.nesting); + let mut in_named_unicode = false; loop { @@ -585,6 +588,13 @@ impl<'source> Lexer<'source> { }); } '\n' | '\r' if !fstring.is_triple_quoted() => { + // If we encounter a newline while we're in a format spec, then + // we stop here and let the lexer emit the newline token. + // + // Relevant discussion: https://github.com/python/cpython/issues/110259 + if in_format_spec { + break; + } return Err(LexicalError { error: LexicalErrorType::FStringError(FStringErrorType::UnterminatedString), location: self.offset(), @@ -620,7 +630,7 @@ impl<'source> Lexer<'source> { } } '{' => { - if self.cursor.second() == '{' && !fstring.is_in_format_spec(self.nesting) { + if self.cursor.second() == '{' && !in_format_spec { self.cursor.bump(); normalized .push_str(&self.source[TextRange::new(last_offset, self.offset())]); @@ -634,9 +644,7 @@ impl<'source> Lexer<'source> { if in_named_unicode { in_named_unicode = false; self.cursor.bump(); - } else if self.cursor.second() == '}' - && !fstring.is_in_format_spec(self.nesting) - { + } else if self.cursor.second() == '}' && !in_format_spec { self.cursor.bump(); normalized .push_str(&self.source[TextRange::new(last_offset, self.offset())]); @@ -2051,6 +2059,22 @@ def f(arg=%timeit a = b): assert_debug_snapshot!(lex_source(source)); } + #[test] + fn test_fstring_with_multiline_format_spec() { + let source = r"f'''__{ + x:d +}__''' +f'''__{ + x:a + b + c +}__''' +f'__{ + x:d +}__'"; + assert_debug_snapshot!(lex_source(source)); + } + #[test] fn test_fstring_conversion() { let source = r#"f"{x!s} {x=!r} {x:.3f!r} {{x!r}}""#; diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap new file mode 100644 index 0000000000000..c88d685d40a68 --- /dev/null +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap @@ -0,0 +1,175 @@ +--- +source: crates/ruff_python_parser/src/lexer.rs +expression: lex_source(source) +--- +[ + ( + FStringStart, + 0..4, + ), + ( + FStringMiddle { + value: "__", + is_raw: false, + }, + 4..6, + ), + ( + Lbrace, + 6..7, + ), + ( + NonLogicalNewline, + 7..8, + ), + ( + Name { + name: "x", + }, + 12..13, + ), + ( + Colon, + 13..14, + ), + ( + FStringMiddle { + value: "d\n", + is_raw: false, + }, + 14..16, + ), + ( + Rbrace, + 16..17, + ), + ( + FStringMiddle { + value: "__", + is_raw: false, + }, + 17..19, + ), + ( + FStringEnd, + 19..22, + ), + ( + Newline, + 22..23, + ), + ( + FStringStart, + 23..27, + ), + ( + FStringMiddle { + value: "__", + is_raw: false, + }, + 27..29, + ), + ( + Lbrace, + 29..30, + ), + ( + NonLogicalNewline, + 30..31, + ), + ( + Name { + name: "x", + }, + 35..36, + ), + ( + Colon, + 36..37, + ), + ( + FStringMiddle { + value: "a\n b\n c\n", + is_raw: false, + }, + 37..61, + ), + ( + Rbrace, + 61..62, + ), + ( + FStringMiddle { + value: "__", + is_raw: false, + }, + 62..64, + ), + ( + FStringEnd, + 64..67, + ), + ( + Newline, + 67..68, + ), + ( + FStringStart, + 68..70, + ), + ( + FStringMiddle { + value: "__", + is_raw: false, + }, + 70..72, + ), + ( + Lbrace, + 72..73, + ), + ( + NonLogicalNewline, + 73..74, + ), + ( + Name { + name: "x", + }, + 78..79, + ), + ( + Colon, + 79..80, + ), + ( + FStringMiddle { + value: "d", + is_raw: false, + }, + 80..81, + ), + ( + NonLogicalNewline, + 81..82, + ), + ( + Rbrace, + 82..83, + ), + ( + FStringMiddle { + value: "__", + is_raw: false, + }, + 83..85, + ), + ( + FStringEnd, + 85..86, + ), + ( + Newline, + 86..86, + ), +] From 0f6ee6292741253e127d8e2b1ac65b45e320857e Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Thu, 5 Oct 2023 21:59:05 +0530 Subject: [PATCH 2/5] End format spec when newline in single-quoted format spec --- crates/ruff_python_parser/src/lexer.rs | 15 +++- ...s__fstring_with_multiline_format_spec.snap | 71 ++++++++++++++++++- 2 files changed, 84 insertions(+), 2 deletions(-) diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index 463e92a208ca8..448a3e7b34681 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -1202,6 +1202,9 @@ impl<'source> Lexer<'source> { self.state = State::AfterNewline; Tok::Newline } else { + if let Some(fstring) = self.fstrings.current_mut() { + fstring.try_end_format_spec(self.nesting); + } Tok::NonLogicalNewline }, self.token_range(), @@ -1215,6 +1218,9 @@ impl<'source> Lexer<'source> { self.state = State::AfterNewline; Tok::Newline } else { + if let Some(fstring) = self.fstrings.current_mut() { + fstring.try_end_format_spec(self.nesting); + } Tok::NonLogicalNewline }, self.token_range(), @@ -2061,6 +2067,8 @@ def f(arg=%timeit a = b): #[test] fn test_fstring_with_multiline_format_spec() { + // The last f-string is invalid syntactically but we should still lex it. + // Note that the `b` is a `Name` token and not a `FStringMiddle` token. let source = r"f'''__{ x:d }__''' @@ -2071,7 +2079,12 @@ f'''__{ }__''' f'__{ x:d -}__'"; +}__' +f'__{ + x:a + b +}__' +"; assert_debug_snapshot!(lex_source(source)); } diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap index c88d685d40a68..6cab3fb5a5bda 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__lexer__tests__fstring_with_multiline_format_spec.snap @@ -170,6 +170,75 @@ expression: lex_source(source) ), ( Newline, - 86..86, + 86..87, + ), + ( + FStringStart, + 87..89, + ), + ( + FStringMiddle { + value: "__", + is_raw: false, + }, + 89..91, + ), + ( + Lbrace, + 91..92, + ), + ( + NonLogicalNewline, + 92..93, + ), + ( + Name { + name: "x", + }, + 97..98, + ), + ( + Colon, + 98..99, + ), + ( + FStringMiddle { + value: "a", + is_raw: false, + }, + 99..100, + ), + ( + NonLogicalNewline, + 100..101, + ), + ( + Name { + name: "b", + }, + 109..110, + ), + ( + NonLogicalNewline, + 110..111, + ), + ( + Rbrace, + 111..112, + ), + ( + FStringMiddle { + value: "__", + is_raw: false, + }, + 112..114, + ), + ( + FStringEnd, + 114..115, + ), + ( + Newline, + 115..116, ), ] From 637862cba58bc76112569b1963e5a1eaf92fd71b Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Thu, 5 Oct 2023 22:28:07 +0530 Subject: [PATCH 3/5] Implement `State` solution --- crates/ruff_python_parser/src/lexer.rs | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index 448a3e7b34681..935dd3a059ffd 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -767,13 +767,17 @@ impl<'source> Lexer<'source> { // This is the main entry point. Call this function to retrieve the next token. // This function is used by the iterator implementation. pub fn next_token(&mut self) -> LexResult { - if let Some(fstring) = self.fstrings.current() { - if !fstring.is_in_expression(self.nesting) { + if let Some(fstring) = self.fstrings.current_mut() { + if self.state.is_after_non_logical_newline() && fstring.is_in_format_spec(self.nesting) + { + fstring.try_end_format_spec(self.nesting); + } else if !fstring.is_in_expression(self.nesting) { match self.lex_fstring_middle_or_end() { Ok(Some(tok)) => { if tok == Tok::FStringEnd { self.fstrings.pop(); } + self.state = State::Other; return Ok((tok, self.token_range())); } Err(e) => { @@ -1202,9 +1206,7 @@ impl<'source> Lexer<'source> { self.state = State::AfterNewline; Tok::Newline } else { - if let Some(fstring) = self.fstrings.current_mut() { - fstring.try_end_format_spec(self.nesting); - } + self.state = State::AfterNonLogicalNewline; Tok::NonLogicalNewline }, self.token_range(), @@ -1218,9 +1220,7 @@ impl<'source> Lexer<'source> { self.state = State::AfterNewline; Tok::Newline } else { - if let Some(fstring) = self.fstrings.current_mut() { - fstring.try_end_format_spec(self.nesting); - } + self.state = State::AfterNonLogicalNewline; Tok::NonLogicalNewline }, self.token_range(), @@ -1400,6 +1400,9 @@ enum State { /// The lexer is at the start of a new logical line but **after** the indentation NonEmptyLogicalLine, + /// Lexer is right after a `NonLogicalNewline` token. + AfterNonLogicalNewline, + /// Lexer is right after an equal token AfterEqual, @@ -1416,6 +1419,10 @@ impl State { matches!(self, State::AfterNewline | State::NonEmptyLogicalLine) } + const fn is_after_non_logical_newline(self) -> bool { + matches!(self, State::AfterNonLogicalNewline) + } + const fn is_after_equal(self) -> bool { matches!(self, State::AfterEqual) } From 21975ac8dcae564a3a61741fcb4f5e0c3df9690b Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Thu, 5 Oct 2023 22:37:02 +0530 Subject: [PATCH 4/5] Revert "Implement `State` solution" This reverts commit 637862cba58bc76112569b1963e5a1eaf92fd71b. Guess this doesn't work at all :) --- crates/ruff_python_parser/src/lexer.rs | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index 935dd3a059ffd..448a3e7b34681 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -767,17 +767,13 @@ impl<'source> Lexer<'source> { // This is the main entry point. Call this function to retrieve the next token. // This function is used by the iterator implementation. pub fn next_token(&mut self) -> LexResult { - if let Some(fstring) = self.fstrings.current_mut() { - if self.state.is_after_non_logical_newline() && fstring.is_in_format_spec(self.nesting) - { - fstring.try_end_format_spec(self.nesting); - } else if !fstring.is_in_expression(self.nesting) { + if let Some(fstring) = self.fstrings.current() { + if !fstring.is_in_expression(self.nesting) { match self.lex_fstring_middle_or_end() { Ok(Some(tok)) => { if tok == Tok::FStringEnd { self.fstrings.pop(); } - self.state = State::Other; return Ok((tok, self.token_range())); } Err(e) => { @@ -1206,7 +1202,9 @@ impl<'source> Lexer<'source> { self.state = State::AfterNewline; Tok::Newline } else { - self.state = State::AfterNonLogicalNewline; + if let Some(fstring) = self.fstrings.current_mut() { + fstring.try_end_format_spec(self.nesting); + } Tok::NonLogicalNewline }, self.token_range(), @@ -1220,7 +1218,9 @@ impl<'source> Lexer<'source> { self.state = State::AfterNewline; Tok::Newline } else { - self.state = State::AfterNonLogicalNewline; + if let Some(fstring) = self.fstrings.current_mut() { + fstring.try_end_format_spec(self.nesting); + } Tok::NonLogicalNewline }, self.token_range(), @@ -1400,9 +1400,6 @@ enum State { /// The lexer is at the start of a new logical line but **after** the indentation NonEmptyLogicalLine, - /// Lexer is right after a `NonLogicalNewline` token. - AfterNonLogicalNewline, - /// Lexer is right after an equal token AfterEqual, @@ -1419,10 +1416,6 @@ impl State { matches!(self, State::AfterNewline | State::NonEmptyLogicalLine) } - const fn is_after_non_logical_newline(self) -> bool { - matches!(self, State::AfterNonLogicalNewline) - } - const fn is_after_equal(self) -> bool { matches!(self, State::AfterEqual) } From 9e6ae10e769e2041df7449b2ef2231148bc318af Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Thu, 5 Oct 2023 22:42:16 +0530 Subject: [PATCH 5/5] Add parser test case --- crates/ruff_python_parser/src/parser.rs | 5 ++ ...ython_parser__parser__tests__fstrings.snap | 49 +++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/crates/ruff_python_parser/src/parser.rs b/crates/ruff_python_parser/src/parser.rs index 5b7943d56889c..ae6d9a53a2f94 100644 --- a/crates/ruff_python_parser/src/parser.rs +++ b/crates/ruff_python_parser/src/parser.rs @@ -1290,6 +1290,11 @@ match foo: f"\{foo}\{bar:\}" f"\\{{foo\\}}" +f"""{ + foo:x + y + z +}""" "# .trim(), "", diff --git a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__parser__tests__fstrings.snap b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__parser__tests__fstrings.snap index c897a798b5d76..0188a187e7aed 100644 --- a/crates/ruff_python_parser/src/snapshots/ruff_python_parser__parser__tests__fstrings.snap +++ b/crates/ruff_python_parser/src/snapshots/ruff_python_parser__parser__tests__fstrings.snap @@ -845,4 +845,53 @@ expression: parse_ast ), }, ), + Expr( + StmtExpr { + range: 304..344, + value: FString( + ExprFString { + range: 304..344, + values: [ + FormattedValue( + ExprFormattedValue { + range: 308..341, + value: Name( + ExprName { + range: 314..317, + id: "foo", + ctx: Load, + }, + ), + debug_text: None, + conversion: None, + format_spec: Some( + FString( + ExprFString { + range: 318..340, + values: [ + Constant( + ExprConstant { + range: 318..340, + value: Str( + StringConstant { + value: "x\n y\n z\n", + unicode: false, + implicit_concatenated: false, + }, + ), + }, + ), + ], + implicit_concatenated: false, + }, + ), + ), + }, + ), + ], + implicit_concatenated: false, + }, + ), + }, + ), ]