From 24e4cb20ab03c20af390fec7303207af2a4a09e8 Mon Sep 17 00:00:00 2001 From: GiGaGon <107241144+MeGaGiGaGon@users.noreply.github.com> Date: Thu, 5 Jun 2025 18:49:15 -0700 Subject: [PATCH] Fix backslash cr nl bug (#4673) * Update tokenize.py * Update CHANGES.md * Update test_black.py * Update test_black.py * Update test_black.py --- CHANGES.md | 1 + src/blib2to3/pgen2/tokenize.py | 12 +++++++++++- tests/test_black.py | 20 ++++++++++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index cf415f1..ae8bb78 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -16,6 +16,7 @@ - Handle `# fmt: skip` followed by a comment at the end of file (#4635) - Fix crash when a tuple appears in the `as` clause of a `with` statement (#4634) - Fix crash when tuple is used as a context manager inside a `with` statement (#4646) +- Fix crash on a line-continuation backslash followed by a `\r\n` line ending (#4673) ### Preview style diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index 5cbfd51..46c4319 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -113,7 +113,17 @@ def transform_whitespace( and prev_token.type not in (TokenType.nl, TokenType.newline) ): token_str = source[token.start_index : token.end_index] - if token_str.startswith("\\\n"): + if token_str.startswith("\\\r\n"): + return pytokens.Token( + TokenType.nl, + token.start_index, + token.start_index + 3, + token.start_line, + token.start_col, + token.start_line, + token.start_col + 3, + ) + elif token_str.startswith("\\\n") or token_str.startswith("\\\r"): return pytokens.Token( TokenType.nl, token.start_index, diff --git a/tests/test_black.py b/tests/test_black.py index ee026f3..4588add 100644 --- a/tests/test_black.py +++ b/tests/test_black.py @@ -2065,6 +2065,26 @@ def test_lines_with_leading_tabs_expanded(self) -> None: assert lines_with_leading_tabs_expanded("\t\tx") == [f"{tab}{tab}x"] assert lines_with_leading_tabs_expanded("\tx\n y") == [f"{tab}x", " y"] + def 
test_backslash_carriage_return(self) -> None: + # These tests are here instead of in the normal cases because + # of git's newline normalization and because it's hard to + # get `\r` vs `\r\n` vs `\n` to display properly in editors + assert black.format_str("x=\\\r\n1", mode=black.FileMode()) == "x = 1\n" + assert black.format_str("x=\\\n1", mode=black.FileMode()) == "x = 1\n" + assert black.format_str("x=\\\r1", mode=black.FileMode()) == "x = 1\n" + assert ( + black.format_str("class A\\\r\n:...", mode=black.FileMode()) + == "class A: ...\n" + ) + assert ( + black.format_str("class A\\\n:...", mode=black.FileMode()) + == "class A: ...\n" + ) + assert ( + black.format_str("class A\\\r:...", mode=black.FileMode()) + == "class A: ...\n" + ) + class TestCaching: def test_get_cache_dir(