Normalise string prefix order (#2297)

Closes #2171
2022-01-13 19:59:43 +02:00 · 2022-01-13 19:59:43 +02:00 · 799f76f537
commit 799f76f537
parent f298032ddb
5 changed files with 28 additions and 17 deletions
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -28,6 +28,7 @@
  `--target-version` is set to 3.10 and higher). (#2728)
 - Fix handling of standalone `match()` or `case()` when there is a trailing newline or a
  comment inside of the parentheses. (#2760)
+- Black now normalizes string prefix order (#2297)

 ### Packaging

--- a/docs/the_black_code_style/current_style.md
+++ b/docs/the_black_code_style/current_style.md
@@ -233,10 +233,10 @@ _Black_ prefers double quotes (`"` and `"""`) over single quotes (`'` and `'''`)
 will replace the latter with the former as long as it does not result in more backslash
 escapes than before.

-_Black_ also standardizes string prefixes, making them always lowercase. On top of that,
-if your code is already Python 3.6+ only or it's using the `unicode_literals` future
-import, _Black_ will remove `u` from the string prefix as it is meaningless in those
-scenarios.
+_Black_ also standardizes string prefixes. Prefix characters are made lowercase with the
+exception of [capital "R" prefixes](#rstrings-and-rstrings), unicode literal markers
+(`u`) are removed because they are meaningless in Python 3, and when a prefix has
+multiple characters, "r" is put first, as in spoken language: "raw f-string".

 The main reason to standardize on a single form of quotes is aesthetics. Having one kind
 of quotes everywhere reduces reader distraction. It will also enable a future version of
--- a/src/black/strings.py
+++ b/src/black/strings.py
@@ -149,6 +149,10 @@ def normalize_string_prefix(s: str) -> str:
        .replace("U", "")
        .replace("u", "")
    )
+
+    # Python syntax allows at most 2 prefix characters, and a 2-character prefix
+    # always contains "r" (e.g. "rb", "rf"), so reversing is enough to put "r" first
+    if len(new_prefix) == 2 and "r" != new_prefix[0].lower():
+        new_prefix = new_prefix[::-1]
    return f"{new_prefix}{match.group(2)}"


--- a/src/blib2to3/pgen2/tokenize.py
+++ b/src/blib2to3/pgen2/tokenize.py
@@ -293,7 +293,7 @@ def compat(self, token: Tuple[int, Text], iterable: Iterable[TokenInfo]) -> None


 cookie_re = re.compile(r"^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)", re.ASCII)
-blank_re = re.compile(br"^[ \t\f]*(?:[#\r\n]|$)", re.ASCII)
+blank_re = re.compile(rb"^[ \t\f]*(?:[#\r\n]|$)", re.ASCII)


 def _get_normal_name(orig_enc: str) -> str:
--- a/tests/data/string_prefixes.py
+++ b/tests/data/string_prefixes.py
@@ -1,10 +1,13 @@
-#!/usr/bin/env python3.6
+#!/usr/bin/env python3

-name = R"Łukasz"
-F"hello {name}"
-B"hello"
-r"hello"
-fR"hello"
+name = "Łukasz"
+(f"hello {name}", F"hello {name}")
+(b"", B"")
+(u"", U"")
+(r"", R"")
+
+(rf"", fr"", Rf"", fR"", rF"", Fr"", RF"", FR"")
+(rb"", br"", Rb"", bR"", rB"", Br"", RB"", BR"")


 def docstring_singleline():
@@ -20,13 +23,16 @@ def docstring_multiline():
 # output


-#!/usr/bin/env python3.6
+#!/usr/bin/env python3

-name = R"Łukasz"
-f"hello {name}"
-b"hello"
-r"hello"
-fR"hello"
+name = "Łukasz"
+(f"hello {name}", f"hello {name}")
+(b"", b"")
+("", "")
+(r"", R"")
+
+(rf"", rf"", Rf"", Rf"", rf"", rf"", Rf"", Rf"")
+(rb"", rb"", Rb"", Rb"", rb"", rb"", Rb"", Rb"")


 def docstring_singleline():