Handle unnecessarily escaped strings (#128)

2018-04-13 19:31:23 +01:00 · 2018-04-13 19:31:23 +01:00 · 2104b7cdcb
commit 2104b7cdcb
parent 8e7848c63e
4 changed files with 34 additions and 7 deletions
--- a/README.md
+++ b/README.md
@@ -499,6 +499,9 @@ More details can be found in [CONTRIBUTING](CONTRIBUTING.md).

 * Vim plugin now works on Windows, too

+* fixed unstable formatting when encountering unnecessarily escaped quotes
+  in a string (#120)
+

 ### 18.4a1

--- a/black.py
+++ b/black.py
@@ -24,6 +24,7 @@
    Iterator,
    List,
    Optional,
+    Pattern,
    Set,
    Tuple,
    Type,
@@ -1984,9 +1985,10 @@ def normalize_string_quotes(leaf: Leaf) -> None:
        return  # There's an internal error

    prefix = leaf.value[:first_quote_pos]
-    body = leaf.value[first_quote_pos + len(orig_quote):-len(orig_quote)]
    unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
-    escaped_orig_quote = re.compile(rf"\\(\\\\)*{orig_quote}")
+    escaped_new_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{new_quote}")
+    escaped_orig_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{orig_quote}")
+    body = leaf.value[first_quote_pos + len(orig_quote):-len(orig_quote)]
    if "r" in prefix.casefold():
        if unescaped_new_quote.search(body):
            # There's at least one unescaped new_quote in this raw string
@@ -1996,11 +1998,14 @@ def normalize_string_quotes(leaf: Leaf) -> None:
        # Do not introduce or remove backslashes in raw strings
        new_body = body
    else:
-        new_body = escaped_orig_quote.sub(rf"\1{orig_quote}", body)
-        new_body = unescaped_new_quote.sub(rf"\1\\{new_quote}", new_body)
-        # Add escapes again for consecutive occurences of new_quote (sub
-        # doesn't match overlapping substrings).
-        new_body = unescaped_new_quote.sub(rf"\1\\{new_quote}", new_body)
+        # Remove unnecessary escapes
+        new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
+        if body != new_body:
+            # Consider the string without unnecessary escapes as the original
+            body = new_body
+            leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
+        new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
+        new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
    if new_quote == '"""' and new_body[-1] == '"':
        # edge case:
        new_body = new_body[:-1] + '\\"'
@@ -2374,5 +2379,14 @@ def shutdown(loop: BaseEventLoop) -> None:
        loop.close()


+def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
+    """Replace `regex` with `replacement` twice on `original`.
+
+    This is used by string normalization to perform replacements on
+    overlapping matches.
+    """
+    return regex.sub(replacement, regex.sub(replacement, original))
+
+
 if __name__ == "__main__":
    main()
--- a/docs/reference/reference_functions.rst
+++ b/docs/reference/reference_functions.rst
@@ -91,4 +91,6 @@ Utilities

 .. autofunction:: black.preceding_leaf

+.. autofunction:: black.sub_twice
+
 .. autofunction:: black.whitespace
--- a/tests/string_quotes.py
+++ b/tests/string_quotes.py
@@ -38,6 +38,10 @@
 "x = '''; y = \"\"\"\""
 "x = ''''; y = \"\"\"\"\""
 "x = '' ''; y = \"\"\"\"\""
+'unnecessary \"\"escaping'
+"unnecessary \'\'escaping"
+'\\""'
+"\\''"

 # output

@@ -81,3 +85,7 @@
 'x = \'\'\'; y = """"'
 'x = \'\'\'\'; y = """""'
 'x = \'\' \'\'; y = """""'
+'unnecessary ""escaping'
+"unnecessary ''escaping"
+'\\""'
+"\\''"