Handle unnecessarily escaped strings (#128)

This commit is contained in:
Zsolt Dollenstein 2018-04-13 19:31:23 +01:00 committed by Łukasz Langa
parent 8e7848c63e
commit 2104b7cdcb
4 changed files with 34 additions and 7 deletions

View File

@ -499,6 +499,9 @@ More details can be found in [CONTRIBUTING](CONTRIBUTING.md).
* Vim plugin now works on Windows, too * Vim plugin now works on Windows, too
* fixed unstable formatting when encountering unnecessarily escaped quotes
in a string (#120)
### 18.4a1 ### 18.4a1

View File

@ -24,6 +24,7 @@
Iterator, Iterator,
List, List,
Optional, Optional,
Pattern,
Set, Set,
Tuple, Tuple,
Type, Type,
@ -1984,9 +1985,10 @@ def normalize_string_quotes(leaf: Leaf) -> None:
return # There's an internal error return # There's an internal error
prefix = leaf.value[:first_quote_pos] prefix = leaf.value[:first_quote_pos]
body = leaf.value[first_quote_pos + len(orig_quote):-len(orig_quote)]
unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}") unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
escaped_orig_quote = re.compile(rf"\\(\\\\)*{orig_quote}") escaped_new_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{new_quote}")
escaped_orig_quote = re.compile(rf"([^\\]|^)\\(\\\\)*{orig_quote}")
body = leaf.value[first_quote_pos + len(orig_quote):-len(orig_quote)]
if "r" in prefix.casefold(): if "r" in prefix.casefold():
if unescaped_new_quote.search(body): if unescaped_new_quote.search(body):
# There's at least one unescaped new_quote in this raw string # There's at least one unescaped new_quote in this raw string
@ -1996,11 +1998,14 @@ def normalize_string_quotes(leaf: Leaf) -> None:
# Do not introduce or remove backslashes in raw strings # Do not introduce or remove backslashes in raw strings
new_body = body new_body = body
else: else:
new_body = escaped_orig_quote.sub(rf"\1{orig_quote}", body) # remove unnecessary quotes
new_body = unescaped_new_quote.sub(rf"\1\\{new_quote}", new_body) new_body = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", body)
# Add escapes again for consecutive occurences of new_quote (sub if body != new_body:
# doesn't match overlapping substrings). # Consider the string without unnecessary quotes as the original
new_body = unescaped_new_quote.sub(rf"\1\\{new_quote}", new_body) body = new_body
leaf.value = f"{prefix}{orig_quote}{body}{orig_quote}"
new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
if new_quote == '"""' and new_body[-1] == '"': if new_quote == '"""' and new_body[-1] == '"':
# edge case: # edge case:
new_body = new_body[:-1] + '\\"' new_body = new_body[:-1] + '\\"'
@ -2374,5 +2379,14 @@ def shutdown(loop: BaseEventLoop) -> None:
loop.close() loop.close()
def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str:
    """Replace `regex` with `replacement` twice on `original`.

    This is used by string normalization to perform replaces on
    overlapping matches.

    A single `regex.sub()` pass only replaces non-overlapping matches, so a
    match that shares characters with the one before it is skipped. Running
    the same substitution again on the first pass's output picks those up.
    """
    first_pass = regex.sub(replacement, original)
    return regex.sub(replacement, first_pass)
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@ -91,4 +91,6 @@ Utilities
.. autofunction:: black.preceding_leaf .. autofunction:: black.preceding_leaf
.. autofunction:: black.sub_twice
.. autofunction:: black.whitespace .. autofunction:: black.whitespace

View File

@ -38,6 +38,10 @@
"x = '''; y = \"\"\"\"" "x = '''; y = \"\"\"\""
"x = ''''; y = \"\"\"\"\"" "x = ''''; y = \"\"\"\"\""
"x = '' ''; y = \"\"\"\"\"" "x = '' ''; y = \"\"\"\"\""
'unnecessary \"\"escaping'
"unnecessary \'\'escaping"
'\\""'
"\\''"
# output # output
@ -81,3 +85,7 @@
'x = \'\'\'; y = """"' 'x = \'\'\'; y = """"'
'x = \'\'\'\'; y = """""' 'x = \'\'\'\'; y = """""'
'x = \'\' \'\'; y = """""' 'x = \'\' \'\'; y = """""'
'unnecessary ""escaping'
"unnecessary ''escaping"
'\\""'
"\\''"