Handle arbitrary number of backslashes during string normalization (#110)

This commit is contained in:
Zsolt Dollenstein 2018-04-05 10:28:46 +01:00 committed by Łukasz Langa
parent 30d921f74c
commit 2e0bb0fa9f
2 changed files with 28 additions and 4 deletions

View File

@@ -10,6 +10,7 @@
from multiprocessing import Manager from multiprocessing import Manager
import os import os
from pathlib import Path from pathlib import Path
import re
import tokenize import tokenize
import signal import signal
import sys import sys
@@ -1922,8 +1923,10 @@ def normalize_string_quotes(leaf: Leaf) -> None:
prefix = leaf.value[:first_quote_pos] prefix = leaf.value[:first_quote_pos]
body = leaf.value[first_quote_pos + len(orig_quote):-len(orig_quote)] body = leaf.value[first_quote_pos + len(orig_quote):-len(orig_quote)]
unescaped_new_quote = re.compile(r"(([^\\]|^)(\\\\)*)" + new_quote)
escaped_orig_quote = re.compile(r"\\(\\\\)*" + orig_quote)
if "r" in prefix.casefold(): if "r" in prefix.casefold():
if body.count(new_quote) != body.count(f"\\{new_quote}"): if unescaped_new_quote.search(body):
# There's at least one unescaped new_quote in this raw string # There's at least one unescaped new_quote in this raw string
# so converting is impossible # so converting is impossible
return return
@@ -1931,9 +1934,8 @@ def normalize_string_quotes(leaf: Leaf) -> None:
# Do not introduce or remove backslashes in raw strings # Do not introduce or remove backslashes in raw strings
new_body = body new_body = body
else: else:
new_body = body.replace(f"\\{orig_quote}", orig_quote).replace( new_body = escaped_orig_quote.sub(f"\\1{orig_quote}", body)
new_quote, f"\\{new_quote}" new_body = unescaped_new_quote.sub(f"\\1\\\\{new_quote}", new_body)
)
if new_quote == '"""' and new_body[-1] == '"': if new_quote == '"""' and new_body[-1] == '"':
# edge case: # edge case:
new_body = new_body[:-1] + '\\"' new_body = new_body[:-1] + '\\"'

View File

@@ -1,3 +1,7 @@
'\''
'"'
"'"
"\""
"Hello" "Hello"
"Don't do that" "Don't do that"
'Here is a "' 'Here is a "'
@@ -18,9 +22,20 @@
r'Date d\'expiration:(.*)' r'Date d\'expiration:(.*)'
r'Tricky "quote' r'Tricky "quote'
r'Not-so-tricky \"quote' r'Not-so-tricky \"quote'
'\n\
The \"quick\"\n\
brown fox\n\
jumps over\n\
the \'lazy\' dog.\n\
'
re.compile(r'[\\"]')
# output # output
"'"
'"'
"'"
'"'
"Hello" "Hello"
"Don't do that" "Don't do that"
'Here is a "' 'Here is a "'
@@ -41,3 +56,10 @@
r"Date d\'expiration:(.*)" r"Date d\'expiration:(.*)"
r'Tricky "quote' r'Tricky "quote'
r"Not-so-tricky \"quote" r"Not-so-tricky \"quote"
"\n\
The \"quick\"\n\
brown fox\n\
jumps over\n\
the 'lazy' dog.\n\
"
re.compile(r'[\\"]')