Simplify string tokenization regexes (#4331)

This commit is contained in:
Jelle Zijlstra 2024-04-24 23:11:31 -07:00 committed by GitHub
parent 5683242fd4
commit ba88fc372e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 18 additions and 12 deletions

View File

@@ -30,6 +30,8 @@
<!-- Changes that improve Black's performance. --> <!-- Changes that improve Black's performance. -->
- Fix bad performance on certain complex string literals (#4331)
### Output ### Output
<!-- Changes to Black's terminal output and error messages --> <!-- Changes to Black's terminal output and error messages -->

View File

@@ -119,13 +119,13 @@ def _combinations(*l: str) -> Set[str]:
Number = group(Imagnumber, Floatnumber, Intnumber) Number = group(Imagnumber, Floatnumber, Intnumber)
# Tail end of ' string. # Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'" Single = r"(?:\\.|[^'\\])*'"
# Tail end of " string. # Tail end of " string.
Double = r'[^"\\]*(?:\\.[^"\\]*)*"' Double = r'(?:\\.|[^"\\])*"'
# Tail end of ''' string. # Tail end of ''' string.
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''" Single3 = r"(?:\\.|'(?!'')|[^'\\])*'''"
# Tail end of """ string. # Tail end of """ string.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""' Double3 = r'(?:\\.|"(?!"")|[^"\\])*"""'
_litprefix = r"(?:[uUrRbB]|[rR][bB]|[bBuU][rR])?" _litprefix = r"(?:[uUrRbB]|[rR][bB]|[bBuU][rR])?"
_fstringlitprefix = r"(?:rF|FR|Fr|fr|RF|F|rf|f|Rf|fR)" _fstringlitprefix = r"(?:rF|FR|Fr|fr|RF|F|rf|f|Rf|fR)"
Triple = group( Triple = group(
@@ -136,12 +136,12 @@ def _combinations(*l: str) -> Set[str]:
) )
# beginning of a single quoted f-string. must not end with `{{` or `\N{` # beginning of a single quoted f-string. must not end with `{{` or `\N{`
SingleLbrace = r"[^'\\{]*(?:(?:\\N{|\\.|{{)[^'\\{]*)*(?<!\\N){(?!{)" SingleLbrace = r"(?:\\N{|\\.|{{|[^'\\{])*(?<!\\N){(?!{)"
DoubleLbrace = r'[^"\\{]*(?:(?:\\N{|\\.|{{)[^"\\{]*)*(?<!\\N){(?!{)' DoubleLbrace = r'(?:\\N{|\\.|{{|[^"\\{])*(?<!\\N){(?!{)'
# beginning of a triple quoted f-string. must not end with `{{` or `\N{` # beginning of a triple quoted f-string. must not end with `{{` or `\N{`
Single3Lbrace = r"[^'{]*(?:(?:\\N{|\\[^{]|{{|'(?!''))[^'{]*)*(?<!\\N){(?!{)" Single3Lbrace = r"(?:\\N{|\\[^{]|{{|'(?!'')|[^'{\\])*(?<!\\N){(?!{)"
Double3Lbrace = r'[^"{]*(?:(?:\\N{|\\[^{]|{{|"(?!""))[^"{]*)*(?<!\\N){(?!{)' Double3Lbrace = r'(?:\\N{|\\[^{]|{{|"(?!"")|[^"{\\])*(?<!\\N){(?!{)'
# ! format specifier inside an fstring brace, ensure it's not a `!=` token # ! format specifier inside an fstring brace, ensure it's not a `!=` token
Bang = Whitespace + group("!") + r"(?!=)" Bang = Whitespace + group("!") + r"(?!=)"
@@ -171,12 +171,12 @@ def _combinations(*l: str) -> Set[str]:
Special = group(r"\r?\n", r"[:;.,`@]") Special = group(r"\r?\n", r"[:;.,`@]")
Funny = group(Operator, Bracket, Special) Funny = group(Operator, Bracket, Special)
_string_middle_single = r"[^\n'\\]*(?:\\.[^\n'\\]*)*" _string_middle_single = r"(?:[^\n'\\]|\\.)*"
_string_middle_double = r'[^\n"\\]*(?:\\.[^\n"\\]*)*' _string_middle_double = r'(?:[^\n"\\]|\\.)*'
# FSTRING_MIDDLE and LBRACE, must not end with a `{{` or `\N{` # FSTRING_MIDDLE and LBRACE, must not end with a `{{` or `\N{`
_fstring_middle_single = r"[^\n'{]*(?:(?:\\N{|\\[^{]|{{)[^\n'{]*)*(?<!\\N)({)(?!{)" _fstring_middle_single = r"(?:\\N{|\\[^{]|{{|[^\n'{\\])*(?<!\\N)({)(?!{)"
_fstring_middle_double = r'[^\n"{]*(?:(?:\\N{|\\[^{]|{{)[^\n"{]*)*(?<!\\N)({)(?!{)' _fstring_middle_double = r'(?:\\N{|\\[^{]|{{|[^\n"{\\])*(?<!\\N)({)(?!{)'
# First (or only) line of ' or " string. # First (or only) line of ' or " string.
ContStr = group( ContStr = group(

View File

@@ -119,6 +119,8 @@
level=0, level=0,
) )
f'{{\\"kind\\":\\"ConfigMap\\",\\"metadata\\":{{\\"annotations\\":{{}},\\"name\\":\\"cluster-info\\",\\"namespace\\":\\"amazon-cloudwatch\\"}}}}'
# output # output
x = f"foo" x = f"foo"
@@ -240,3 +242,5 @@
f"{self.writer._transport.get_extra_info('peername')}", # type: ignore[attr-defined] f"{self.writer._transport.get_extra_info('peername')}", # type: ignore[attr-defined]
level=0, level=0,
) )
f'{{\\"kind\\":\\"ConfigMap\\",\\"metadata\\":{{\\"annotations\\":{{}},\\"name\\":\\"cluster-info\\",\\"namespace\\":\\"amazon-cloudwatch\\"}}}}'