Add PEP 701 support (#3822)

Co-authored-by: Shantanu <12621235+hauntsaninja@users.noreply.github.com>
Co-authored-by: hauntsaninja <hauntsaninja@gmail.com>
Co-authored-by: Jelle Zijlstra <jelle.zijlstra@gmail.com>
Tushar Sadhwani 2024-04-22 20:49:19 +05:30 committed by GitHub
parent 944b99aa91
commit 551ede2825
GPG Key ID: B5690EEEBB952194
16 changed files with 943 additions and 104 deletions

View File

@ -6,6 +6,8 @@
<!-- Include any especially major or disruptive changes here -->
- Add support for the new Python 3.12 f-string syntax introduced by PEP 701 (#3822)
### Stable style
<!-- Changes that affect Black's stable style -->

View File

@ -69,13 +69,7 @@
from black.mode import FUTURE_FLAG_TO_FEATURE, VERSION_TO_FEATURES, Feature
from black.mode import Mode as Mode # re-exported
from black.mode import Preview, TargetVersion, supports_feature
from black.nodes import (
STARS,
is_number_token,
is_simple_decorator_expression,
is_string_token,
syms,
)
from black.nodes import STARS, is_number_token, is_simple_decorator_expression, syms
from black.output import color_diff, diff, dump_to_file, err, ipynb_diff, out
from black.parsing import ( # noqa F401
ASTSafetyError,
@ -91,7 +85,6 @@
sanitized_lines,
)
from black.report import Changed, NothingChanged, Report
from black.trans import iter_fexpr_spans
from blib2to3.pgen2 import token
from blib2to3.pytree import Leaf, Node
@ -1265,7 +1258,10 @@ def _format_str_once(
elt = EmptyLineTracker(mode=mode)
split_line_features = {
feature
for feature in {Feature.TRAILING_COMMA_IN_CALL, Feature.TRAILING_COMMA_IN_DEF}
for feature in {
Feature.TRAILING_COMMA_IN_CALL,
Feature.TRAILING_COMMA_IN_DEF,
}
if supports_feature(versions, feature)
}
block: Optional[LinesBlock] = None
@ -1337,15 +1333,14 @@ def get_features_used( # noqa: C901
}
for n in node.pre_order():
if is_string_token(n):
value_head = n.value[:2]
if value_head in {'f"', 'F"', "f'", "F'", "rf", "fr", "RF", "FR"}:
features.add(Feature.F_STRINGS)
if Feature.DEBUG_F_STRINGS not in features:
for span_beg, span_end in iter_fexpr_spans(n.value):
if n.value[span_beg : span_end - 1].rstrip().endswith("="):
features.add(Feature.DEBUG_F_STRINGS)
break
if n.type == token.FSTRING_START:
features.add(Feature.F_STRINGS)
elif (
n.type == token.RBRACE
and n.parent is not None
and any(child.type == token.EQUAL for child in n.parent.children)
):
features.add(Feature.DEBUG_F_STRINGS)
elif is_number_token(n):
if "_" in n.value:

View File

@ -502,6 +502,45 @@ def visit_NUMBER(self, leaf: Leaf) -> Iterator[Line]:
normalize_numeric_literal(leaf)
yield from self.visit_default(leaf)
def visit_fstring(self, node: Node) -> Iterator[Line]:
# Currently we don't want to format or split f-strings at all.
string_leaf = _fstring_to_string(node)
node.replace(string_leaf)
yield from self.visit_STRING(string_leaf)
# TODO: uncomment the implementation below to format f-string children.
# fstring_start = node.children[0]
# fstring_end = node.children[-1]
# assert isinstance(fstring_start, Leaf)
# assert isinstance(fstring_end, Leaf)
# quote_char = fstring_end.value[0]
# quote_idx = fstring_start.value.index(quote_char)
# prefix, quote = (
# fstring_start.value[:quote_idx],
# fstring_start.value[quote_idx:]
# )
# if not is_docstring(node, self.mode):
# prefix = normalize_string_prefix(prefix)
# assert quote == fstring_end.value
# is_raw_fstring = "r" in prefix or "R" in prefix
# middles = [
# leaf
# for leaf in node.leaves()
# if leaf.type == token.FSTRING_MIDDLE
# ]
# if self.mode.string_normalization:
# middles, quote = normalize_fstring_quotes(quote, middles, is_raw_fstring)
# fstring_start.value = prefix + quote
# fstring_end.value = quote
# yield from self.visit_default(node)
def __post_init__(self) -> None:
"""You are in a twisty little maze of passages."""
self.current_line = Line(mode=self.mode)
@ -535,6 +574,12 @@ def __post_init__(self) -> None:
self.visit_guard = partial(v, keywords=Ø, parens={"if"})
def _fstring_to_string(node: Node) -> Leaf:
"""Converts an fstring node back to a string node."""
string_without_prefix = str(node)[len(node.prefix) :]
return Leaf(token.STRING, string_without_prefix, prefix=node.prefix)
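
Since str(node) includes the node's prefix (any leading whitespace and comments), the helper only needs to slice the prefix off and re-attach it to the new leaf. A self-contained illustration with a hypothetical stand-in for a blib2to3 node:

class FakeFstringNode:
    # hypothetical: like blib2to3 nodes, str() includes the prefix
    prefix = "    "

    def __str__(self) -> str:
        return self.prefix + 'f"{x}"'

node = FakeFstringNode()
string_without_prefix = str(node)[len(node.prefix) :]
assert string_without_prefix == 'f"{x}"'  # prefix stripped, code text intact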
def _hugging_power_ops_line_to_string(
line: Line,
features: Collection[Feature],

View File

@ -72,7 +72,12 @@ def append(
Inline comments are put aside.
"""
has_value = leaf.type in BRACKETS or bool(leaf.value.strip())
has_value = (
leaf.type in BRACKETS
# empty fstring-middles must not be truncated
or leaf.type == token.FSTRING_MIDDLE
or bool(leaf.value.strip())
)
if not has_value:
return
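
Without the extra check, a whitespace-only FSTRING_MIDDLE (falsy after .strip()) would be dropped from the line, swallowing the space between two replacement fields. A sketch of the observable behavior through Black's public API; this exact input appears in the new test file below:

import black

src = 'x = f"{a} {b}"\n'
# The single-space FSTRING_MIDDLE between {a} and {b} must survive formatting.
assert black.format_str(src, mode=black.Mode()) == src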

View File

@ -46,6 +46,7 @@ class Feature(Enum):
DEBUG_F_STRINGS = 16
PARENTHESIZED_CONTEXT_MANAGERS = 17
TYPE_PARAMS = 18
FSTRING_PARSING = 19
FORCE_OPTIONAL_PARENTHESES = 50
# __future__ flags
@ -156,6 +157,7 @@ class Feature(Enum):
Feature.EXCEPT_STAR,
Feature.VARIADIC_GENERICS,
Feature.TYPE_PARAMS,
Feature.FSTRING_PARSING,
},
}
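
Judging by TYPE_PARAMS in the same set, this hunk is the Python 3.12 entry, so the usual target-version gate applies to the new feature. A hedged sketch:

from black.mode import Feature, TargetVersion, supports_feature

# Every requested target must support a feature for it to be usable.
assert supports_feature({TargetVersion.PY312}, Feature.FSTRING_PARSING)
assert not supports_feature(
    {TargetVersion.PY311, TargetVersion.PY312}, Feature.FSTRING_PARSING
)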

View File

@ -145,7 +145,13 @@
OPENING_BRACKETS: Final = set(BRACKET.keys())
CLOSING_BRACKETS: Final = set(BRACKET.values())
BRACKETS: Final = OPENING_BRACKETS | CLOSING_BRACKETS
ALWAYS_NO_SPACE: Final = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT}
ALWAYS_NO_SPACE: Final = CLOSING_BRACKETS | {
token.COMMA,
STANDALONE_COMMENT,
token.FSTRING_MIDDLE,
token.FSTRING_END,
token.BANG,
}
RARROW = 55
@ -211,6 +217,9 @@ def whitespace(leaf: Leaf, *, complex_subscript: bool, mode: Mode) -> str: # no
}:
return NO
if t == token.LBRACE and p.type == syms.fstring_replacement_field:
return NO
prev = leaf.prev_sibling
if not prev:
prevp = preceding_leaf(p)
@ -272,6 +281,9 @@ def whitespace(leaf: Leaf, *, complex_subscript: bool, mode: Mode) -> str: # no
elif prev.type in OPENING_BRACKETS:
return NO
elif prev.type == token.BANG:
return NO
if p.type in {syms.parameters, syms.arglist}:
# untyped function signatures or calls
if not prev or prev.type != token.COMMA:
@ -393,6 +405,7 @@ def whitespace(leaf: Leaf, *, complex_subscript: bool, mode: Mode) -> str: # no
elif prevp.type == token.EQUAL and prevp_parent.type == syms.argument:
return NO
# TODO: add fstring here?
elif t in {token.NAME, token.NUMBER, token.STRING}:
return NO
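
Together with the BANG rule below, this keeps replacement fields unpadded, so an already-tight f-string round-trips byte-for-byte. A sketch; this exact input is among the new test cases:

import black

src = 'f"{x:{y}d}"\n'  # no space after `{`, none around the nested spec
assert black.format_str(src, mode=black.Mode()) == src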
@ -542,31 +555,32 @@ def is_arith_like(node: LN) -> bool:
}
def is_docstring(leaf: Leaf, mode: Mode) -> bool:
if leaf.type != token.STRING:
return False
def is_docstring(node: NL, mode: Mode) -> bool:
if isinstance(node, Leaf):
if node.type != token.STRING:
return False
prefix = get_string_prefix(leaf.value)
if set(prefix).intersection("bBfF"):
return False
prefix = get_string_prefix(node.value)
if set(prefix).intersection("bBfF"):
return False
if (
Preview.unify_docstring_detection in mode
and leaf.parent
and leaf.parent.type == syms.simple_stmt
and not leaf.parent.prev_sibling
and leaf.parent.parent
and leaf.parent.parent.type == syms.file_input
and node.parent
and node.parent.type == syms.simple_stmt
and not node.parent.prev_sibling
and node.parent.parent
and node.parent.parent.type == syms.file_input
):
return True
if prev_siblings_are(
leaf.parent, [None, token.NEWLINE, token.INDENT, syms.simple_stmt]
node.parent, [None, token.NEWLINE, token.INDENT, syms.simple_stmt]
):
return True
# Multiline docstring on the same line as the `def`.
if prev_siblings_are(leaf.parent, [syms.parameters, token.COLON, syms.simple_stmt]):
if prev_siblings_are(node.parent, [syms.parameters, token.COLON, syms.simple_stmt]):
# `syms.parameters` is only used in funcdefs and async_funcdefs in the Python
# grammar. We're safe to return True without further checks.
return True
@ -954,10 +968,6 @@ def is_rpar_token(nl: NL) -> TypeGuard[Leaf]:
return nl.type == token.RPAR
def is_string_token(nl: NL) -> TypeGuard[Leaf]:
return nl.type == token.STRING
def is_number_token(nl: NL) -> TypeGuard[Leaf]:
return nl.type == token.NUMBER

View File

@ -5,7 +5,7 @@
import re
import sys
from functools import lru_cache
from typing import Final, List, Match, Pattern
from typing import Final, List, Match, Pattern, Tuple
from black._width_table import WIDTH_TABLE
from blib2to3.pytree import Leaf
@ -169,8 +169,7 @@ def _cached_compile(pattern: str) -> Pattern[str]:
def normalize_string_quotes(s: str) -> str:
"""Prefer double quotes but only if it doesn't cause more escaping.
Adds or removes backslashes as appropriate. Doesn't parse and fix
strings nested in f-strings.
Adds or removes backslashes as appropriate.
"""
value = s.lstrip(STRING_PREFIX_CHARS)
if value[:3] == '"""':
@ -211,6 +210,7 @@ def normalize_string_quotes(s: str) -> str:
s = f"{prefix}{orig_quote}{body}{orig_quote}"
new_body = sub_twice(escaped_orig_quote, rf"\1\2{orig_quote}", new_body)
new_body = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_body)
if "f" in prefix.casefold():
matches = re.findall(
r"""
@ -240,6 +240,71 @@ def normalize_string_quotes(s: str) -> str:
return f"{prefix}{new_quote}{new_body}{new_quote}"
def normalize_fstring_quotes(
quote: str,
middles: List[Leaf],
is_raw_fstring: bool,
) -> Tuple[List[Leaf], str]:
"""Prefer double quotes but only if it doesn't cause more escaping.
Adds or removes backslashes as appropriate.
"""
if quote == '"""':
return middles, quote
elif quote == "'''":
new_quote = '"""'
elif quote == '"':
new_quote = "'"
else:
new_quote = '"'
unescaped_new_quote = _cached_compile(rf"(([^\\]|^)(\\\\)*){new_quote}")
escaped_new_quote = _cached_compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}")
escaped_orig_quote = _cached_compile(rf"([^\\]|^)\\((?:\\\\)*){quote}")
if is_raw_fstring:
for middle in middles:
if unescaped_new_quote.search(middle.value):
# There's at least one unescaped new_quote in this raw string
# so converting is impossible
return middles, quote
# Do not introduce or remove backslashes in raw strings, just use double quote
return middles, '"'
new_segments = []
for middle in middles:
segment = middle.value
# remove unnecessary escapes
new_segment = sub_twice(escaped_new_quote, rf"\1\2{new_quote}", segment)
if segment != new_segment:
# Consider the string without unnecessary escapes as the original
middle.value = new_segment
new_segment = sub_twice(escaped_orig_quote, rf"\1\2{quote}", new_segment)
new_segment = sub_twice(unescaped_new_quote, rf"\1\\{new_quote}", new_segment)
new_segments.append(new_segment)
if new_quote == '"""' and new_segments[-1].endswith('"'):
# edge case: escape the trailing quote so it doesn't merge into the '"""' terminator
new_segments[-1] = new_segments[-1][:-1] + '\\"'
for middle, new_segment in zip(middles, new_segments):
orig_escape_count = middle.value.count("\\")
new_escape_count = new_segment.count("\\")
if new_escape_count > orig_escape_count:
return middles, quote # Do not introduce more escaping
if new_escape_count == orig_escape_count and quote == '"':
return middles, quote # Prefer double quotes
for middle, new_segment in zip(middles, new_segments):
middle.value = new_segment
return middles, new_quote
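
A hedged usage sketch, assuming this lives in black.strings as the surrounding imports suggest; the FSTRING_MIDDLE leaves are built by hand here, whereas Black gets them from the tokenizer:

from blib2to3.pgen2 import token
from blib2to3.pytree import Leaf
from black.strings import normalize_fstring_quotes

# The two middles of f'it\'s {x}': text before the field, empty text after.
middles = [Leaf(token.FSTRING_MIDDLE, "it\\'s "), Leaf(token.FSTRING_MIDDLE, "")]
middles, quote = normalize_fstring_quotes("'", middles, is_raw_fstring=False)
assert quote == '"'                 # double quotes preferred...
assert middles[0].value == "it's "  # ...and the escape became unnecessary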
def normalize_unicode_escape_sequences(leaf: Leaf) -> None:
"""Replace hex codes in Unicode escape sequences with lowercase representation."""
text = leaf.value

View File

@ -163,7 +163,7 @@ atom: ('(' [yield_expr|testlist_gexp] ')' |
'[' [listmaker] ']' |
'{' [dictsetmaker] '}' |
'`' testlist1 '`' |
NAME | NUMBER | STRING+ | '.' '.' '.')
NAME | NUMBER | (STRING | fstring)+ | '.' '.' '.')
listmaker: (namedexpr_test|star_expr) ( old_comp_for | (',' (namedexpr_test|star_expr))* [','] )
testlist_gexp: (namedexpr_test|star_expr) ( old_comp_for | (',' (namedexpr_test|star_expr))* [','] )
lambdef: 'lambda' [varargslist] ':' test
@ -254,3 +254,8 @@ case_block: "case" patterns [guard] ':' suite
guard: 'if' namedexpr_test
patterns: pattern (',' pattern)* [',']
pattern: (expr|star_expr) ['as' expr]
fstring: FSTRING_START fstring_middle* FSTRING_END
fstring_middle: fstring_replacement_field | FSTRING_MIDDLE
fstring_replacement_field: '{' (yield_expr | testlist_star_expr) ['='] [ "!" NAME ] [ ':' fstring_format_spec* ] '}'
fstring_format_spec: FSTRING_MIDDLE | fstring_replacement_field
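
Exercising the new rules through Black's parser (a sketch; syms.fstring is the symbol the pygram change below adds):

import black
from black.nodes import syms

node = black.lib2to3_parse("x = f'{indent}{_type}'\n")
# One fstring node whose children alternate FSTRING_MIDDLE leaves and
# fstring_replacement_field nodes, exactly per the grammar above.
fstrings = [n for n in node.pre_order() if n.type == syms.fstring]
assert len(fstrings) == 1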

View File

@ -167,7 +167,9 @@ def parse_tokens(self, tokens: Iterable[GoodTokenInfo], debug: bool = False) ->
if type in {token.INDENT, token.DEDENT}:
prefix = _prefix
lineno, column = end
if value.endswith("\n"):
# FSTRING_MIDDLE is the only token that can end with a newline, and
# `end` will point to the next line. For that case, don't increment lineno.
if value.endswith("\n") and type != token.FSTRING_MIDDLE:
lineno += 1
column = 0
else:

View File

@ -218,6 +218,7 @@ def report(self) -> None:
//= DOUBLESLASHEQUAL
-> RARROW
:= COLONEQUAL
! BANG
"""
opmap = {}

View File

@ -66,7 +66,11 @@
ASYNC: Final = 57
ERRORTOKEN: Final = 58
COLONEQUAL: Final = 59
N_TOKENS: Final = 60
FSTRING_START: Final = 60
FSTRING_MIDDLE: Final = 61
FSTRING_END: Final = 62
BANG: Final = 63
N_TOKENS: Final = 64
NT_OFFSET: Final = 256
# --end constants--
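
The numeric codes feed tok_name, so the new tokens print with readable names in debug output; a quick sketch:

from blib2to3.pgen2 import token

assert token.tok_name[token.FSTRING_START] == "FSTRING_START"
assert token.tok_name[token.BANG] == "BANG"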

View File

@ -27,6 +27,7 @@
function to which the 5 fields described above are passed as 5 arguments,
each time a new token is found."""
import builtins
import sys
from typing import (
Callable,
@ -49,12 +50,17 @@
DEDENT,
ENDMARKER,
ERRORTOKEN,
FSTRING_END,
FSTRING_MIDDLE,
FSTRING_START,
INDENT,
LBRACE,
NAME,
NEWLINE,
NL,
NUMBER,
OP,
RBRACE,
STRING,
tok_name,
)
@ -120,14 +126,32 @@ def _combinations(*l: str) -> Set[str]:
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
# Tail end of """ string.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
_litprefix = r"(?:[uUrRbBfF]|[rR][fFbB]|[fFbBuU][rR])?"
Triple = group(_litprefix + "'''", _litprefix + '"""')
# Single-line ' or " string.
String = group(
_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
_litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"',
_litprefix = r"(?:[uUrRbB]|[rR][bB]|[bBuU][rR])?"
_fstringlitprefix = r"(?:rF|FR|Fr|fr|RF|F|rf|f|Rf|fR)"
Triple = group(
_litprefix + "'''",
_litprefix + '"""',
_fstringlitprefix + '"""',
_fstringlitprefix + "'''",
)
# Beginning of a single-quoted f-string; must not end with `{{` or `\N{`
SingleLbrace = r"[^'\\{]*(?:(?:\\N{|\\.|{{)[^'\\{]*)*(?<!\\N){(?!{)"
DoubleLbrace = r'[^"\\{]*(?:(?:\\N{|\\.|{{)[^"\\{]*)*(?<!\\N){(?!{)'
# Beginning of a triple-quoted f-string; must not end with `{{` or `\N{`
Single3Lbrace = r"[^'{]*(?:(?:\\N{|\\[^{]|{{|'(?!''))[^'{]*)*(?<!\\N){(?!{)"
Double3Lbrace = r'[^"{]*(?:(?:\\N{|\\[^{]|{{|"(?!""))[^"{]*)*(?<!\\N){(?!{)'
# `!` conversion marker inside an f-string replacement field; make sure it's not a `!=` token
Bang = Whitespace + group("!") + r"(?!=)"
bang = re.compile(Bang)
Colon = Whitespace + group(":")
colon = re.compile(Colon)
FstringMiddleAfterColon = group(Whitespace + r".*?") + group("{", "}")
fstring_middle_after_colon = re.compile(FstringMiddleAfterColon)
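
To make the intent of these patterns concrete, a small self-check with DoubleLbrace copied verbatim from above:

import re

# Consume f-string text up to the first real `{`, treating `{{` and the
# `\N{...}` named-escape opener as literal text rather than a field start.
DoubleLbrace = r'[^"\\{]*(?:(?:\\N{|\\.|{{)[^"\\{]*)*(?<!\\N){(?!{)'
assert re.match(DoubleLbrace, "hello {x}")              # stops at the field
assert re.match(DoubleLbrace, r"\N{SNOWMAN} {x}")       # named escape skipped
assert re.match(DoubleLbrace, "{{literal}} {x}")        # escaped brace skipped
assert re.match(DoubleLbrace, "no field here") is None  # needs a real `{`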
# Because of leftmost-then-longest match semantics, be sure to put the
# longest operators first (e.g., if = came before ==, == would get
# recognized as two instances of =).
@ -147,42 +171,70 @@ def _combinations(*l: str) -> Set[str]:
Special = group(r"\r?\n", r"[:;.,`@]")
Funny = group(Operator, Bracket, Special)
_string_middle_single = r"[^\n'\\]*(?:\\.[^\n'\\]*)*"
_string_middle_double = r'[^\n"\\]*(?:\\.[^\n"\\]*)*'
# FSTRING_MIDDLE and LBRACE, must not end with a `{{` or `\N{`
_fstring_middle_single = r"[^\n'{]*(?:(?:\\N{|\\[^{]|{{)[^\n'{]*)*(?<!\\N)({)(?!{)"
_fstring_middle_double = r'[^\n"{]*(?:(?:\\N{|\\[^{]|{{)[^\n"{]*)*(?<!\\N)({)(?!{)'
# First (or only) line of ' or " string.
ContStr = group(
_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" + group("'", r"\\\r?\n"),
_litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' + group('"', r"\\\r?\n"),
_litprefix + "'" + _string_middle_single + group("'", r"\\\r?\n"),
_litprefix + '"' + _string_middle_double + group('"', r"\\\r?\n"),
group(_fstringlitprefix + "'") + _fstring_middle_single,
group(_fstringlitprefix + '"') + _fstring_middle_double,
group(_fstringlitprefix + "'") + _string_middle_single + group("'", r"\\\r?\n"),
group(_fstringlitprefix + '"') + _string_middle_double + group('"', r"\\\r?\n"),
)
PseudoExtras = group(r"\\\r?\n", Comment, Triple)
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
pseudoprog: Final = re.compile(PseudoToken, re.UNICODE)
single3prog = re.compile(Single3)
double3prog = re.compile(Double3)
_strprefixes = (
_combinations("r", "R", "f", "F")
| _combinations("r", "R", "b", "B")
| {"u", "U", "ur", "uR", "Ur", "UR"}
)
singleprog = re.compile(Single)
singleprog_plus_lbrace = re.compile(group(SingleLbrace, Single))
doubleprog = re.compile(Double)
doubleprog_plus_lbrace = re.compile(group(DoubleLbrace, Double))
single3prog = re.compile(Single3)
single3prog_plus_lbrace = re.compile(group(Single3Lbrace, Single3))
double3prog = re.compile(Double3)
double3prog_plus_lbrace = re.compile(group(Double3Lbrace, Double3))
_strprefixes = _combinations("r", "R", "b", "B") | {"u", "U", "ur", "uR", "Ur", "UR"}
_fstring_prefixes = _combinations("r", "R", "f", "F") - {"r", "R"}
endprogs: Final = {
"'": re.compile(Single),
'"': re.compile(Double),
"'": singleprog,
'"': doubleprog,
"'''": single3prog,
'"""': double3prog,
**{f"{prefix}'": singleprog for prefix in _strprefixes},
**{f'{prefix}"': doubleprog for prefix in _strprefixes},
**{f"{prefix}'": singleprog_plus_lbrace for prefix in _fstring_prefixes},
**{f'{prefix}"': doubleprog_plus_lbrace for prefix in _fstring_prefixes},
**{f"{prefix}'''": single3prog for prefix in _strprefixes},
**{f'{prefix}"""': double3prog for prefix in _strprefixes},
**{f"{prefix}'''": single3prog_plus_lbrace for prefix in _fstring_prefixes},
**{f'{prefix}"""': double3prog_plus_lbrace for prefix in _fstring_prefixes},
}
triple_quoted: Final = (
{"'''", '"""'}
| {f"{prefix}'''" for prefix in _strprefixes}
| {f'{prefix}"""' for prefix in _strprefixes}
| {f"{prefix}'''" for prefix in _strprefixes | _fstring_prefixes}
| {f'{prefix}"""' for prefix in _strprefixes | _fstring_prefixes}
)
single_quoted: Final = (
{"'", '"'}
| {f"{prefix}'" for prefix in _strprefixes}
| {f'{prefix}"' for prefix in _strprefixes}
| {f"{prefix}'" for prefix in _strprefixes | _fstring_prefixes}
| {f'{prefix}"' for prefix in _strprefixes | _fstring_prefixes}
)
fstring_prefix: Final = (
{f"{prefix}'" for prefix in _fstring_prefixes}
| {f'{prefix}"' for prefix in _fstring_prefixes}
| {f"{prefix}'''" for prefix in _fstring_prefixes}
| {f'{prefix}"""' for prefix in _fstring_prefixes}
)
tabsize = 8
@ -415,6 +467,19 @@ def untokenize(iterable: Iterable[TokenInfo]) -> str:
return ut.untokenize(iterable)
def is_fstring_start(token: str) -> bool:
return builtins.any(token.startswith(prefix) for prefix in fstring_prefix)
def _split_fstring_start_and_middle(token: str) -> Tuple[str, str]:
for prefix in fstring_prefix:
_, prefix, rest = token.partition(prefix)
if prefix != "":
return prefix, rest
raise ValueError(f"Token {token!r} is not a valid f-string start")
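
Both helpers key off the fstring_prefix set above; a quick behavioral sketch:

from blib2to3.pgen2.tokenize import (
    _split_fstring_start_and_middle,
    is_fstring_start,
)

assert is_fstring_start("f'")
assert is_fstring_start('rf"""')
assert not is_fstring_start("rb'")  # bytes prefix: a plain string start
assert _split_fstring_start_and_middle('f"hello {') == ('f"', "hello {")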
def generate_tokens(
readline: Callable[[], str], grammar: Optional[Grammar] = None
) -> Iterator[GoodTokenInfo]:
@ -433,7 +498,12 @@ def generate_tokens(
and the line on which the token was found. The line passed is the
logical line; continuation lines are included.
"""
lnum = parenlev = continued = 0
lnum = parenlev = fstring_level = continued = 0
parenlev_stack: List[int] = []
inside_fstring_braces = False
inside_fstring_colon = False
formatspec = ""
bracelev = 0
numchars: Final[str] = "0123456789"
contstr, needcont = "", 0
contline: Optional[str] = None
@ -449,7 +519,8 @@ def generate_tokens(
async_def_nl = False
strstart: Tuple[int, int]
endprog: Pattern[str]
endprog_stack: List[Pattern[str]] = []
formatspec_start: Tuple[int, int]
while 1: # loop over lines in stream
try:
@ -463,16 +534,72 @@ def generate_tokens(
assert contline is not None
if not line:
raise TokenError("EOF in multi-line string", strstart)
endprog = endprog_stack[-1]
endmatch = endprog.match(line)
if endmatch:
pos = end = endmatch.end(0)
yield (
STRING,
contstr + line[:end],
strstart,
(lnum, end),
contline + line,
)
end = endmatch.end(0)
token = contstr + line[:end]
spos = strstart
epos = (lnum, end)
tokenline = contline + line
if fstring_level == 0 and not is_fstring_start(token):
yield (STRING, token, spos, epos, tokenline)
endprog_stack.pop()
parenlev = parenlev_stack.pop()
else:
if is_fstring_start(token):
fstring_level += 1
fstring_start, token = _split_fstring_start_and_middle(token)
fstring_start_epos = (lnum, spos[1] + len(fstring_start))
yield (
FSTRING_START,
fstring_start,
spos,
fstring_start_epos,
tokenline,
)
# advance spos to the end of the FSTRING_START token
spos = fstring_start_epos
if token.endswith("{"):
fstring_middle, lbrace = token[:-1], token[-1]
fstring_middle_epos = lbrace_spos = (lnum, end - 1)
yield (
FSTRING_MIDDLE,
fstring_middle,
spos,
fstring_middle_epos,
line,
)
yield (LBRACE, lbrace, lbrace_spos, epos, line)
inside_fstring_braces = True
else:
if token.endswith(('"""', "'''")):
fstring_middle, fstring_end = token[:-3], token[-3:]
fstring_middle_epos = end_spos = (lnum, end - 3)
else:
fstring_middle, fstring_end = token[:-1], token[-1]
fstring_middle_epos = end_spos = (lnum, end - 1)
yield (
FSTRING_MIDDLE,
fstring_middle,
spos,
fstring_middle_epos,
line,
)
yield (
FSTRING_END,
fstring_end,
end_spos,
epos,
line,
)
fstring_level -= 1
endprog_stack.pop()
parenlev = parenlev_stack.pop()
if fstring_level > 0:
inside_fstring_braces = True
pos = end
contstr, needcont = "", 0
contline = None
elif needcont and line[-2:] != "\\\n" and line[-3:] != "\\\r\n":
@ -491,7 +618,8 @@ def generate_tokens(
contline = contline + line
continue
elif parenlev == 0 and not continued: # new statement
# new statement
elif parenlev == 0 and not continued and not inside_fstring_braces:
if not line:
break
column = 0
@ -559,6 +687,98 @@ def generate_tokens(
continued = 0
while pos < max:
if fstring_level > 0 and not inside_fstring_braces:
endprog = endprog_stack[-1]
endmatch = endprog.match(line, pos)
if endmatch: # all on one line
start, end = endmatch.span(0)
token = line[start:end]
if token.endswith(('"""', "'''")):
middle_token, end_token = token[:-3], token[-3:]
middle_epos = end_spos = (lnum, end - 3)
else:
middle_token, end_token = token[:-1], token[-1]
middle_epos = end_spos = (lnum, end - 1)
# TODO: unsure if this can be safely removed
if stashed:
yield stashed
stashed = None
yield (
FSTRING_MIDDLE,
middle_token,
(lnum, pos),
middle_epos,
line,
)
if not token.endswith("{"):
yield (
FSTRING_END,
end_token,
end_spos,
(lnum, end),
line,
)
fstring_level -= 1
endprog_stack.pop()
parenlev = parenlev_stack.pop()
if fstring_level > 0:
inside_fstring_braces = True
else:
yield (LBRACE, "{", (lnum, end - 1), (lnum, end), line)
inside_fstring_braces = True
pos = end
continue
else: # multiple lines
strstart = (lnum, end)
contstr = line[end:]
contline = line
break
if inside_fstring_colon:
match = fstring_middle_after_colon.match(line, pos)
if match is None:
formatspec += line[pos:]
pos = max
continue
start, end = match.span(1)
token = line[start:end]
formatspec += token
brace_start, brace_end = match.span(2)
brace_or_nl = line[brace_start:brace_end]
if brace_or_nl == "\n":
pos = brace_end
yield (FSTRING_MIDDLE, formatspec, formatspec_start, (lnum, end), line)
formatspec = ""
if brace_or_nl == "{":
yield (OP, "{", (lnum, brace_start), (lnum, brace_end), line)
bracelev += 1
end = brace_end
inside_fstring_colon = False
pos = end
continue
if inside_fstring_braces and parenlev == 0:
match = bang.match(line, pos)
if match:
start, end = match.span(1)
yield (OP, "!", (lnum, start), (lnum, end), line)
pos = end
continue
match = colon.match(line, pos)
if match:
start, end = match.span(1)
yield (OP, ":", (lnum, start), (lnum, end), line)
inside_fstring_colon = True
formatspec_start = (lnum, end)
pos = end
continue
pseudomatch = pseudoprog.match(line, pos)
if pseudomatch: # scan for tokens
start, end = pseudomatch.span(1)
@ -571,7 +791,7 @@ def generate_tokens(
yield (NUMBER, token, spos, epos, line)
elif initial in "\r\n":
newline = NEWLINE
if parenlev > 0:
if parenlev > 0 or inside_fstring_braces:
newline = NL
elif async_def:
async_def_nl = True
@ -588,17 +808,72 @@ def generate_tokens(
yield (COMMENT, token, spos, epos, line)
elif token in triple_quoted:
endprog = endprogs[token]
endprog_stack.append(endprog)
parenlev_stack.append(parenlev)
parenlev = 0
if is_fstring_start(token):
yield (FSTRING_START, token, spos, epos, line)
fstring_level += 1
endmatch = endprog.match(line, pos)
if endmatch: # all on one line
pos = endmatch.end(0)
token = line[start:pos]
if stashed:
yield stashed
stashed = None
yield (STRING, token, spos, (lnum, pos), line)
if not is_fstring_start(token):
pos = endmatch.end(0)
token = line[start:pos]
epos = (lnum, pos)
yield (STRING, token, spos, epos, line)
endprog_stack.pop()
parenlev = parenlev_stack.pop()
else:
end = endmatch.end(0)
token = line[pos:end]
spos, epos = (lnum, pos), (lnum, end)
if not token.endswith("{"):
fstring_middle, fstring_end = token[:-3], token[-3:]
fstring_middle_epos = fstring_end_spos = (lnum, end - 3)
yield (
FSTRING_MIDDLE,
fstring_middle,
spos,
fstring_middle_epos,
line,
)
yield (
FSTRING_END,
fstring_end,
fstring_end_spos,
epos,
line,
)
fstring_level -= 1
endprog_stack.pop()
parenlev = parenlev_stack.pop()
if fstring_level > 0:
inside_fstring_braces = True
else:
fstring_middle, lbrace = token[:-1], token[-1]
fstring_middle_epos = lbrace_spos = (lnum, end - 1)
yield (
FSTRING_MIDDLE,
fstring_middle,
spos,
fstring_middle_epos,
line,
)
yield (LBRACE, lbrace, lbrace_spos, epos, line)
inside_fstring_braces = True
pos = end
else:
strstart = (lnum, start) # multiple lines
contstr = line[start:]
# multiple lines
if is_fstring_start(token):
strstart = (lnum, pos)
contstr = line[pos:]
else:
strstart = (lnum, start)
contstr = line[start:]
contline = line
break
elif (
@ -606,17 +881,18 @@ def generate_tokens(
or token[:2] in single_quoted
or token[:3] in single_quoted
):
maybe_endprog = (
endprogs.get(initial)
or endprogs.get(token[:2])
or endprogs.get(token[:3])
)
assert maybe_endprog is not None, f"endprog not found for {token}"
endprog = maybe_endprog
if token[-1] == "\n": # continued string
endprog_stack.append(endprog)
parenlev_stack.append(parenlev)
parenlev = 0
strstart = (lnum, start)
maybe_endprog = (
endprogs.get(initial)
or endprogs.get(token[1])
or endprogs.get(token[2])
)
assert (
maybe_endprog is not None
), f"endprog not found for {token}"
endprog = maybe_endprog
contstr, needcont = line[start:], 1
contline = line
break
@ -624,7 +900,57 @@ def generate_tokens(
if stashed:
yield stashed
stashed = None
yield (STRING, token, spos, epos, line)
if not is_fstring_start(token):
yield (STRING, token, spos, epos, line)
else:
if pseudomatch[20] is not None:
fstring_start = pseudomatch[20]
offset = pseudomatch.end(20) - pseudomatch.start(1)
elif pseudomatch[22] is not None:
fstring_start = pseudomatch[22]
offset = pseudomatch.end(22) - pseudomatch.start(1)
elif pseudomatch[24] is not None:
fstring_start = pseudomatch[24]
offset = pseudomatch.end(24) - pseudomatch.start(1)
else:
fstring_start = pseudomatch[26]
offset = pseudomatch.end(26) - pseudomatch.start(1)
start_epos = (lnum, start + offset)
yield (FSTRING_START, fstring_start, spos, start_epos, line)
fstring_level += 1
endprog = endprogs[fstring_start]
endprog_stack.append(endprog)
parenlev_stack.append(parenlev)
parenlev = 0
end_offset = pseudomatch.end(1) - 1
fstring_middle = line[start + offset : end_offset]
middle_spos = (lnum, start + offset)
middle_epos = (lnum, end_offset)
yield (
FSTRING_MIDDLE,
fstring_middle,
middle_spos,
middle_epos,
line,
)
if not token.endswith("{"):
end_spos = (lnum, end_offset)
end_epos = (lnum, end_offset + 1)
yield (FSTRING_END, token[-1], end_spos, end_epos, line)
fstring_level -= 1
endprog_stack.pop()
parenlev = parenlev_stack.pop()
if fstring_level > 0:
inside_fstring_braces = True
else:
end_spos = (lnum, end_offset)
end_epos = (lnum, end_offset + 1)
yield (LBRACE, "{", end_spos, end_epos, line)
inside_fstring_braces = True
elif initial.isidentifier(): # ordinary name
if token in ("async", "await"):
if async_keywords or async_def:
@ -669,8 +995,22 @@ def generate_tokens(
stashed = None
yield (NL, token, spos, (lnum, pos), line)
continued = 1
elif (
initial == "}"
and parenlev == 0
and bracelev == 0
and fstring_level > 0
):
yield (RBRACE, token, spos, epos, line)
inside_fstring_braces = False
else:
if initial in "([{":
if parenlev == 0 and bracelev > 0 and initial == "}":
bracelev -= 1
# if we're still inside fstrings, we're still part of the format spec
if inside_fstring_braces:
inside_fstring_colon = True
formatspec_start = (lnum, pos)
elif initial in "([{":
parenlev += 1
elif initial in ")]}":
parenlev -= 1
@ -689,6 +1029,8 @@ def generate_tokens(
for _indent in indents[1:]: # pop remaining indent levels
yield (DEDENT, "", (lnum, 0), (lnum, 0), "")
yield (ENDMARKER, "", (lnum, 0), (lnum, 0), "")
assert len(endprog_stack) == 0
assert len(parenlev_stack) == 0
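
Putting the loop together, this is the token stream I would expect for a simple one-line f-string (a sketch based on my reading of the branches above, including the FSTRING_MIDDLE emitted for the text after the closing brace):

import io
from blib2to3.pgen2.token import tok_name
from blib2to3.pgen2.tokenize import generate_tokens

readline = io.StringIO('f"hello {name}!"\n').readline
for tok_type, value, _start, _end, _line in generate_tokens(readline):
    print(tok_name[tok_type], repr(value))
# Expected, roughly:
#   FSTRING_START 'f"'    FSTRING_MIDDLE 'hello '    LBRACE '{'
#   NAME 'name'           RBRACE '}'                 FSTRING_MIDDLE '!'
#   FSTRING_END '"'       NEWLINE '\n'               ENDMARKER ''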
if __name__ == "__main__": # testing

View File

@ -70,6 +70,10 @@ class _python_symbols(Symbols):
file_input: int
flow_stmt: int
for_stmt: int
fstring: int
fstring_format_spec: int
fstring_middle: int
fstring_replacement_field: int
funcdef: int
global_stmt: int
guard: int

tests/data/cases/pep_701.py (new file, 224 lines)
View File

@ -0,0 +1,224 @@
# flags: --minimum-version=3.12
x = f"foo"
x = f'foo'
x = f"""foo"""
x = f'''foo'''
x = f"foo {{ bar {{ baz"
x = f"foo {{ {2 + 2}bar {{ baz"
x = f'foo {{ {2 + 2}bar {{ baz'
x = f"""foo {{ {2 + 2}bar {{ baz"""
x = f'''foo {{ {2 + 2}bar {{ baz'''
# edge case: FSTRING_MIDDLE containing only whitespace should not be stripped
x = f"{a} {b}"
x = f"foo {
2 + 2
} bar baz"
x = f"foo {{ {"a {2 + 2} b"}bar {{ baz"
x = f"foo {{ {f'a {2 + 2} b'}bar {{ baz"
x = f"foo {{ {f"a {2 + 2} b"}bar {{ baz"
x = f"foo {{ {f'a {f"a {2 + 2} b"} b'}bar {{ baz"
x = f"foo {{ {f"a {f"a {2 + 2} b"} b"}bar {{ baz"
x = """foo {{ {2 + 2}bar
baz"""
x = f"""foo {{ {2 + 2}bar {{ baz"""
x = f"""foo {{ {
2 + 2
}bar {{ baz"""
x = f"""foo {{ {
2 + 2
}bar
baz"""
x = f"""foo {{ a
foo {2 + 2}bar {{ baz
x = f"foo {{ {
2 + 2 # comment
}bar"
{{ baz
}} buzz
{print("abc" + "def"
)}
abc"""
# edge case: end triple quotes at index zero
f"""foo {2+2} bar
"""
f' \' {f"'"} \' '
f" \" {f'"'} \" "
x = f"a{2+2:=^72}b"
x = f"a{2+2:x}b"
rf'foo'
rf'{foo}'
f"{x:{y}d}"
x = f"a{2+2:=^{x}}b"
x = f"a{2+2:=^{foo(x+y**2):something else}}b"
x = f"a{2+2:=^{foo(x+y**2):something else}one more}b"
f'{(abc:=10)}'
f"This is a really long string, but just make sure that you reflow fstrings {
2+2:d
}"
f"This is a really long string, but just make sure that you reflow fstrings correctly {2+2:d}"
f"{2+2=}"
f"{2+2 = }"
f"{ 2 + 2 = }"
f"""foo {
datetime.datetime.now():%Y
%m
%d
}"""
f"{
X
!r
}"
raise ValueError(
"xxxxxxxxxxxIncorrect --line-ranges format, expect START-END, found"
f" {lines_str!r}"
)
f"`escape` only permitted in {{'html', 'latex', 'latex-math'}}, \
got {escape}"
x = f'\N{GREEK CAPITAL LETTER DELTA} \N{SNOWMAN} {x}'
fr'\{{\}}'
f"""
WITH {f'''
{1}_cte AS ()'''}
"""
# output
x = f"foo"
x = f"foo"
x = f"""foo"""
x = f"""foo"""
x = f"foo {{ bar {{ baz"
x = f"foo {{ {2 + 2}bar {{ baz"
x = f"foo {{ {2 + 2}bar {{ baz"
x = f"""foo {{ {2 + 2}bar {{ baz"""
x = f"""foo {{ {2 + 2}bar {{ baz"""
# edge case: FSTRING_MIDDLE containing only whitespace should not be stripped
x = f"{a} {b}"
x = f"foo {
2 + 2
} bar baz"
x = f"foo {{ {"a {2 + 2} b"}bar {{ baz"
x = f"foo {{ {f'a {2 + 2} b'}bar {{ baz"
x = f"foo {{ {f"a {2 + 2} b"}bar {{ baz"
x = f"foo {{ {f'a {f"a {2 + 2} b"} b'}bar {{ baz"
x = f"foo {{ {f"a {f"a {2 + 2} b"} b"}bar {{ baz"
x = """foo {{ {2 + 2}bar
baz"""
x = f"""foo {{ {2 + 2}bar {{ baz"""
x = f"""foo {{ {
2 + 2
}bar {{ baz"""
x = f"""foo {{ {
2 + 2
}bar
baz"""
x = f"""foo {{ a
foo {2 + 2}bar {{ baz
x = f"foo {{ {
2 + 2 # comment
}bar"
{{ baz
}} buzz
{print("abc" + "def"
)}
abc"""
# edge case: end triple quotes at index zero
f"""foo {2+2} bar
"""
f' \' {f"'"} \' '
f" \" {f'"'} \" "
x = f"a{2+2:=^72}b"
x = f"a{2+2:x}b"
rf"foo"
rf"{foo}"
f"{x:{y}d}"
x = f"a{2+2:=^{x}}b"
x = f"a{2+2:=^{foo(x+y**2):something else}}b"
x = f"a{2+2:=^{foo(x+y**2):something else}one more}b"
f"{(abc:=10)}"
f"This is a really long string, but just make sure that you reflow fstrings {
2+2:d
}"
f"This is a really long string, but just make sure that you reflow fstrings correctly {2+2:d}"
f"{2+2=}"
f"{2+2 = }"
f"{ 2 + 2 = }"
f"""foo {
datetime.datetime.now():%Y
%m
%d
}"""
f"{
X
!r
}"
raise ValueError(
"xxxxxxxxxxxIncorrect --line-ranges format, expect START-END, found"
f" {lines_str!r}"
)
f"`escape` only permitted in {{'html', 'latex', 'latex-math'}}, \
got {escape}"
x = f"\N{GREEK CAPITAL LETTER DELTA} \N{SNOWMAN} {x}"
rf"\{{\}}"
f"""
WITH {f'''
{1}_cte AS ()'''}
"""

View File

@ -229,8 +229,34 @@ file_input
LPAR
'('
arglist
STRING
"f'{indent}{_type}'"
fstring
FSTRING_START
"f'"
FSTRING_MIDDLE
''
fstring_replacement_field
LBRACE
'{'
NAME
'indent'
RBRACE
'}'
/fstring_replacement_field
FSTRING_MIDDLE
''
fstring_replacement_field
LBRACE
'{'
NAME
'_type'
RBRACE
'}'
/fstring_replacement_field
FSTRING_MIDDLE
''
FSTRING_END
"'"
/fstring
COMMA
','
argument
@ -370,8 +396,34 @@ file_input
LPAR
'('
arglist
STRING
"f'{indent}/{_type}'"
fstring
FSTRING_START
"f'"
FSTRING_MIDDLE
''
fstring_replacement_field
LBRACE
'{'
NAME
'indent'
RBRACE
'}'
/fstring_replacement_field
FSTRING_MIDDLE
'/'
fstring_replacement_field
LBRACE
'{'
NAME
'_type'
RBRACE
'}'
/fstring_replacement_field
FSTRING_MIDDLE
''
FSTRING_END
"'"
/fstring
COMMA
','
argument
@ -494,8 +546,34 @@ file_input
LPAR
'('
arglist
STRING
"f'{indent}{_type}'"
fstring
FSTRING_START
"f'"
FSTRING_MIDDLE
''
fstring_replacement_field
LBRACE
'{'
NAME
'indent'
RBRACE
'}'
/fstring_replacement_field
FSTRING_MIDDLE
''
fstring_replacement_field
LBRACE
'{'
NAME
'_type'
RBRACE
'}'
/fstring_replacement_field
FSTRING_MIDDLE
''
FSTRING_END
"'"
/fstring
COMMA
','
argument
@ -557,8 +635,36 @@ file_input
LPAR
'('
arglist
STRING
"f' {node.prefix!r}'"
fstring
FSTRING_START
"f'"
FSTRING_MIDDLE
' '
fstring_replacement_field
LBRACE
'{'
power
NAME
'node'
trailer
DOT
'.'
NAME
'prefix'
/trailer
/power
BANG
'!'
NAME
'r'
RBRACE
'}'
/fstring_replacement_field
FSTRING_MIDDLE
''
FSTRING_END
"'"
/fstring
COMMA
','
argument
@ -613,8 +719,36 @@ file_input
LPAR
'('
arglist
STRING
"f' {node.value!r}'"
fstring
FSTRING_START
"f'"
FSTRING_MIDDLE
' '
fstring_replacement_field
LBRACE
'{'
power
NAME
'node'
trailer
DOT
'.'
NAME
'value'
/trailer
/power
BANG
'!'
NAME
'r'
RBRACE
'}'
/fstring_replacement_field
FSTRING_MIDDLE
''
FSTRING_END
"'"
/fstring
COMMA
','
argument

View File

@ -343,12 +343,11 @@ def test_detect_debug_f_strings(self) -> None:
features = black.get_features_used(root)
self.assertNotIn(black.Feature.DEBUG_F_STRINGS, features)
# We don't yet support feature version detection in nested f-strings
root = black.lib2to3_parse(
"""f"heard a rumour that { f'{1+1=}' } ... seems like it could be true" """
)
features = black.get_features_used(root)
self.assertNotIn(black.Feature.DEBUG_F_STRINGS, features)
self.assertIn(black.Feature.DEBUG_F_STRINGS, features)
@patch("black.dump_to_file", dump_to_stderr)
def test_string_quotes(self) -> None: