Improve multiline string handling (#1879)

Co-authored-by: Olivia Hong <ohong@lyft.com>
Co-authored-by: Olivia Hong <24500729+olivia-hong@users.noreply.github.com>
This commit is contained in:
Aneesh Agrawal 2023-03-07 14:52:19 -05:00 committed by GitHub
parent 25d886f52c
commit 4a063a9f8d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 514 additions and 33 deletions

View File

@ -133,6 +133,7 @@ versions separately.
code. Implicitly concatenated f-strings with different quotes can now be merged or
quote-normalized by changing the quotes used in expressions. (#3509)
- Fix crash on `await (yield)` when Black is compiled with mypyc (#3533)
- Improve handling of multiline strings by changing line split behavior (#1879)
### Configuration

View File

@ -111,3 +111,51 @@ my_dict = {
"another key": short_value,
}
```
### Improved multiline string handling
_Black_ is smarter when formatting multiline strings, especially in function arguments,
to avoid introducing extra line breaks. Previously, it would always consider multiline
strings as not fitting on a single line. With this new feature, _Black_ looks at the
context around the multiline string to decide if it should be inlined or split to a
separate line. For example, when a multiline string is passed to a function, _Black_
will only split the multiline string if a line is too long or if multiple arguments are
being passed.
For example, _Black_ will reformat
```python
textwrap.dedent(
"""\
This is a
multiline string
"""
)
```
to:
```python
textwrap.dedent("""\
This is a
multiline string
""")
```
And:
```python
MULTILINE = """
foobar
""".replace(
"\n", ""
)
```
to:
```python
MULTILINE = """
foobar
""".replace("\n", "")
```

View File

@ -2,7 +2,7 @@
Generating lines of code.
"""
import sys
from dataclasses import dataclass
from dataclasses import dataclass, replace
from enum import Enum, auto
from functools import partial, wraps
from typing import Collection, Iterator, List, Optional, Set, Union, cast
@ -505,7 +505,7 @@ def transform_line(
and not line.should_split_rhs
and not line.magic_trailing_comma
and (
is_line_short_enough(line, line_length=mode.line_length, line_str=line_str)
is_line_short_enough(line, mode=mode, line_str=line_str)
or line.contains_unsplittable_type_ignore()
)
and not (line.inside_brackets and line.contains_standalone_comments())
@ -529,14 +529,12 @@ def _rhs(
bracket pair instead.
"""
for omit in generate_trailers_to_omit(line, mode.line_length):
lines = list(
right_hand_split(line, mode.line_length, features, omit=omit)
)
lines = list(right_hand_split(line, mode, features, omit=omit))
# Note: this check is only able to figure out if the first line of the
# *current* transformation fits in the line length. This is true only
# for simple cases. All others require running more transforms via
# `transform_line()`. This check doesn't know if those would succeed.
if is_line_short_enough(lines[0], line_length=mode.line_length):
if is_line_short_enough(lines[0], mode=mode):
yield from lines
return
@ -544,9 +542,7 @@ def _rhs(
# This mostly happens to multiline strings that are by definition
# reported as not fitting a single line, as well as lines that contain
# trailing commas (those have to be exploded).
yield from right_hand_split(
line, line_length=mode.line_length, features=features
)
yield from right_hand_split(line, mode, features=features)
# HACK: nested functions (like _rhs) compiled by mypyc don't retain their
# __name__ attribute which is needed in `run_transformer` further down.
@ -664,7 +660,7 @@ class _RHSResult:
def right_hand_split(
line: Line,
line_length: int,
mode: Mode,
features: Collection[Feature] = (),
omit: Collection[LeafID] = (),
) -> Iterator[Line]:
@ -678,7 +674,7 @@ def right_hand_split(
"""
rhs_result = _first_right_hand_split(line, omit=omit)
yield from _maybe_split_omitting_optional_parens(
rhs_result, line, line_length, features=features, omit=omit
rhs_result, line, mode, features=features, omit=omit
)
@ -733,7 +729,7 @@ def _first_right_hand_split(
def _maybe_split_omitting_optional_parens(
rhs: _RHSResult,
line: Line,
line_length: int,
mode: Mode,
features: Collection[Feature] = (),
omit: Collection[LeafID] = (),
) -> Iterator[Line]:
@ -751,7 +747,7 @@ def _maybe_split_omitting_optional_parens(
# there are no standalone comments in the body
and not rhs.body.contains_standalone_comments(0)
# and we can actually remove the parens
and can_omit_invisible_parens(rhs.body, line_length)
and can_omit_invisible_parens(rhs.body, mode.line_length)
):
omit = {id(rhs.closing_bracket), *omit}
try:
@ -766,23 +762,24 @@ def _maybe_split_omitting_optional_parens(
and any(leaf.type in BRACKETS for leaf in rhs.head.leaves[:-1])
# the left side of assignment is short enough (the -1 is for the ending
# optional paren)
and is_line_short_enough(rhs.head, line_length=line_length - 1)
and is_line_short_enough(
rhs.head, mode=replace(mode, line_length=mode.line_length - 1)
)
# the left side of assignment won't explode further because of magic
# trailing comma
and rhs.head.magic_trailing_comma is None
# the split by omitting optional parens isn't preferred by some other
# reason
and not _prefer_split_rhs_oop(rhs_oop, line_length=line_length)
and not _prefer_split_rhs_oop(rhs_oop, mode)
):
yield from _maybe_split_omitting_optional_parens(
rhs_oop, line, line_length, features=features, omit=omit
rhs_oop, line, mode, features=features, omit=omit
)
return
except CannotSplit as e:
if not (
can_be_split(rhs.body)
or is_line_short_enough(rhs.body, line_length=line_length)
can_be_split(rhs.body) or is_line_short_enough(rhs.body, mode=mode)
):
raise CannotSplit(
"Splitting failed, body is still too long and can't be split."
@ -806,7 +803,7 @@ def _maybe_split_omitting_optional_parens(
yield result
def _prefer_split_rhs_oop(rhs_oop: _RHSResult, line_length: int) -> bool:
def _prefer_split_rhs_oop(rhs_oop: _RHSResult, mode: Mode) -> bool:
"""
Returns whether we should prefer the result from a split omitting optional parens.
"""
@ -826,7 +823,7 @@ def _prefer_split_rhs_oop(rhs_oop: _RHSResult, line_length: int) -> bool:
# the first line still contains the `=`)
any(leaf.type == token.EQUAL for leaf in rhs_oop.head.leaves)
# the first line is short enough
and is_line_short_enough(rhs_oop.head, line_length=line_length)
and is_line_short_enough(rhs_oop.head, mode=mode)
)
# contains unsplittable type ignore
or rhs_oop.head.contains_unsplittable_type_ignore()
@ -1525,7 +1522,7 @@ def run_transformer(
or line.contains_multiline_strings()
or result[0].contains_uncollapsable_type_comments()
or result[0].contains_unsplittable_type_ignore()
or is_line_short_enough(result[0], line_length=mode.line_length)
or is_line_short_enough(result[0], mode=mode)
# If any leaves have no parents (which _can_ occur since
# `transform(line)` potentially destroys the line's underlying node
# structure), then we can't proceed. Doing so would cause the below
@ -1540,8 +1537,6 @@ def run_transformer(
second_opinion = run_transformer(
line_copy, transform, mode, features_fop, line_str=line_str
)
if all(
is_line_short_enough(ln, line_length=mode.line_length) for ln in second_opinion
):
if all(is_line_short_enough(ln, mode=mode) for ln in second_opinion):
result = second_opinion
return result

View File

@ -1,4 +1,5 @@
import itertools
import math
import sys
from dataclasses import dataclass, field
from typing import (
@ -10,11 +11,12 @@
Sequence,
Tuple,
TypeVar,
Union,
cast,
)
from black.brackets import DOT_PRIORITY, BracketTracker
from black.mode import Mode
from black.mode import Mode, Preview
from black.nodes import (
BRACKETS,
CLOSING_BRACKETS,
@ -37,6 +39,7 @@
T = TypeVar("T")
Index = int
LeafID = int
LN = Union[Leaf, Node]
@dataclass
@ -701,18 +704,93 @@ def append_leaves(
new_line.append(comment_leaf, preformatted=True)
def is_line_short_enough(line: Line, *, line_length: int, line_str: str = "") -> bool:
"""Return True if `line` is no longer than `line_length`.
def is_line_short_enough( # noqa: C901
line: Line, *, mode: Mode, line_str: str = ""
) -> bool:
"""For non-multiline strings, return True if `line` is no longer than `line_length`.
For multiline strings, looks at the context around `line` to determine
if it should be inlined or split up.
Uses the provided `line_str` rendering, if any, otherwise computes a new one.
"""
if not line_str:
line_str = line_to_string(line)
return (
len(line_str) <= line_length
and "\n" not in line_str # multiline strings
and not line.contains_standalone_comments()
)
if Preview.multiline_string_handling not in mode:
return (
len(line_str) <= mode.line_length
and "\n" not in line_str # multiline strings
and not line.contains_standalone_comments()
)
if line.contains_standalone_comments():
return False
if "\n" not in line_str:
# No multiline strings (MLS) present
return len(line_str) <= mode.line_length
first, *_, last = line_str.split("\n")
if len(first) > mode.line_length or len(last) > mode.line_length:
return False
# Traverse the AST to examine the context of the multiline string (MLS),
# tracking aspects such as depth and comma existence,
# to determine whether to split the MLS or keep it together.
# Depth (which is based on the existing bracket_depth concept)
# is needed to determine nesting level of the MLS.
# Includes special case for trailing commas.
commas: List[int] = [] # tracks number of commas per depth level
multiline_string: Optional[Leaf] = None
# store the leaves that contain parts of the MLS
multiline_string_contexts: List[LN] = []
max_level_to_update = math.inf # track the depth of the MLS
for i, leaf in enumerate(line.leaves):
if max_level_to_update == math.inf:
had_comma: Optional[int] = None
if leaf.bracket_depth + 1 > len(commas):
commas.append(0)
elif leaf.bracket_depth + 1 < len(commas):
had_comma = commas.pop()
if (
had_comma is not None
and multiline_string is not None
and multiline_string.bracket_depth == leaf.bracket_depth + 1
):
# Have left the level with the MLS, stop tracking commas
max_level_to_update = leaf.bracket_depth
if had_comma > 0:
# MLS was in parens with at least one comma - force split
return False
if leaf.bracket_depth <= max_level_to_update and leaf.type == token.COMMA:
# Ignore non-nested trailing comma
# directly after MLS/MLS-containing expression
ignore_ctxs: List[Optional[LN]] = [None]
ignore_ctxs += multiline_string_contexts
if not (leaf.prev_sibling in ignore_ctxs and i == len(line.leaves) - 1):
commas[leaf.bracket_depth] += 1
if max_level_to_update != math.inf:
max_level_to_update = min(max_level_to_update, leaf.bracket_depth)
if is_multiline_string(leaf):
if len(multiline_string_contexts) > 0:
# >1 multiline string cannot fit on a single line - force split
return False
multiline_string = leaf
ctx: LN = leaf
# fetch the leaf components of the MLS in the AST
while str(ctx) in line_str:
multiline_string_contexts.append(ctx)
if ctx.parent is None:
break
ctx = ctx.parent
# May not have a triple-quoted multiline string at all,
# in case of a regular string with embedded newlines and line continuations
if len(multiline_string_contexts) == 0:
return True
return all(val == 0 for val in commas)
def can_be_split(line: Line) -> bool:

View File

@ -155,6 +155,7 @@ class Preview(Enum):
add_trailing_comma_consistently = auto()
hex_codes_in_unicode_sequences = auto()
multiline_string_handling = auto()
prefer_splitting_right_hand_side_of_assignments = auto()
# NOTE: string_processing requires wrap_long_dict_values_in_parens
# for https://github.com/psf/black/issues/3117 to be fixed.

View File

@ -0,0 +1,358 @@
"""cow
say""",
call(3, "dogsay", textwrap.dedent("""dove
coo""" % "cowabunga"))
call(3, "dogsay", textwrap.dedent("""dove
coo""" % "cowabunga"))
call(3, textwrap.dedent("""cow
moo""" % "cowabunga"), "dogsay")
call(3, "dogsay", textwrap.dedent("""crow
caw""" % "cowabunga"),)
call(3, textwrap.dedent("""cat
meow""" % "cowabunga"), {"dog", "say"})
call(3, {"dog", "say"}, textwrap.dedent("""horse
neigh""" % "cowabunga"))
call(3, {"dog", "say"}, textwrap.dedent("""pig
oink""" % "cowabunga"),)
textwrap.dedent("""A one-line triple-quoted string.""")
textwrap.dedent("""A two-line triple-quoted string
since it goes to the next line.""")
textwrap.dedent("""A three-line triple-quoted string
that not only goes to the next line
but also goes one line beyond.""")
textwrap.dedent("""\
A triple-quoted string
actually leveraging the textwrap.dedent functionality
that ends in a trailing newline,
representing e.g. file contents.
""")
path.write_text(textwrap.dedent("""\
A triple-quoted string
actually leveraging the textwrap.dedent functionality
that ends in a trailing newline,
representing e.g. file contents.
"""))
path.write_text(textwrap.dedent("""\
A triple-quoted string
actually leveraging the textwrap.dedent functionality
that ends in a trailing newline,
representing e.g. {config_filename} file contents.
""".format("config_filename", config_filename)))
# Another use case
data = yaml.load("""\
a: 1
b: 2
""")
data = yaml.load("""\
a: 1
b: 2
""",)
data = yaml.load(
"""\
a: 1
b: 2
"""
)
MULTILINE = """
foo
""".replace("\n", "")
generated_readme = lambda project_name: """
{}
<Add content here!>
""".strip().format(project_name)
parser.usage += """
Custom extra help summary.
Extra test:
- with
- bullets
"""
def get_stuff(cr, value):
# original
cr.execute("""
SELECT whatever
FROM some_table t
WHERE id = %s
""", [value])
return cr.fetchone()
def get_stuff(cr, value):
# preferred
cr.execute(
"""
SELECT whatever
FROM some_table t
WHERE id = %s
""",
[value],
)
return cr.fetchone()
call(arg1, arg2, """
short
""", arg3=True)
test_vectors = [
"one-liner\n",
"two\nliner\n",
"""expressed
as a three line
mulitline string""",
]
_wat = re.compile(
r"""
regex
""",
re.MULTILINE | re.VERBOSE,
)
dis_c_instance_method = """\
%3d 0 LOAD_FAST 1 (x)
2 LOAD_CONST 1 (1)
4 COMPARE_OP 2 (==)
6 LOAD_FAST 0 (self)
8 STORE_ATTR 0 (x)
10 LOAD_CONST 0 (None)
12 RETURN_VALUE
""" % (_C.__init__.__code__.co_firstlineno + 1,)
path.write_text(textwrap.dedent("""\
A triple-quoted string
actually {verb} the textwrap.dedent functionality
that ends in a trailing newline,
representing e.g. {file_type} file contents.
""".format(verb="using", file_type="test")))
{"""cow
moos"""}
["""cow
moos"""]
["""cow
moos""", """dog
woofs
and
barks"""]
def dastardly_default_value(
cow: String = json.loads("""this
is
quite
the
dastadardly
value!"""),
**kwargs,
):
pass
print(f"""
This {animal}
moos and barks
{animal} say
""")
msg = f"""The arguments {bad_arguments} were passed in.
Please use `--build-option` instead,
`--global-option` is reserved to flags like `--verbose` or `--quiet`.
"""
# output
"""cow
say""",
call(
3,
"dogsay",
textwrap.dedent("""dove
coo""" % "cowabunga"),
)
call(
3,
"dogsay",
textwrap.dedent("""dove
coo""" % "cowabunga"),
)
call(
3,
textwrap.dedent("""cow
moo""" % "cowabunga"),
"dogsay",
)
call(
3,
"dogsay",
textwrap.dedent("""crow
caw""" % "cowabunga"),
)
call(
3,
textwrap.dedent("""cat
meow""" % "cowabunga"),
{"dog", "say"},
)
call(
3,
{"dog", "say"},
textwrap.dedent("""horse
neigh""" % "cowabunga"),
)
call(
3,
{"dog", "say"},
textwrap.dedent("""pig
oink""" % "cowabunga"),
)
textwrap.dedent("""A one-line triple-quoted string.""")
textwrap.dedent("""A two-line triple-quoted string
since it goes to the next line.""")
textwrap.dedent("""A three-line triple-quoted string
that not only goes to the next line
but also goes one line beyond.""")
textwrap.dedent("""\
A triple-quoted string
actually leveraging the textwrap.dedent functionality
that ends in a trailing newline,
representing e.g. file contents.
""")
path.write_text(textwrap.dedent("""\
A triple-quoted string
actually leveraging the textwrap.dedent functionality
that ends in a trailing newline,
representing e.g. file contents.
"""))
path.write_text(textwrap.dedent("""\
A triple-quoted string
actually leveraging the textwrap.dedent functionality
that ends in a trailing newline,
representing e.g. {config_filename} file contents.
""".format("config_filename", config_filename)))
# Another use case
data = yaml.load("""\
a: 1
b: 2
""")
data = yaml.load(
"""\
a: 1
b: 2
""",
)
data = yaml.load("""\
a: 1
b: 2
""")
MULTILINE = """
foo
""".replace("\n", "")
generated_readme = lambda project_name: """
{}
<Add content here!>
""".strip().format(project_name)
parser.usage += """
Custom extra help summary.
Extra test:
- with
- bullets
"""
def get_stuff(cr, value):
# original
cr.execute(
"""
SELECT whatever
FROM some_table t
WHERE id = %s
""",
[value],
)
return cr.fetchone()
def get_stuff(cr, value):
# preferred
cr.execute(
"""
SELECT whatever
FROM some_table t
WHERE id = %s
""",
[value],
)
return cr.fetchone()
call(
arg1,
arg2,
"""
short
""",
arg3=True,
)
test_vectors = [
"one-liner\n",
"two\nliner\n",
"""expressed
as a three line
mulitline string""",
]
_wat = re.compile(
r"""
regex
""",
re.MULTILINE | re.VERBOSE,
)
dis_c_instance_method = """\
%3d 0 LOAD_FAST 1 (x)
2 LOAD_CONST 1 (1)
4 COMPARE_OP 2 (==)
6 LOAD_FAST 0 (self)
8 STORE_ATTR 0 (x)
10 LOAD_CONST 0 (None)
12 RETURN_VALUE
""" % (_C.__init__.__code__.co_firstlineno + 1,)
path.write_text(textwrap.dedent("""\
A triple-quoted string
actually {verb} the textwrap.dedent functionality
that ends in a trailing newline,
representing e.g. {file_type} file contents.
""".format(verb="using", file_type="test")))
{"""cow
moos"""}
["""cow
moos"""]
[
"""cow
moos""",
"""dog
woofs
and
barks""",
]
def dastardly_default_value(
cow: String = json.loads("""this
is
quite
the
dastadardly
value!"""),
**kwargs,
):
pass
print(f"""
This {animal}
moos and barks
{animal} say
""")
msg = f"""The arguments {bad_arguments} were passed in.
Please use `--build-option` instead,
`--global-option` is reserved to flags like `--verbose` or `--quiet`.
"""