fix: Don't remove parenthesis around long dictionary values (#4377)

2025-01-17 00:09:22 -06:00 · 2025-01-17 00:09:22 -06:00 · 584d0331c8
commit 584d0331c8
parent 6e9654065c
5 changed files with 290 additions and 66 deletions
--- a/CHANGES.md
+++ b/CHANGES.md
@ -24,6 +24,8 @@
  (#4498)
 - Remove parentheses around sole list items (#4312)
 - Collapse multiple empty lines after an import into one (#4489)
 - Prevent `string_processing` and `wrap_long_dict_values_in_parens` from removing
  parentheses around long dictionary values (#4377)
 ### Configuration
@ -43,6 +45,7 @@
 ### Performance
 <!-- Changes that improve Black's performance. -->
 - Speed up the `is_fstring_start` function in Black's tokenizer (#4541)
 ### Output
--- a/src/black/linegen.py
+++ b/src/black/linegen.py
@ -973,29 +973,7 @@ def _maybe_split_omitting_optional_parens(
        try:
            # The RHSResult Omitting Optional Parens.
            rhs_oop = _first_right_hand_split(line, omit=omit)
-            is_split_right_after_equal = (
+            if _prefer_split_rhs_oop_over_rhs(rhs_oop, rhs, mode):
                len(rhs.head.leaves) >= 2 and rhs.head.leaves[-2].type == token.EQUAL
            )
            rhs_head_contains_brackets = any(
                leaf.type in BRACKETS for leaf in rhs.head.leaves[:-1]
            )
            # the -1 is for the ending optional paren
            rhs_head_short_enough = is_line_short_enough(
                rhs.head, mode=replace(mode, line_length=mode.line_length - 1)
            )
            rhs_head_explode_blocked_by_magic_trailing_comma = (
                rhs.head.magic_trailing_comma is None
            )
            if (
                not (
                    is_split_right_after_equal
                    and rhs_head_contains_brackets
                    and rhs_head_short_enough
                    and rhs_head_explode_blocked_by_magic_trailing_comma
                )
                # the omit optional parens split is preferred by some other reason
                or _prefer_split_rhs_oop_over_rhs(rhs_oop, rhs, mode)
            ):
                yield from _maybe_split_omitting_optional_parens(
                    rhs_oop, line, mode, features=features, omit=omit
                )
@ -1006,8 +984,15 @@ def _maybe_split_omitting_optional_parens(
            if line.is_chained_assignment:
                pass
-            elif not can_be_split(rhs.body) and not is_line_short_enough(
+            elif (
-                rhs.body, mode=mode
+                not can_be_split(rhs.body)
                and not is_line_short_enough(rhs.body, mode=mode)
                and not (
                    Preview.wrap_long_dict_values_in_parens
                    and rhs.opening_bracket.parent
                    and rhs.opening_bracket.parent.parent
                    and rhs.opening_bracket.parent.parent.type == syms.dictsetmaker
                )
            ):
                raise CannotSplit(
                    "Splitting failed, body is still too long and can't be split."
@ -1038,6 +1023,44 @@ def _prefer_split_rhs_oop_over_rhs(
    Returns whether we should prefer the result from a split omitting optional parens
    (rhs_oop) over the original (rhs).
    """
    # contains unsplittable type ignore
    if (
        rhs_oop.head.contains_unsplittable_type_ignore()
        or rhs_oop.body.contains_unsplittable_type_ignore()
        or rhs_oop.tail.contains_unsplittable_type_ignore()
    ):
        return True
    # Retain optional parens around dictionary values
    if (
        Preview.wrap_long_dict_values_in_parens
        and rhs.opening_bracket.parent
        and rhs.opening_bracket.parent.parent
        and rhs.opening_bracket.parent.parent.type == syms.dictsetmaker
        and rhs.body.bracket_tracker.delimiters
    ):
        # Unless the split is inside the key
        return any(leaf.type == token.COLON for leaf in rhs_oop.tail.leaves)
    # the split is right after `=`
    if not (len(rhs.head.leaves) >= 2 and rhs.head.leaves[-2].type == token.EQUAL):
        return True
    # the left side of assignment contains brackets
    if not any(leaf.type in BRACKETS for leaf in rhs.head.leaves[:-1]):
        return True
    # the left side of assignment is short enough (the -1 is for the ending optional
    # paren)
    if not is_line_short_enough(
        rhs.head, mode=replace(mode, line_length=mode.line_length - 1)
    ):
        return True
    # the left side of assignment won't explode further because of magic trailing comma
    if rhs.head.magic_trailing_comma is not None:
        return True
    # If we have multiple targets, we prefer more `=`s on the head vs pushing them to
    # the body
    rhs_head_equal_count = [leaf.type for leaf in rhs.head.leaves].count(token.EQUAL)
@ -1065,10 +1088,6 @@ def _prefer_split_rhs_oop_over_rhs(
            # the first line is short enough
            and is_line_short_enough(rhs_oop.head, mode=mode)
        )
        # contains unsplittable type ignore
        or rhs_oop.head.contains_unsplittable_type_ignore()
        or rhs_oop.body.contains_unsplittable_type_ignore()
        or rhs_oop.tail.contains_unsplittable_type_ignore()
    )
--- a/src/black/trans.py
+++ b/src/black/trans.py
@ -856,7 +856,7 @@ def _validate_msg(line: Line, string_idx: int) -> TResult[None]:
            ):
                return TErr(
                    "StringMerger does NOT merge f-strings with different quote types"
-                    "and internal quotes."
+                    " and internal quotes."
                )
            if id(leaf) in line.comments:
@ -887,6 +887,7 @@ class StringParenStripper(StringTransformer):
        The line contains a string which is surrounded by parentheses and:
            - The target string is NOT the only argument to a function call.
            - The target string is NOT a "pointless" string.
            - The target string is NOT a dictionary value.
            - If the target string contains a PERCENT, the brackets are not
              preceded or followed by an operator with higher precedence than
              PERCENT.
@ -934,11 +935,14 @@ def do_match(self, line: Line) -> TMatchResult:
            ):
                continue
-            # That LPAR should NOT be preceded by a function name or a closing
+            # That LPAR should NOT be preceded by a colon (which could be a
-            # bracket (which could be a function which returns a function or a
+            # dictionary value), function name, or a closing bracket (which
-            # list/dictionary that contains a function)...
+            # could be a function returning a function or a list/dictionary
            # containing a function)...
            if is_valid_index(idx - 2) and (
-                LL[idx - 2].type == token.NAME or LL[idx - 2].type in CLOSING_BRACKETS
+                LL[idx - 2].type == token.COLON
                or LL[idx - 2].type == token.NAME
                or LL[idx - 2].type in CLOSING_BRACKETS
            ):
                continue
@ -2264,12 +2268,12 @@ def do_transform(
            elif right_leaves and right_leaves[-1].type == token.RPAR:
                # Special case for lambda expressions as dict's value, e.g.:
                #     my_dict = {
-                #        "key": lambda x: f"formatted: {x},
+                #        "key": lambda x: f"formatted: {x}",
                #     }
                # After wrapping the dict's value with parentheses, the string is
                # followed by a RPAR but its opening bracket is lambda's, not
                # the string's:
-                #        "key": (lambda x: f"formatted: {x}),
+                #        "key": (lambda x: f"formatted: {x}"),
                opening_bracket = right_leaves[-1].opening_bracket
                if opening_bracket is not None and opening_bracket in left_leaves:
                    index = left_leaves.index(opening_bracket)
--- a/tests/data/cases/preview_long_dict_values.py
+++ b/tests/data/cases/preview_long_dict_values.py
@ -1,4 +1,25 @@
 # flags: --unstable
 x = {
    "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": (
        "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxx{xx}xxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx"
    )
 }
 x = {
    "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": (
        "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxx{xx}xxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx"
    ),
 }
 x = {
    "foo": bar,
    "foo": bar,
    "foo": (
        xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx
    ),
 }
 x = {
    "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxx"
 }
 my_dict = {
    "something_something":
        r"Lorem ipsum dolor sit amet, an sed convenire eloquentiam \t"
@ -6,23 +27,90 @@
        r"signiferumque, duo ea vocibus consetetur scriptorem. Facer \t",
 }
 # Function calls as keys
 tasks = {
    get_key_name(
        foo,
        bar,
        baz,
    ): src,
    loop.run_in_executor(): src,
    loop.run_in_executor(xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx): src,
    loop.run_in_executor(
        xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx_xxxxx_xxxxx
    ): src,
    loop.run_in_executor(): (
        xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx
    ),
 }
 # Dictionary comprehensions
 tasks = {
    key_name: (
        xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx
    )
    for src in sources
 }
 tasks = {key_name: foobar for src in sources}
 tasks = {
    get_key_name(
        src,
    ): "foo"
    for src in sources
 }
 tasks = {
    get_key_name(
        foo,
        bar,
        baz,
    ): src
    for src in sources
 }
 tasks = {
    get_key_name(): (
        xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx
    )
    for src in sources
 }
 tasks = {get_key_name(): foobar for src in sources}
 # Delimiters inside the value
 def foo():
    def bar():
        x = {
            common.models.DateTimeField: datetime(2020, 1, 31, tzinfo=utc) + timedelta(
                days=i
            ),
        }
        x = {
            common.models.DateTimeField: (
                datetime(2020, 1, 31, tzinfo=utc) + timedelta(days=i)
            ),
        }
        x = {
            "foobar": (123 + 456),
        }
        x = {
            "foobar": (123) + 456,
        }
 my_dict = {
    "a key in my dict": a_very_long_variable * and_a_very_long_function_call() / 100000.0
 }
 my_dict = {
    "a key in my dict": a_very_long_variable * and_a_very_long_function_call() * and_another_long_func() / 100000.0
 }
 my_dict = {
    "a key in my dict": MyClass.some_attribute.first_call().second_call().third_call(some_args="some value")
 }
 {
-    'xxxxxx':
+    "xxxxxx":
        xxxxxxxxxxxxxxxxxxx.xxxxxxxxxxxxxx(
            xxxxxxxxxxxxxx={
-                'x':
+                "x":
                    xxxxxxxxxxxxxxxxxxxxxxxxxx.xxxxxxxxxxxxxxxxxxxxxxxxxxxxx(
                        xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=(
                            xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
@ -30,8 +118,8 @@
                                xxxxxxxxxxxxx=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
                                .xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx(
                                    xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx={
-                                        'x': x.xx,
+                                        "x": x.xx,
-                                        'x': x.x,
+                                        "x": x.x,
                                    }))))
            }),
 }
@ -58,7 +146,26 @@ def func():
 # output
-
+x = {
    "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": (
        "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxx{xx}xxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx"
    )
 }
 x = {
    "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": (
        "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxx{xx}xxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx"
    ),
 }
 x = {
    "foo": bar,
    "foo": bar,
    "foo": (
        xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx
    ),
 }
 x = {
    "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxx"
 }
 my_dict = {
    "something_something": (
@ -68,12 +175,80 @@ def func():
    ),
 }
 # Function calls as keys
 tasks = {
    get_key_name(
        foo,
        bar,
        baz,
    ): src,
    loop.run_in_executor(): src,
    loop.run_in_executor(xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx): src,
    loop.run_in_executor(
        xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx_xxxxx_xxxxx
    ): src,
    loop.run_in_executor(): (
        xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx
    ),
 }
 # Dictionary comprehensions
 tasks = {
    key_name: (
        xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx
    )
    for src in sources
 }
 tasks = {key_name: foobar for src in sources}
 tasks = {
    get_key_name(
        src,
    ): "foo"
    for src in sources
 }
 tasks = {
    get_key_name(
        foo,
        bar,
        baz,
    ): src
    for src in sources
 }
 tasks = {
    get_key_name(): (
        xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx
    )
    for src in sources
 }
 tasks = {get_key_name(): foobar for src in sources}
 # Delimiters inside the value
 def foo():
    def bar():
        x = {
            common.models.DateTimeField: (
                datetime(2020, 1, 31, tzinfo=utc) + timedelta(days=i)
            ),
        }
        x = {
            common.models.DateTimeField: (
                datetime(2020, 1, 31, tzinfo=utc) + timedelta(days=i)
            ),
        }
        x = {
            "foobar": 123 + 456,
        }
        x = {
            "foobar": (123) + 456,
        }
 my_dict = {
    "a key in my dict": (
        a_very_long_variable * and_a_very_long_function_call() / 100000.0
    )
 }
 my_dict = {
    "a key in my dict": (
        a_very_long_variable
@ -82,7 +257,6 @@ def func():
        / 100000.0
    )
 }
 my_dict = {
    "a key in my dict": (
        MyClass.some_attribute.first_call()
@ -113,17 +287,15 @@ def func():
 class Random:
    def func():
-        random_service.status.active_states.inactive = (
+        random_service.status.active_states.inactive = make_new_top_level_state_from_dict({
-            make_new_top_level_state_from_dict({
+            "topLevelBase": {
-                "topLevelBase": {
+                "secondaryBase": {
-                    "secondaryBase": {
+                    "timestamp": 1234,
-                        "timestamp": 1234,
+                    "latitude": 1,
-                        "latitude": 1,
+                    "longitude": 2,
-                        "longitude": 2,
+                    "actionTimestamp": (
-                        "actionTimestamp": (
+                        Timestamp(seconds=1530584000, nanos=0).ToJsonString()
-                            Timestamp(seconds=1530584000, nanos=0).ToJsonString()
+                    ),
-                        ),
+                }
-                    }
+            },
-                },
+        })
            })
        )
--- a/tests/data/cases/preview_long_strings.py
+++ b/tests/data/cases/preview_long_strings.py
@ -279,7 +279,7 @@ def foo():
    "........................................................................... \\N{LAO KO LA}"
 )
-msg = lambda x: f"this is a very very very long lambda value {x} that doesn't fit on a single line"
+msg = lambda x: f"this is a very very very very long lambda value {x} that doesn't fit on a single line"
 dict_with_lambda_values = {
    "join": lambda j: (
@ -329,6 +329,20 @@ def foo():
 log.info(f"""Skipping: {'a' == 'b'} {desc['ms_name']} {money=} {dte=} {pos_share=} {desc['status']} {desc['exposure_max']}""")
 x = {
    "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": (
        "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxx{xx}xxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx"
    )
 }
 x = {
    "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxx{xx}xxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx",
 }
 x = {
    "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": (
        "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxx"
    )
 }
 # output
@ -842,11 +856,9 @@ def foo():
    " \\N{LAO KO LA}"
 )
-msg = (
+msg = lambda x: (
-    lambda x: (
+    f"this is a very very very very long lambda value {x} that doesn't fit on a"
-        f"this is a very very very long lambda value {x} that doesn't fit on a single"
+    " single line"
        " line"
    )
 )
 dict_with_lambda_values = {
@ -926,3 +938,17 @@ def foo():
 log.info(
    f"""Skipping: {'a' == 'b'} {desc['ms_name']} {money=} {dte=} {pos_share=} {desc['status']} {desc['exposure_max']}"""
 )
 x = {
    "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": (
        "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxx{xx}xxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx"
    )
 }
 x = {
    "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": (
        "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxx{xx}xxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx"
    ),
 }
 x = {
    "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxx"
 }