fix: Don't remove parenthesis around long dictionary values (#4377)

2025-01-17 00:09:22 -06:00 · 2025-01-17 00:09:22 -06:00 · 584d0331c8
commit 584d0331c8
parent 6e9654065c
5 changed files with 290 additions and 66 deletions
--- a/CHANGES.md
+++ b/CHANGES.md
@ -24,6 +24,8 @@
  (#4498)
 - Remove parentheses around sole list items (#4312)
 - Collapse multiple empty lines after an import into one (#4489)
+- Prevent `string_processing` and `wrap_long_dict_values_in_parens` from removing
+  parentheses around long dictionary values (#4377)

 ### Configuration

@ -43,6 +45,7 @@
 ### Performance

 <!-- Changes that improve Black's performance. -->
+
 - Speed up the `is_fstring_start` function in Black's tokenizer (#4541)

 ### Output
--- a/src/black/linegen.py
+++ b/src/black/linegen.py
@ -973,29 +973,7 @@ def _maybe_split_omitting_optional_parens(
        try:
            # The RHSResult Omitting Optional Parens.
            rhs_oop = _first_right_hand_split(line, omit=omit)
-            is_split_right_after_equal = (
-                len(rhs.head.leaves) >= 2 and rhs.head.leaves[-2].type == token.EQUAL
-            )
-            rhs_head_contains_brackets = any(
-                leaf.type in BRACKETS for leaf in rhs.head.leaves[:-1]
-            )
-            # the -1 is for the ending optional paren
-            rhs_head_short_enough = is_line_short_enough(
-                rhs.head, mode=replace(mode, line_length=mode.line_length - 1)
-            )
-            rhs_head_explode_blocked_by_magic_trailing_comma = (
-                rhs.head.magic_trailing_comma is None
-            )
-            if (
-                not (
-                    is_split_right_after_equal
-                    and rhs_head_contains_brackets
-                    and rhs_head_short_enough
-                    and rhs_head_explode_blocked_by_magic_trailing_comma
-                )
-                # the omit optional parens split is preferred by some other reason
-                or _prefer_split_rhs_oop_over_rhs(rhs_oop, rhs, mode)
-            ):
+            if _prefer_split_rhs_oop_over_rhs(rhs_oop, rhs, mode):
                yield from _maybe_split_omitting_optional_parens(
                    rhs_oop, line, mode, features=features, omit=omit
                )
@ -1006,8 +984,15 @@ def _maybe_split_omitting_optional_parens(
            if line.is_chained_assignment:
                pass

-            elif not can_be_split(rhs.body) and not is_line_short_enough(
-                rhs.body, mode=mode
+            elif (
+                not can_be_split(rhs.body)
+                and not is_line_short_enough(rhs.body, mode=mode)
+                and not (
+                    Preview.wrap_long_dict_values_in_parens
+                    and rhs.opening_bracket.parent
+                    and rhs.opening_bracket.parent.parent
+                    and rhs.opening_bracket.parent.parent.type == syms.dictsetmaker
+                )
            ):
                raise CannotSplit(
                    "Splitting failed, body is still too long and can't be split."
@ -1038,6 +1023,44 @@ def _prefer_split_rhs_oop_over_rhs(
    Returns whether we should prefer the result from a split omitting optional parens
    (rhs_oop) over the original (rhs).
    """
+    # contains unsplittable type ignore
+    if (
+        rhs_oop.head.contains_unsplittable_type_ignore()
+        or rhs_oop.body.contains_unsplittable_type_ignore()
+        or rhs_oop.tail.contains_unsplittable_type_ignore()
+    ):
+        return True
+
+    # Retain optional parens around dictionary values
+    if (
+        Preview.wrap_long_dict_values_in_parens
+        and rhs.opening_bracket.parent
+        and rhs.opening_bracket.parent.parent
+        and rhs.opening_bracket.parent.parent.type == syms.dictsetmaker
+        and rhs.body.bracket_tracker.delimiters
+    ):
+        # Unless the split is inside the key
+        return any(leaf.type == token.COLON for leaf in rhs_oop.tail.leaves)
+
+    # the split is right after `=`
+    if not (len(rhs.head.leaves) >= 2 and rhs.head.leaves[-2].type == token.EQUAL):
+        return True
+
+    # the left side of assignment contains brackets
+    if not any(leaf.type in BRACKETS for leaf in rhs.head.leaves[:-1]):
+        return True
+
+    # the left side of assignment is short enough (the -1 is for the ending optional
+    # paren)
+    if not is_line_short_enough(
+        rhs.head, mode=replace(mode, line_length=mode.line_length - 1)
+    ):
+        return True
+
+    # the left side of assignment won't explode further because of magic trailing comma
+    if rhs.head.magic_trailing_comma is not None:
+        return True
+
    # If we have multiple targets, we prefer more `=`s on the head vs pushing them to
    # the body
    rhs_head_equal_count = [leaf.type for leaf in rhs.head.leaves].count(token.EQUAL)
@ -1065,10 +1088,6 @@ def _prefer_split_rhs_oop_over_rhs(
            # the first line is short enough
            and is_line_short_enough(rhs_oop.head, mode=mode)
        )
-        # contains unsplittable type ignore
-        or rhs_oop.head.contains_unsplittable_type_ignore()
-        or rhs_oop.body.contains_unsplittable_type_ignore()
-        or rhs_oop.tail.contains_unsplittable_type_ignore()
    )


--- a/src/black/trans.py
+++ b/src/black/trans.py
@ -856,7 +856,7 @@ def _validate_msg(line: Line, string_idx: int) -> TResult[None]:
            ):
                return TErr(
                    "StringMerger does NOT merge f-strings with different quote types"
-                    "and internal quotes."
+                    " and internal quotes."
                )

            if id(leaf) in line.comments:
@ -887,6 +887,7 @@ class StringParenStripper(StringTransformer):
        The line contains a string which is surrounded by parentheses and:
            - The target string is NOT the only argument to a function call.
            - The target string is NOT a "pointless" string.
+            - The target string is NOT a dictionary value.
            - If the target string contains a PERCENT, the brackets are not
              preceded or followed by an operator with higher precedence than
              PERCENT.
@ -934,11 +935,14 @@ def do_match(self, line: Line) -> TMatchResult:
            ):
                continue

-            # That LPAR should NOT be preceded by a function name or a closing
-            # bracket (which could be a function which returns a function or a
-            # list/dictionary that contains a function)...
+            # That LPAR should NOT be preceded by a colon (which could be a
+            # dictionary value), function name, or a closing bracket (which
+            # could be a function returning a function or a list/dictionary
+            # containing a function)...
            if is_valid_index(idx - 2) and (
-                LL[idx - 2].type == token.NAME or LL[idx - 2].type in CLOSING_BRACKETS
+                LL[idx - 2].type == token.COLON
+                or LL[idx - 2].type == token.NAME
+                or LL[idx - 2].type in CLOSING_BRACKETS
            ):
                continue

@ -2264,12 +2268,12 @@ def do_transform(
            elif right_leaves and right_leaves[-1].type == token.RPAR:
                # Special case for lambda expressions as dict's value, e.g.:
                #     my_dict = {
-                #        "key": lambda x: f"formatted: {x},
+                #        "key": lambda x: f"formatted: {x}",
                #     }
                # After wrapping the dict's value with parentheses, the string is
                # followed by a RPAR but its opening bracket is lambda's, not
                # the string's:
-                #        "key": (lambda x: f"formatted: {x}),
+                #        "key": (lambda x: f"formatted: {x}"),
                opening_bracket = right_leaves[-1].opening_bracket
                if opening_bracket is not None and opening_bracket in left_leaves:
                    index = left_leaves.index(opening_bracket)
--- a/tests/data/cases/preview_long_dict_values.py
+++ b/tests/data/cases/preview_long_dict_values.py
@ -1,4 +1,25 @@
 # flags: --unstable
+x = {
+    "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": (
+        "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxx{xx}xxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx"
+    )
+}
+x = {
+    "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": (
+        "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxx{xx}xxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx"
+    ),
+}
+x = {
+    "foo": bar,
+    "foo": bar,
+    "foo": (
+        xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx
+    ),
+}
+x = {
+    "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxx"
+}
+
 my_dict = {
    "something_something":
        r"Lorem ipsum dolor sit amet, an sed convenire eloquentiam \t"
@ -6,23 +27,90 @@
        r"signiferumque, duo ea vocibus consetetur scriptorem. Facer \t",
 }

+# Function calls as keys
+tasks = {
+    get_key_name(
+        foo,
+        bar,
+        baz,
+    ): src,
+    loop.run_in_executor(): src,
+    loop.run_in_executor(xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx): src,
+    loop.run_in_executor(
+        xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx_xxxxx_xxxxx
+    ): src,
+    loop.run_in_executor(): (
+        xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx
+    ),
+}
+
+# Dictionary comprehensions
+tasks = {
+    key_name: (
+        xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx
+    )
+    for src in sources
+}
+tasks = {key_name: foobar for src in sources}
+tasks = {
+    get_key_name(
+        src,
+    ): "foo"
+    for src in sources
+}
+tasks = {
+    get_key_name(
+        foo,
+        bar,
+        baz,
+    ): src
+    for src in sources
+}
+tasks = {
+    get_key_name(): (
+        xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx
+    )
+    for src in sources
+}
+tasks = {get_key_name(): foobar for src in sources}
+
+
+# Delimiters inside the value
+def foo():
+    def bar():
+        x = {
+            common.models.DateTimeField: datetime(2020, 1, 31, tzinfo=utc) + timedelta(
+                days=i
+            ),
+        }
+        x = {
+            common.models.DateTimeField: (
+                datetime(2020, 1, 31, tzinfo=utc) + timedelta(days=i)
+            ),
+        }
+        x = {
+            "foobar": (123 + 456),
+        }
+        x = {
+            "foobar": (123) + 456,
+        }
+
+
 my_dict = {
    "a key in my dict": a_very_long_variable * and_a_very_long_function_call() / 100000.0
 }
-
 my_dict = {
    "a key in my dict": a_very_long_variable * and_a_very_long_function_call() * and_another_long_func() / 100000.0
 }
-
 my_dict = {
    "a key in my dict": MyClass.some_attribute.first_call().second_call().third_call(some_args="some value")
 }

 {
-    'xxxxxx':
+    "xxxxxx":
        xxxxxxxxxxxxxxxxxxx.xxxxxxxxxxxxxx(
            xxxxxxxxxxxxxx={
-                'x':
+                "x":
                    xxxxxxxxxxxxxxxxxxxxxxxxxx.xxxxxxxxxxxxxxxxxxxxxxxxxxxxx(
                        xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=(
                            xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
@ -30,8 +118,8 @@
                                xxxxxxxxxxxxx=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
                                .xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx(
                                    xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx={
-                                        'x': x.xx,
-                                        'x': x.x,
+                                        "x": x.xx,
+                                        "x": x.x,
                                    }))))
            }),
 }
@ -58,7 +146,26 @@ def func():


 # output
-
+x = {
+    "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": (
+        "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxx{xx}xxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx"
+    )
+}
+x = {
+    "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": (
+        "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxx{xx}xxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx"
+    ),
+}
+x = {
+    "foo": bar,
+    "foo": bar,
+    "foo": (
+        xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx
+    ),
+}
+x = {
+    "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxx"
+}

 my_dict = {
    "something_something": (
@ -68,12 +175,80 @@ def func():
    ),
 }

+# Function calls as keys
+tasks = {
+    get_key_name(
+        foo,
+        bar,
+        baz,
+    ): src,
+    loop.run_in_executor(): src,
+    loop.run_in_executor(xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx): src,
+    loop.run_in_executor(
+        xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx_xxxxx_xxxxx
+    ): src,
+    loop.run_in_executor(): (
+        xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx
+    ),
+}
+
+# Dictionary comprehensions
+tasks = {
+    key_name: (
+        xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx
+    )
+    for src in sources
+}
+tasks = {key_name: foobar for src in sources}
+tasks = {
+    get_key_name(
+        src,
+    ): "foo"
+    for src in sources
+}
+tasks = {
+    get_key_name(
+        foo,
+        bar,
+        baz,
+    ): src
+    for src in sources
+}
+tasks = {
+    get_key_name(): (
+        xx_xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxxxxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx
+    )
+    for src in sources
+}
+tasks = {get_key_name(): foobar for src in sources}
+
+
+# Delimiters inside the value
+def foo():
+    def bar():
+        x = {
+            common.models.DateTimeField: (
+                datetime(2020, 1, 31, tzinfo=utc) + timedelta(days=i)
+            ),
+        }
+        x = {
+            common.models.DateTimeField: (
+                datetime(2020, 1, 31, tzinfo=utc) + timedelta(days=i)
+            ),
+        }
+        x = {
+            "foobar": 123 + 456,
+        }
+        x = {
+            "foobar": (123) + 456,
+        }
+
+
 my_dict = {
    "a key in my dict": (
        a_very_long_variable * and_a_very_long_function_call() / 100000.0
    )
 }
-
 my_dict = {
    "a key in my dict": (
        a_very_long_variable
@ -82,7 +257,6 @@ def func():
        / 100000.0
    )
 }
-
 my_dict = {
    "a key in my dict": (
        MyClass.some_attribute.first_call()
@ -113,17 +287,15 @@ def func():

 class Random:
    def func():
-        random_service.status.active_states.inactive = (
-            make_new_top_level_state_from_dict({
-                "topLevelBase": {
-                    "secondaryBase": {
-                        "timestamp": 1234,
-                        "latitude": 1,
-                        "longitude": 2,
-                        "actionTimestamp": (
-                            Timestamp(seconds=1530584000, nanos=0).ToJsonString()
-                        ),
-                    }
-                },
-            })
-        )
+        random_service.status.active_states.inactive = make_new_top_level_state_from_dict({
+            "topLevelBase": {
+                "secondaryBase": {
+                    "timestamp": 1234,
+                    "latitude": 1,
+                    "longitude": 2,
+                    "actionTimestamp": (
+                        Timestamp(seconds=1530584000, nanos=0).ToJsonString()
+                    ),
+                }
+            },
+        })
--- a/tests/data/cases/preview_long_strings.py
+++ b/tests/data/cases/preview_long_strings.py
@ -279,7 +279,7 @@ def foo():
    "........................................................................... \\N{LAO KO LA}"
 )

-msg = lambda x: f"this is a very very very long lambda value {x} that doesn't fit on a single line"
+msg = lambda x: f"this is a very very very very long lambda value {x} that doesn't fit on a single line"

 dict_with_lambda_values = {
    "join": lambda j: (
@ -329,6 +329,20 @@ def foo():

 log.info(f"""Skipping: {'a' == 'b'} {desc['ms_name']} {money=} {dte=} {pos_share=} {desc['status']} {desc['exposure_max']}""")

+x = {
+    "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": (
+        "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxx{xx}xxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx"
+    )
+}
+x = {
+    "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxx{xx}xxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx",
+}
+x = {
+    "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": (
+        "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxx"
+    )
+}
+
 # output


@ -842,11 +856,9 @@ def foo():
    " \\N{LAO KO LA}"
 )

-msg = (
-    lambda x: (
-        f"this is a very very very long lambda value {x} that doesn't fit on a single"
-        " line"
-    )
+msg = lambda x: (
+    f"this is a very very very very long lambda value {x} that doesn't fit on a"
+    " single line"
 )

 dict_with_lambda_values = {
@ -925,4 +937,18 @@ def foo():

 log.info(
    f"""Skipping: {'a' == 'b'} {desc['ms_name']} {money=} {dte=} {pos_share=} {desc['status']} {desc['exposure_max']}"""
-)
+)
+
+x = {
+    "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": (
+        "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxx{xx}xxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx"
+    )
+}
+x = {
+    "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": (
+        "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxxx{xx}xxx_xxxxx_xxxxxxxxx_xxxxxxxxxxxx_xxxx"
+    ),
+}
+x = {
+    "xx_xxxxx_xxxxxxxxxx_xxxxxxxxx_xx": "xx:xxxxxxxxxxxxxxxxx_xxxxx_xxxxxxx_xxxxxxxxxx"
+}