Address pre-existing trailing commas when not in the rightmost bracket pair

This required some hackery. Long story short, we need to reuse the ability to omit rightmost bracket pairs (which glues them together and splits on something else instead), for use with pre-existing trailing commas. This form of user-controlled formatting is brittle so we have to be careful not to cause a scenario where Black first formats code without trailing commas in one way, and then looks at the same file with pre-existing trailing commas (that it itself put on the previous run) and decides to format the code again. One particular ugly edge case here is handling of optional parentheses. In particular, the long-standing `line_length=1` hack got in the way of pre-existing trailing commas and had to be removed. Instead, a more intelligent but costly solution was put in place: a "second opinion" if the formatting that omits optional parentheses ended up causing lines to be too long. Again, for efficiency purposes, Black reuses Leaf objects from blib2to3 and modifies them in place, which was invalid for having two separate formattings. Line cloning was used to mitigate this. Fixes #1619
2020-08-24 18:29:59 +02:00 · 2020-08-24 18:29:59 +02:00 · 586d24236e
commit 586d24236e
parent d46268cd67
6 changed files with 237 additions and 77 deletions
--- a/src/black/init.py
+++ b/src/black/init.py
@ -195,6 +195,7 @@ class Feature(Enum):
    ASYNC_KEYWORDS = 7
    ASSIGNMENT_EXPRESSIONS = 8
    POS_ONLY_ARGUMENTS = 9
+    FORCE_OPTIONAL_PARENTHESES = 50


 VERSION_TO_FEATURES: Dict[TargetVersion, Set[Feature]] = {
@ -1284,6 +1285,7 @@ class BracketTracker:
    previous: Optional[Leaf] = None
    _for_loop_depths: List[int] = field(default_factory=list)
    _lambda_argument_depths: List[int] = field(default_factory=list)
+    invisible: List[Leaf] = field(default_factory=list)

    def mark(self, leaf: Leaf) -> None:
        """Mark `leaf` with bracket-related metadata. Keep track of delimiters.
@ -1309,6 +1311,8 @@ def mark(self, leaf: Leaf) -> None:
            self.depth -= 1
            opening_bracket = self.bracket_match.pop((self.depth, leaf.type))
            leaf.opening_bracket = opening_bracket
+            if not leaf.value:
+                self.invisible.append(leaf)
        leaf.bracket_depth = self.depth
        if self.depth == 0:
            delim = is_split_before_delimiter(leaf, self.previous)
@ -1321,6 +1325,8 @@ def mark(self, leaf: Leaf) -> None:
        if leaf.type in OPENING_BRACKETS:
            self.bracket_match[self.depth, BRACKET[leaf.type]] = leaf
            self.depth += 1
+            if not leaf.value:
+                self.invisible.append(leaf)
        self.previous = leaf
        self.maybe_increment_lambda_arguments(leaf)
        self.maybe_increment_for_loop_variable(leaf)
@ -2627,20 +2633,31 @@ def init_st(ST: Type[StringTransformer]) -> StringTransformer:
    else:

        def rhs(line: Line, features: Collection[Feature]) -> Iterator[Line]:
+            """Wraps calls to `right_hand_split`.
+
+            The calls increasingly `omit` right-hand trailers (bracket pairs with
+            content), meaning the trailers get glued together to split on another
+            bracket pair instead.
+            """
            for omit in generate_trailers_to_omit(line, mode.line_length):
                lines = list(
                    right_hand_split(line, mode.line_length, features, omit=omit)
                )
+                # Note: this check is only able to figure out if the first line of the
+                # *current* transformation fits in the line length.  This is true only
+                # for simple cases.  All others require running more transforms via
+                # `transform_line()`.  This check doesn't know if those would succeed.
                if is_line_short_enough(lines[0], line_length=mode.line_length):
                    yield from lines
                    return

            # All splits failed, best effort split with no omits.
            # This mostly happens to multiline strings that are by definition
-            # reported as not fitting a single line.
-            # line_length=1 here was historically a bug that somehow became a feature.
-            # See #762 and #781 for the full story.
-            yield from right_hand_split(line, line_length=1, features=features)
+            # reported as not fitting a single line, as well as lines that contain
+            # pre-existing trailing commas (those have to be exploded).
+            yield from right_hand_split(
+                line, line_length=mode.line_length, features=features
+            )

        if mode.experimental_string_processing:
            if line.inside_brackets:
@ -2671,17 +2688,8 @@ def rhs(line: Line, features: Collection[Feature]) -> Iterator[Line]:
        # We are accumulating lines in `result` because we might want to abort
        # mission and return the original line in the end, or attempt a different
        # split altogether.
-        result: List[Line] = []
        try:
-            for transformed_line in transform(line, features):
-                if str(transformed_line).strip("\n") == line_str:
-                    raise CannotTransform(
-                        "Line transformer returned an unchanged result"
-                    )
-
-                result.extend(
-                    transform_line(transformed_line, mode=mode, features=features)
-                )
+            result = run_transformer(line, transform, mode, features, line_str=line_str)
        except CannotTransform:
            continue
        else:
@ -2722,6 +2730,7 @@ class StringTransformer(ABC):

    line_length: int
    normalize_strings: bool
+    __name__ = "StringTransformer"

    @abstractmethod
    def do_match(self, line: Line) -> TMatchResult:
@ -2968,7 +2977,7 @@ def __remove_backslash_line_continuation_chars(
            )

        new_line = line.clone()
-        new_line.comments = line.comments
+        new_line.comments = line.comments.copy()
        append_leaves(new_line, line, LL)

        new_string_leaf = new_line.leaves[string_idx]
@ -3296,7 +3305,6 @@ def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:

        new_line = line.clone()
        new_line.comments = line.comments.copy()
-
        append_leaves(new_line, line, LL[: string_idx - 1])

        string_leaf = Leaf(token.STRING, LL[string_idx].value)
@ -4740,8 +4748,9 @@ def right_hand_split(
    tail = bracket_split_build_line(tail_leaves, line, opening_bracket)
    bracket_split_succeeded_or_raise(head, body, tail)
    if (
+        Feature.FORCE_OPTIONAL_PARENTHESES not in features
        # the opening bracket is an optional paren
-        opening_bracket.type == token.LPAR
+        and opening_bracket.type == token.LPAR
        and not opening_bracket.value
        # the closing bracket is an optional paren
        and closing_bracket.type == token.RPAR
@ -4752,7 +4761,7 @@ def right_hand_split(
        # there are no standalone comments in the body
        and not body.contains_standalone_comments(0)
        # and we can actually remove the parens
-        and can_omit_invisible_parens(body, line_length)
+        and can_omit_invisible_parens(body, line_length, omit_on_explode=omit)
    ):
        omit = {id(closing_bracket), *omit}
        try:
@ -5587,6 +5596,9 @@ def should_split_body_explode(line: Line, opening_bracket: Leaf) -> bool:

 def is_one_tuple_between(opening: Leaf, closing: Leaf, leaves: List[Leaf]) -> bool:
    """Return True if content between `opening` and `closing` looks like a one-tuple."""
+    if opening.type != token.LPAR and closing.type != token.RPAR:
+        return False
+
    depth = closing.bracket_depth + 1
    for _opening_index, leaf in enumerate(leaves):
        if leaf is opening:
@ -5678,11 +5690,13 @@ def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[Leaf
    a preceding closing bracket fits in one line.

    Yielded sets are cumulative (contain results of previous yields, too).  First
-    set is empty.
+    set is empty, unless the line should explode, in which case bracket pairs until
+    the one that needs to explode are omitted.
    """

    omit: Set[LeafID] = set()
-    yield omit
+    if not line.should_explode:
+        yield omit

    length = 4 * line.depth
    opening_bracket: Optional[Leaf] = None
@ -5701,9 +5715,24 @@ def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[Leaf
            if leaf is opening_bracket:
                opening_bracket = None
            elif leaf.type in CLOSING_BRACKETS:
+                prev = line.leaves[index - 1] if index > 0 else None
+                if (
+                    line.should_explode
+                    and prev
+                    and prev.type == token.COMMA
+                    and not prev.was_checked
+                    and not is_one_tuple_between(
+                        leaf.opening_bracket, leaf, line.leaves
+                    )
+                ):
+                    # Never omit bracket pairs with pre-existing trailing commas.
+                    # We need to explode on those.
+                    break
+
                inner_brackets.add(id(leaf))
        elif leaf.type in CLOSING_BRACKETS:
-            if index > 0 and line.leaves[index - 1].type in OPENING_BRACKETS:
+            prev = line.leaves[index - 1] if index > 0 else None
+            if prev and prev.type in OPENING_BRACKETS:
                # Empty brackets would fail a split so treat them as "inner"
                # brackets (e.g. only add them to the `omit` set if another
                # pair of brackets was good enough.
@ -5716,6 +5745,17 @@ def generate_trailers_to_omit(line: Line, line_length: int) -> Iterator[Set[Leaf
                inner_brackets.clear()
                yield omit

+            if (
+                line.should_explode
+                and prev
+                and prev.type == token.COMMA
+                and not prev.was_checked
+                and not is_one_tuple_between(leaf.opening_bracket, leaf, line.leaves)
+            ):
+                # Never omit bracket pairs with pre-existing trailing commas.
+                # We need to explode on those.
+                break
+
            if leaf.value:
                opening_bracket = leaf.opening_bracket
                closing_bracket = leaf
@ -6297,7 +6337,11 @@ def can_be_split(line: Line) -> bool:
    return True


-def can_omit_invisible_parens(line: Line, line_length: int) -> bool:
+def can_omit_invisible_parens(
+    line: Line,
+    line_length: int,
+    omit_on_explode: Collection[LeafID] = (),
+) -> bool:
    """Does `line` have a shape safe to reformat without optional parens around it?

    Returns True for only a subset of potentially nice looking formattings but
@ -6320,37 +6364,27 @@ def can_omit_invisible_parens(line: Line, line_length: int) -> bool:

    assert len(line.leaves) >= 2, "Stranded delimiter"

-    first = line.leaves[0]
-    second = line.leaves[1]
-    penultimate = line.leaves[-2]
-    last = line.leaves[-1]
-
    # With a single delimiter, omit if the expression starts or ends with
    # a bracket.
+    first = line.leaves[0]
+    second = line.leaves[1]
    if first.type in OPENING_BRACKETS and second.type not in CLOSING_BRACKETS:
-        remainder = False
-        length = 4 * line.depth
-        for _index, leaf, leaf_length in enumerate_with_length(line):
-            if leaf.type in CLOSING_BRACKETS and leaf.opening_bracket is first:
-                remainder = True
-            if remainder:
-                length += leaf_length
-                if length > line_length:
-                    break
-
-                if leaf.type in OPENING_BRACKETS:
-                    # There are brackets we can further split on.
-                    remainder = False
-
-        else:
-            # checked the entire string and line length wasn't exceeded
-            if len(line.leaves) == _index + 1:
-                return True
+        if _can_omit_opening_paren(line, first=first, line_length=line_length):
+            return True

        # Note: we are not returning False here because a line might have *both*
        # a leading opening bracket and a trailing closing bracket.  If the
        # opening bracket doesn't match our rule, maybe the closing will.

+    penultimate = line.leaves[-2]
+    last = line.leaves[-1]
+    if line.should_explode:
+        try:
+            penultimate, last = last_two_except(line.leaves, omit=omit_on_explode)
+        except LookupError:
+            # Turns out we'd omit everything.  We cannot skip the optional parentheses.
+            return False
+
    if (
        last.type == token.RPAR
        or last.type == token.RBRACE
@ -6371,21 +6405,124 @@ def can_omit_invisible_parens(line: Line, line_length: int) -> bool:
            # unnecessary.
            return True

-        length = 4 * line.depth
-        seen_other_brackets = False
-        for _index, leaf, leaf_length in enumerate_with_length(line):
-            length += leaf_length
-            if leaf is last.opening_bracket:
-                if seen_other_brackets or length <= line_length:
-                    return True
+        if (
+            line.should_explode
+            and penultimate.type == token.COMMA
+            and not penultimate.was_checked
+        ):
+            # The rightmost non-omitted bracket pair is the one we want to explode on.
+            return True

-            elif leaf.type in OPENING_BRACKETS:
-                # There are brackets we can further split on.
-                seen_other_brackets = True
+        if _can_omit_closing_paren(line, last=last, line_length=line_length):
+            return True

    return False


+def _can_omit_opening_paren(line: Line, *, first: Leaf, line_length: int) -> bool:
+    """See `can_omit_invisible_parens`."""
+    remainder = False
+    length = 4 * line.depth
+    _index = -1
+    for _index, leaf, leaf_length in enumerate_with_length(line):
+        if leaf.type in CLOSING_BRACKETS and leaf.opening_bracket is first:
+            remainder = True
+        if remainder:
+            length += leaf_length
+            if length > line_length:
+                break
+
+            if leaf.type in OPENING_BRACKETS:
+                # There are brackets we can further split on.
+                remainder = False
+
+    else:
+        # checked the entire string and line length wasn't exceeded
+        if len(line.leaves) == _index + 1:
+            return True
+
+    return False
+
+
+def _can_omit_closing_paren(line: Line, *, last: Leaf, line_length: int) -> bool:
+    """See `can_omit_invisible_parens`."""
+    length = 4 * line.depth
+    seen_other_brackets = False
+    for _index, leaf, leaf_length in enumerate_with_length(line):
+        length += leaf_length
+        if leaf is last.opening_bracket:
+            if seen_other_brackets or length <= line_length:
+                return True
+
+        elif leaf.type in OPENING_BRACKETS:
+            # There are brackets we can further split on.
+            seen_other_brackets = True
+
+    return False
+
+
+def last_two_except(leaves: List[Leaf], omit: Collection[LeafID]) -> Tuple[Leaf, Leaf]:
+    """Return (penultimate, last) leaves skipping brackets in `omit` and contents."""
+    stop_after = None
+    last = None
+    for leaf in reversed(leaves):
+        if stop_after:
+            if leaf is stop_after:
+                stop_after = None
+            continue
+
+        if last:
+            return leaf, last
+
+        if id(leaf) in omit:
+            stop_after = leaf.opening_bracket
+        else:
+            last = leaf
+    else:
+        raise LookupError("Last two leaves were also skipped")
+
+
+def run_transformer(
+    line: Line,
+    transform: Transformer,
+    mode: Mode,
+    features: Collection[Feature],
+    *,
+    line_str: str = "",
+) -> List[Line]:
+    if not line_str:
+        line_str = line_to_string(line)
+    result: List[Line] = []
+    for transformed_line in transform(line, features):
+        if str(transformed_line).strip("\n") == line_str:
+            raise CannotTransform("Line transformer returned an unchanged result")
+
+        result.extend(transform_line(transformed_line, mode=mode, features=features))
+
+    if not (
+        transform.__name__ == "rhs"
+        and line.bracket_tracker.invisible
+        and not any(bracket.value for bracket in line.bracket_tracker.invisible)
+        and not line.contains_multiline_strings()
+        and not result[0].contains_uncollapsable_type_comments()
+        and not result[0].contains_unsplittable_type_ignore()
+        and not is_line_short_enough(result[0], line_length=mode.line_length)
+    ):
+        return result
+
+    line_copy = line.clone()
+    append_leaves(line_copy, line, line.leaves)
+    features_fop = set(features) | {Feature.FORCE_OPTIONAL_PARENTHESES}
+    second_opinion = run_transformer(
+        line_copy, transform, mode, features_fop, line_str=line_str
+    )
+    if all(
+        is_line_short_enough(ln, line_length=mode.line_length) for ln in second_opinion
+    ):
+        result = second_opinion
+    return result
+
+
 def get_cache_file(mode: Mode) -> Path:
    return CACHE_DIR / f"cache.{mode.get_cache_key()}.pickle"

--- a/tests/data/cantfit.py
+++ b/tests/data/cantfit.py
@ -67,11 +67,15 @@
 normal_name = (
    but_the_function_name_is_now_ridiculously_long_and_it_is_still_super_annoying()
 )
-normal_name = but_the_function_name_is_now_ridiculously_long_and_it_is_still_super_annoying(
-    arg1, arg2, arg3
+normal_name = (
+    but_the_function_name_is_now_ridiculously_long_and_it_is_still_super_annoying(
+        arg1, arg2, arg3
+    )
 )
-normal_name = but_the_function_name_is_now_ridiculously_long_and_it_is_still_super_annoying(
-    [1, 2, 3], arg1, [1, 2, 3], arg2, [1, 2, 3], arg3
+normal_name = (
+    but_the_function_name_is_now_ridiculously_long_and_it_is_still_super_annoying(
+        [1, 2, 3], arg1, [1, 2, 3], arg2, [1, 2, 3], arg3
+    )
 )
 # long arguments
 normal_name = normal_function_name(
--- a/tests/data/function_trailing_comma.py
+++ b/tests/data/function_trailing_comma.py
@ -9,6 +9,12 @@ def f2(a,b,):
 def f(a:int=1,):
    call(arg={'explode': 'this',})
    call2(arg=[1,2,3],)
+    x = {
+        "a": 1,
+        "b": 2,
+    }["a"]
+    if a == {"a": 1,"b": 2,"c": 3,"d": 4,"e": 5,"f": 6,"g": 7,"h": 8,}["a"]:
+        pass

 def xxxxxxxxxxxxxxxxxxxxxxxxxxxx() -> Set[
    "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
@ -51,6 +57,21 @@ def f(
    call2(
        arg=[1, 2, 3],
    )
+    x = {
+        "a": 1,
+        "b": 2,
+    }["a"]
+    if a == {
+        "a": 1,
+        "b": 2,
+        "c": 3,
+        "d": 4,
+        "e": 5,
+        "f": 6,
+        "g": 7,
+        "h": 8,
+    }["a"]:
+        pass


 def xxxxxxxxxxxxxxxxxxxxxxxxxxxx() -> Set[
--- a/tests/data/function_trailing_comma_wip.py
+++ b/tests/data/function_trailing_comma_wip.py
@ -1,5 +0,0 @@
-CONFIG_FILES = [CONFIG_FILE] + SHARED_CONFIG_FILES + USER_CONFIG_FILES  # type: Final
-
-# output
-
-CONFIG_FILES = [CONFIG_FILE] + SHARED_CONFIG_FILES + USER_CONFIG_FILES  # type: Final
--- a/tests/data/long_strings_flag_disabled.py
+++ b/tests/data/long_strings_flag_disabled.py
@ -133,14 +133,11 @@
    "Use f-strings instead!",
 )

-old_fmt_string3 = (
-    "Whereas only the strings after the percent sign were long in the last example, this example uses a long initial string as well. This is another %s %s %s %s"
-    % (
-        "really really really really really",
-        "old",
-        "way to format strings!",
-        "Use f-strings instead!",
-    )
+old_fmt_string3 = "Whereas only the strings after the percent sign were long in the last example, this example uses a long initial string as well. This is another %s %s %s %s" % (
+    "really really really really really",
+    "old",
+    "way to format strings!",
+    "Use f-strings instead!",
 )

 fstring = f"f-strings definitely make things more {difficult} than they need to be for {{black}}. But boy they sure are handy. The problem is that some lines will need to have the 'f' whereas others do not. This {line}, for example, needs one."
--- a/tests/test_black.py
+++ b/tests/test_black.py
@ -346,11 +346,16 @@ def test_function2(self) -> None:
        black.assert_stable(source, actual, DEFAULT_MODE)

    @patch("black.dump_to_file", dump_to_stderr)
-    def test_function_trailing_comma_wip(self) -> None:
-        source, expected = read_data("function_trailing_comma_wip")
-        # sys.settrace(tracefunc)
-        actual = fs(source)
-        # sys.settrace(None)
+    def _test_wip(self) -> None:
+        source, expected = read_data("wip")
+        sys.settrace(tracefunc)
+        mode = replace(
+            DEFAULT_MODE,
+            experimental_string_processing=False,
+            target_versions={black.TargetVersion.PY38},
+        )
+        actual = fs(source, mode=mode)
+        sys.settrace(None)
        self.assertFormatEqual(expected, actual)
        black.assert_equivalent(source, actual)
        black.assert_stable(source, actual, black.FileMode())
@ -2085,6 +2090,7 @@ def tracefunc(frame: types.FrameType, event: str, arg: Any) -> Callable:
        return tracefunc

    stack = len(inspect.stack()) - 19
+    stack *= 2
    filename = frame.f_code.co_filename
    lineno = frame.f_lineno
    func_sig_lineno = lineno - 1