Make sure to split lines that start with a string operator (#2286)

Fixes #2284
This commit is contained in:
Bryan Bugyi 2021-05-30 17:41:03 -04:00 committed by GitHub
parent eec44f5977
commit 4ca4407b4a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 142 additions and 21 deletions

View File

@ -11,6 +11,7 @@
- Restored compatibility with Click 8.0 on Python 3.6 when LANG=C used (#2227)
- Add extra uvloop install + import support if in python env (#2258)
- Fix --experimental-string-processing crash when matching parens are not found (#2283)
- Make sure to split lines that start with a string operator (#2286)

### _Blackd_

View File

@ -920,9 +920,9 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
lines by themselves).

Requirements:
    * The line consists ONLY of a single string (possibly prefixed by a
      string operator [e.g. '+' or '==']), MAYBE a string trailer, and MAYBE
      a trailing comma.
      AND
    * All of the requirements listed in BaseStringSplitter's docstring.
@ -952,6 +952,16 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
CustomSplit objects and add them to the custom split map. CustomSplit objects and add them to the custom split map.
""" """
# Token types of symbolic binary operators that may directly precede a
# string operand and should stay attached to the first split line
# (e.g. '+', '*', and the comparison operators).  Keyword operators such
# as 'in' / 'not in' are NAME tokens and are matched separately in
# do_splitter_match — presumably why they are absent here (confirm
# against the matching logic).
STRING_OPERATORS = [
    token.PLUS,
    token.STAR,
    token.EQEQUAL,
    token.NOTEQUAL,
    token.LESS,
    token.LESSEQUAL,
    token.GREATER,
    token.GREATEREQUAL,
]
MIN_SUBSTR_SIZE = 6
# Matches an "f-expression" (e.g. {var}) that might be found in an f-string.
RE_FEXPR = r"""
@ -972,8 +982,20 @@ def do_splitter_match(self, line: Line) -> TMatchResult:
idx = 0 idx = 0
# The first leaf MAY be a '+' symbol... # The first two leaves MAY be the 'not in' keywords...
if is_valid_index(idx) and LL[idx].type == token.PLUS: if (
is_valid_index(idx)
and is_valid_index(idx + 1)
and [LL[idx].type, LL[idx + 1].type] == [token.NAME, token.NAME]
and str(LL[idx]) + str(LL[idx + 1]) == "not in"
):
idx += 2
# Else the first leaf MAY be a string operator symbol or the 'in' keyword...
elif is_valid_index(idx) and (
LL[idx].type in self.STRING_OPERATORS
or LL[idx].type == token.NAME
and str(LL[idx]) == "in"
):
idx += 1 idx += 1
# The next/first leaf MAY be an empty LPAR... # The next/first leaf MAY be an empty LPAR...
@ -1023,23 +1045,26 @@ def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
) )
first_string_line = True first_string_line = True
starts_with_plus = LL[0].type == token.PLUS
def line_needs_plus() -> bool: string_op_leaves = self._get_string_operator_leaves(LL)
return first_string_line and starts_with_plus string_op_leaves_length = (
sum([len(str(prefix_leaf)) for prefix_leaf in string_op_leaves]) + 1
if string_op_leaves
else 0
)
def maybe_append_plus(new_line: Line) -> None: def maybe_append_string_operators(new_line: Line) -> None:
""" """
Side Effects: Side Effects:
If @line starts with a plus and this is the first line we are If @line starts with a string operator and this is the first
constructing, this function appends a PLUS leaf to @new_line line we are constructing, this function appends the string
and replaces the old PLUS leaf in the node structure. Otherwise operator to @new_line and replaces the old string operator leaf
this function does nothing. in the node structure. Otherwise this function does nothing.
""" """
if line_needs_plus(): maybe_prefix_leaves = string_op_leaves if first_string_line else []
plus_leaf = Leaf(token.PLUS, "+") for i, prefix_leaf in enumerate(maybe_prefix_leaves):
replace_child(LL[0], plus_leaf) replace_child(LL[i], prefix_leaf)
new_line.append(plus_leaf) new_line.append(prefix_leaf)
ends_with_comma = ( ends_with_comma = (
is_valid_index(string_idx + 1) and LL[string_idx + 1].type == token.COMMA is_valid_index(string_idx + 1) and LL[string_idx + 1].type == token.COMMA
@ -1054,7 +1079,7 @@ def max_last_string() -> int:
result = self.line_length result = self.line_length
result -= line.depth * 4 result -= line.depth * 4
result -= 1 if ends_with_comma else 0 result -= 1 if ends_with_comma else 0
result -= 2 if line_needs_plus() else 0 result -= string_op_leaves_length
return result return result
# --- Calculate Max Break Index (for string value) # --- Calculate Max Break Index (for string value)
@ -1103,7 +1128,7 @@ def more_splits_should_be_made() -> bool:
break_idx = csplit.break_idx break_idx = csplit.break_idx
else: else:
# Algorithmic Split (automatic) # Algorithmic Split (automatic)
max_bidx = max_break_idx - 2 if line_needs_plus() else max_break_idx max_bidx = max_break_idx - string_op_leaves_length
maybe_break_idx = self._get_break_idx(rest_value, max_bidx) maybe_break_idx = self._get_break_idx(rest_value, max_bidx)
if maybe_break_idx is None: if maybe_break_idx is None:
# If we are unable to algorithmically determine a good split # If we are unable to algorithmically determine a good split
@ -1148,7 +1173,7 @@ def more_splits_should_be_made() -> bool:
# --- Construct `next_line` # --- Construct `next_line`
next_line = line.clone() next_line = line.clone()
maybe_append_plus(next_line) maybe_append_string_operators(next_line)
next_line.append(next_leaf) next_line.append(next_leaf)
string_line_results.append(Ok(next_line)) string_line_results.append(Ok(next_line))
@ -1169,7 +1194,7 @@ def more_splits_should_be_made() -> bool:
self._maybe_normalize_string_quotes(rest_leaf) self._maybe_normalize_string_quotes(rest_leaf)
last_line = line.clone() last_line = line.clone()
maybe_append_plus(last_line) maybe_append_string_operators(last_line)
# If there are any leaves to the right of the target string... # If there are any leaves to the right of the target string...
if is_valid_index(string_idx + 1): if is_valid_index(string_idx + 1):
@ -1345,6 +1370,17 @@ def _normalize_f_string(self, string: str, prefix: str) -> str:
else: else:
return string return string
def _get_string_operator_leaves(self, leaves: Iterable[Leaf]) -> List[Leaf]:
    """Return stripped copies of the leading string-operator leaves of *leaves*.

    Walks forward from the front of *leaves* while each leaf is either one
    of the symbolic STRING_OPERATORS or a NAME token (e.g. 'in' / 'not'),
    and builds a fresh Leaf for each with surrounding whitespace stripped.
    Relies on the caller having already matched the line shape, so a
    non-operator leaf (the string itself) is assumed to terminate the scan.
    """
    leaf_list = list(leaves)
    operator_leaves: List[Leaf] = []
    idx = 0
    # Deliberately the same unguarded scan as before: a string leaf is
    # expected to stop the loop before idx runs off the end.
    while leaf_list[idx].type in self.STRING_OPERATORS + [token.NAME]:
        stripped = Leaf(leaf_list[idx].type, str(leaf_list[idx]).strip())
        operator_leaves.append(stripped)
        idx += 1
    return operator_leaves
class StringParenWrapper(CustomSplitMapMixin, BaseStringSplitter): class StringParenWrapper(CustomSplitMapMixin, BaseStringSplitter):
""" """

View File

@ -406,6 +406,40 @@ def _legacy_listen_examples():
} }
) )
assert str(suffix_arr) == (
"['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert str(suffix_arr) != (
"['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert str(suffix_arr) <= (
"['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert str(suffix_arr) >= (
"['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert str(suffix_arr) < (
"['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert str(suffix_arr) > (
"['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert str(suffix_arr) in "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', 'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
assert str(suffix_arr) not in "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', 'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
# output # output
@ -906,3 +940,53 @@ def _legacy_listen_examples():
"since": since, "since": since,
} }
) )
assert (
str(suffix_arr)
== "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert (
str(suffix_arr)
!= "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert (
str(suffix_arr)
<= "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert (
str(suffix_arr)
>= "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert (
str(suffix_arr)
< "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert (
str(suffix_arr)
> "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert (
str(suffix_arr)
in "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', 'grykangaroo$',"
" 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$', 'rykangaroo$',"
" 'ykangaroo$']"
)
assert (
str(suffix_arr)
not in "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', 'grykangaroo$',"
" 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$',"
" 'rykangaroo$', 'ykangaroo$']"
)