Make sure to split lines that start with a string operator (#2286)

Fixes #2284

commit 4ca4407b4a (parent eec44f5977)
@@ -11,6 +11,7 @@
 - Restored compatibility with Click 8.0 on Python 3.6 when LANG=C used (#2227)
 - Add extra uvloop install + import support if in python env (#2258)
 - Fix --experimental-string-processing crash when matching parens are not found (#2283)
+- Make sure to split lines that start with a string operator (#2286)
 
 ### _Blackd_
 
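For orientation, here is the change condensed into a before/after pair drawn from the regression test added further down in this commit. The suffix_arr definition is supplied here only so the snippet runs on its own (the test data leaves it undefined). With --experimental-string-processing enabled, Black now splits the over-long comparison so that each continuation line begins with the string operator; previously only a leading '+' was handled this way.

# Condensed from the regression test added later in this commit.
suffix_arr = [
    "$", "angaroo$", "angrykangaroo$", "aroo$", "garoo$", "grykangaroo$",
    "kangaroo$", "ngaroo$", "ngrykangaroo$", "o$", "oo$", "roo$",
    "rykangaroo$", "ykangaroo$",
]

# Input: a comparison against an implicitly concatenated string wrapped in
# parentheses.
assert str(suffix_arr) == (
    "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
    "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
    "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)

# Output shape from the test data below: the split lines start with the
# string operator ('==' here).
assert (
    str(suffix_arr)
    == "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
    "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
    "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)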
@@ -920,9 +920,9 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
     lines by themselves).
 
     Requirements:
-        * The line consists ONLY of a single string (with the exception of a
-          '+' symbol which MAY exist at the start of the line), MAYBE a string
-          trailer, and MAYBE a trailing comma.
+        * The line consists ONLY of a single string (possibly prefixed by a
+          string operator [e.g. '+' or '==']), MAYBE a string trailer, and MAYBE
+          a trailing comma.
             AND
         * All of the requirements listed in BaseStringSplitter's docstring.
 
@@ -952,6 +952,16 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
         CustomSplit objects and add them to the custom split map.
     """
 
+    STRING_OPERATORS = [
+        token.PLUS,
+        token.STAR,
+        token.EQEQUAL,
+        token.NOTEQUAL,
+        token.LESS,
+        token.LESSEQUAL,
+        token.GREATER,
+        token.GREATEREQUAL,
+    ]
     MIN_SUBSTR_SIZE = 6
     # Matches an "f-expression" (e.g. {var}) that might be found in an f-string.
     RE_FEXPR = r"""
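Note that STRING_OPERATORS only lists operator tokens; the 'in' and 'not in' comparisons are handled separately in do_splitter_match below because keywords reach the splitter as NAME leaves rather than operator tokens. A quick standard-library sketch of that distinction (the stdlib tokenize module classifies keywords the same way as the blib2to3 tokenizer Black uses):

# Keywords such as 'in' and 'not' come out of the tokenizer as NAME tokens,
# while '==' is its own operator token. That is why 'in' / 'not in' are
# special-cased in do_splitter_match instead of being listed in
# STRING_OPERATORS.
import io
import token
import tokenize

for src in ('x == "abc"', 'x not in "abc"'):
    toks = tokenize.generate_tokens(io.StringIO(src).readline)
    print([(token.tok_name[t.exact_type], t.string) for t in toks if t.string.strip()])
# [('NAME', 'x'), ('EQEQUAL', '=='), ('STRING', '"abc"')]
# [('NAME', 'x'), ('NAME', 'not'), ('NAME', 'in'), ('STRING', '"abc"')]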
@@ -972,8 +982,20 @@ def do_splitter_match(self, line: Line) -> TMatchResult:
 
         idx = 0
 
-        # The first leaf MAY be a '+' symbol...
-        if is_valid_index(idx) and LL[idx].type == token.PLUS:
+        # The first two leaves MAY be the 'not in' keywords...
+        if (
+            is_valid_index(idx)
+            and is_valid_index(idx + 1)
+            and [LL[idx].type, LL[idx + 1].type] == [token.NAME, token.NAME]
+            and str(LL[idx]) + str(LL[idx + 1]) == "not in"
+        ):
+            idx += 2
+        # Else the first leaf MAY be a string operator symbol or the 'in' keyword...
+        elif is_valid_index(idx) and (
+            LL[idx].type in self.STRING_OPERATORS
+            or LL[idx].type == token.NAME
+            and str(LL[idx]) == "in"
+        ):
             idx += 1
 
         # The next/first leaf MAY be an empty LPAR...
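A small aside on the new elif condition: Python's 'and' binds tighter than 'or', so the check reads as "the leaf is one of the STRING_OPERATORS, or (it is a NAME leaf whose text is 'in')".

# 'and' binds tighter than 'or', so the elif above is equivalent to:
#     LL[idx].type in self.STRING_OPERATORS
#     or (LL[idx].type == token.NAME and str(LL[idx]) == "in")
assert (True or False and False) is True      # parsed as True or (False and False)
assert ((True or False) and False) is False   # the other grouping would differ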
@@ -1023,23 +1045,26 @@ def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
         )
 
         first_string_line = True
-        starts_with_plus = LL[0].type == token.PLUS
 
-        def line_needs_plus() -> bool:
-            return first_string_line and starts_with_plus
+        string_op_leaves = self._get_string_operator_leaves(LL)
+        string_op_leaves_length = (
+            sum([len(str(prefix_leaf)) for prefix_leaf in string_op_leaves]) + 1
+            if string_op_leaves
+            else 0
+        )
 
-        def maybe_append_plus(new_line: Line) -> None:
+        def maybe_append_string_operators(new_line: Line) -> None:
             """
             Side Effects:
-                If @line starts with a plus and this is the first line we are
-                constructing, this function appends a PLUS leaf to @new_line
-                and replaces the old PLUS leaf in the node structure. Otherwise
-                this function does nothing.
+                If @line starts with a string operator and this is the first
+                line we are constructing, this function appends the string
+                operator to @new_line and replaces the old string operator leaf
+                in the node structure. Otherwise this function does nothing.
             """
-            if line_needs_plus():
-                plus_leaf = Leaf(token.PLUS, "+")
-                replace_child(LL[0], plus_leaf)
-                new_line.append(plus_leaf)
+            maybe_prefix_leaves = string_op_leaves if first_string_line else []
+            for i, prefix_leaf in enumerate(maybe_prefix_leaves):
+                replace_child(LL[i], prefix_leaf)
+                new_line.append(prefix_leaf)
 
         ends_with_comma = (
             is_valid_index(string_idx + 1) and LL[string_idx + 1].type == token.COMMA
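As a toy restatement of the string_op_leaves_length expression above, with plain strings standing in for Leaf objects (prefix_length is a name made up for this sketch): the budget reserved for the prefix is the combined operator text plus one column for the space before the string.

# Toy restatement of the string_op_leaves_length formula; plain strings stand
# in for blib2to3 Leaf objects, and 'prefix_length' is a made-up name.
from typing import List


def prefix_length(string_op_leaves: List[str]) -> int:
    # Length of the operator text plus one column for the space before the
    # string, or 0 when the line has no operator prefix.
    return (
        sum(len(leaf) for leaf in string_op_leaves) + 1 if string_op_leaves else 0
    )


assert prefix_length([]) == 0             # bare string line
assert prefix_length(["+"]) == 2          # '+ "..."'
assert prefix_length(["=="]) == 3         # '== "..."'
assert prefix_length(["not", "in"]) == 6  # 'not in "..."' (leaf text only)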
@@ -1054,7 +1079,7 @@ def max_last_string() -> int:
             result = self.line_length
             result -= line.depth * 4
             result -= 1 if ends_with_comma else 0
-            result -= 2 if line_needs_plus() else 0
+            result -= string_op_leaves_length
             return result
 
         # --- Calculate Max Break Index (for string value)
@@ -1103,7 +1128,7 @@ def more_splits_should_be_made() -> bool:
                 break_idx = csplit.break_idx
             else:
                 # Algorithmic Split (automatic)
-                max_bidx = max_break_idx - 2 if line_needs_plus() else max_break_idx
+                max_bidx = max_break_idx - string_op_leaves_length
                 maybe_break_idx = self._get_break_idx(rest_value, max_bidx)
                 if maybe_break_idx is None:
                     # If we are unable to algorithmically determine a good split
@@ -1148,7 +1173,7 @@ def more_splits_should_be_made() -> bool:
 
             # --- Construct `next_line`
             next_line = line.clone()
-            maybe_append_plus(next_line)
+            maybe_append_string_operators(next_line)
             next_line.append(next_leaf)
             string_line_results.append(Ok(next_line))
 
@@ -1169,7 +1194,7 @@ def more_splits_should_be_made() -> bool:
         self._maybe_normalize_string_quotes(rest_leaf)
 
         last_line = line.clone()
-        maybe_append_plus(last_line)
+        maybe_append_string_operators(last_line)
 
         # If there are any leaves to the right of the target string...
         if is_valid_index(string_idx + 1):
@@ -1345,6 +1370,17 @@ def _normalize_f_string(self, string: str, prefix: str) -> str:
         else:
             return string
 
+    def _get_string_operator_leaves(self, leaves: Iterable[Leaf]) -> List[Leaf]:
+        LL = list(leaves)
+
+        string_op_leaves = []
+        i = 0
+        while LL[i].type in self.STRING_OPERATORS + [token.NAME]:
+            prefix_leaf = Leaf(LL[i].type, str(LL[i]).strip())
+            string_op_leaves.append(prefix_leaf)
+            i += 1
+        return string_op_leaves
+
 
 class StringParenWrapper(CustomSplitMapMixin, BaseStringSplitter):
     """
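And a minimal sketch of what the new _get_string_operator_leaves helper gathers, with (token_name, text) tuples standing in for blib2to3 Leaf objects (the names below are made up for the sketch): the scan collects the leading operator/NAME leaves and stops at the string leaf itself.

# Minimal mirror of _get_string_operator_leaves using (token_name, text)
# tuples instead of blib2to3 Leaf objects.
STRING_OPERATOR_NAMES = {
    "PLUS", "STAR", "EQEQUAL", "NOTEQUAL",
    "LESS", "LESSEQUAL", "GREATER", "GREATEREQUAL",
}


def get_string_operator_texts(leaves):
    collected = []
    i = 0
    # Collect leading operator / NAME leaves; stop at the STRING leaf.
    while leaves[i][0] in STRING_OPERATOR_NAMES or leaves[i][0] == "NAME":
        collected.append(leaves[i][1].strip())
        i += 1
    return collected


# Leaves of the line `not in "kangaroo"` once do_splitter_match has accepted it:
print(get_string_operator_texts([("NAME", "not"), ("NAME", "in"), ("STRING", '"kangaroo"')]))
# -> ['not', 'in']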
@@ -406,6 +406,40 @@ def _legacy_listen_examples():
         }
     )
 
 
+assert str(suffix_arr) == (
+    "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
+    "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
+    "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
+)
+assert str(suffix_arr) != (
+    "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
+    "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
+    "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
+)
+assert str(suffix_arr) <= (
+    "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
+    "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
+    "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
+)
+assert str(suffix_arr) >= (
+    "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
+    "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
+    "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
+)
+assert str(suffix_arr) < (
+    "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
+    "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
+    "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
+)
+assert str(suffix_arr) > (
+    "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
+    "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
+    "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
+)
+assert str(suffix_arr) in "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', 'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
+assert str(suffix_arr) not in "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', 'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
+
+
 # output
 
@@ -906,3 +940,53 @@ def _legacy_listen_examples():
             "since": since,
         }
     )
+
+
+assert (
+    str(suffix_arr)
+    == "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
+    "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
+    "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
+)
+assert (
+    str(suffix_arr)
+    != "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
+    "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
+    "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
+)
+assert (
+    str(suffix_arr)
+    <= "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
+    "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
+    "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
+)
+assert (
+    str(suffix_arr)
+    >= "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
+    "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
+    "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
+)
+assert (
+    str(suffix_arr)
+    < "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
+    "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
+    "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
+)
+assert (
+    str(suffix_arr)
+    > "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
+    "'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
+    "'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
+)
+assert (
+    str(suffix_arr)
+    in "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', 'grykangaroo$',"
+    " 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$', 'rykangaroo$',"
+    " 'ykangaroo$']"
+)
+assert (
+    str(suffix_arr)
+    not in "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', 'grykangaroo$',"
+    " 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$',"
+    " 'rykangaroo$', 'ykangaroo$']"
+)