Make sure to split lines that start with a string operator (#2286)

Fixes #2284
This commit is contained in:
Bryan Bugyi 2021-05-30 17:41:03 -04:00 committed by GitHub
parent eec44f5977
commit 4ca4407b4a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 142 additions and 21 deletions

View File

@ -11,6 +11,7 @@
- Restored compatibility with Click 8.0 on Python 3.6 when LANG=C used (#2227)
- Add extra uvloop install + import support if in python env (#2258)
- Fix --experimental-string-processing crash when matching parens are not found (#2283)
- Make sure to split lines that start with a string operator (#2286)

### _Blackd_

View File

@ -920,9 +920,9 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
lines by themselves).

Requirements:
    * The line consists ONLY of a single string (possibly prefixed by a
      string operator [e.g. '+' or '==']), MAYBE a string trailer, and MAYBE
      a trailing comma.
      AND
    * All of the requirements listed in BaseStringSplitter's docstring.
@ -952,6 +952,16 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
CustomSplit objects and add them to the custom split map. CustomSplit objects and add them to the custom split map.
""" """
# Token types of symbolic binary operators that may directly precede a
# string operand and should stay attached to the first split line
# (e.g. '+', '*', and the comparison operators).  Keyword operators such
# as 'in' / 'not in' are NAME tokens and are matched separately in
# do_splitter_match — presumably why they are absent here (confirm
# against the matching logic).
STRING_OPERATORS = [
    token.PLUS,
    token.STAR,
    token.EQEQUAL,
    token.NOTEQUAL,
    token.LESS,
    token.LESSEQUAL,
    token.GREATER,
    token.GREATEREQUAL,
]
MIN_SUBSTR_SIZE = 6
# Matches an "f-expression" (e.g. {var}) that might be found in an f-string.
RE_FEXPR = r"""
@ -972,8 +982,20 @@ def do_splitter_match(self, line: Line) -> TMatchResult:
idx = 0 idx = 0
# The first leaf MAY be a '+' symbol... # The first two leaves MAY be the 'not in' keywords...
if is_valid_index(idx) and LL[idx].type == token.PLUS: if (
is_valid_index(idx)
and is_valid_index(idx + 1)
and [LL[idx].type, LL[idx + 1].type] == [token.NAME, token.NAME]
and str(LL[idx]) + str(LL[idx + 1]) == "not in"
):
idx += 2
# Else the first leaf MAY be a string operator symbol or the 'in' keyword...
elif is_valid_index(idx) and (
LL[idx].type in self.STRING_OPERATORS
or LL[idx].type == token.NAME
and str(LL[idx]) == "in"
):
idx += 1 idx += 1
# The next/first leaf MAY be an empty LPAR... # The next/first leaf MAY be an empty LPAR...
@ -1023,23 +1045,26 @@ def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
) )
first_string_line = True first_string_line = True
starts_with_plus = LL[0].type == token.PLUS
def line_needs_plus() -> bool: string_op_leaves = self._get_string_operator_leaves(LL)
return first_string_line and starts_with_plus string_op_leaves_length = (
sum([len(str(prefix_leaf)) for prefix_leaf in string_op_leaves]) + 1
if string_op_leaves
else 0
)
def maybe_append_plus(new_line: Line) -> None: def maybe_append_string_operators(new_line: Line) -> None:
""" """
Side Effects: Side Effects:
If @line starts with a plus and this is the first line we are If @line starts with a string operator and this is the first
constructing, this function appends a PLUS leaf to @new_line line we are constructing, this function appends the string
and replaces the old PLUS leaf in the node structure. Otherwise operator to @new_line and replaces the old string operator leaf
this function does nothing. in the node structure. Otherwise this function does nothing.
""" """
if line_needs_plus(): maybe_prefix_leaves = string_op_leaves if first_string_line else []
plus_leaf = Leaf(token.PLUS, "+") for i, prefix_leaf in enumerate(maybe_prefix_leaves):
replace_child(LL[0], plus_leaf) replace_child(LL[i], prefix_leaf)
new_line.append(plus_leaf) new_line.append(prefix_leaf)
ends_with_comma = ( ends_with_comma = (
is_valid_index(string_idx + 1) and LL[string_idx + 1].type == token.COMMA is_valid_index(string_idx + 1) and LL[string_idx + 1].type == token.COMMA
@ -1054,7 +1079,7 @@ def max_last_string() -> int:
result = self.line_length result = self.line_length
result -= line.depth * 4 result -= line.depth * 4
result -= 1 if ends_with_comma else 0 result -= 1 if ends_with_comma else 0
result -= 2 if line_needs_plus() else 0 result -= string_op_leaves_length
return result return result
# --- Calculate Max Break Index (for string value) # --- Calculate Max Break Index (for string value)
@ -1103,7 +1128,7 @@ def more_splits_should_be_made() -> bool:
break_idx = csplit.break_idx break_idx = csplit.break_idx
else: else:
# Algorithmic Split (automatic) # Algorithmic Split (automatic)
max_bidx = max_break_idx - 2 if line_needs_plus() else max_break_idx max_bidx = max_break_idx - string_op_leaves_length
maybe_break_idx = self._get_break_idx(rest_value, max_bidx) maybe_break_idx = self._get_break_idx(rest_value, max_bidx)
if maybe_break_idx is None: if maybe_break_idx is None:
# If we are unable to algorithmically determine a good split # If we are unable to algorithmically determine a good split
@ -1148,7 +1173,7 @@ def more_splits_should_be_made() -> bool:
# --- Construct `next_line` # --- Construct `next_line`
next_line = line.clone() next_line = line.clone()
maybe_append_plus(next_line) maybe_append_string_operators(next_line)
next_line.append(next_leaf) next_line.append(next_leaf)
string_line_results.append(Ok(next_line)) string_line_results.append(Ok(next_line))
@ -1169,7 +1194,7 @@ def more_splits_should_be_made() -> bool:
self._maybe_normalize_string_quotes(rest_leaf) self._maybe_normalize_string_quotes(rest_leaf)
last_line = line.clone() last_line = line.clone()
maybe_append_plus(last_line) maybe_append_string_operators(last_line)
# If there are any leaves to the right of the target string... # If there are any leaves to the right of the target string...
if is_valid_index(string_idx + 1): if is_valid_index(string_idx + 1):
@ -1345,6 +1370,17 @@ def _normalize_f_string(self, string: str, prefix: str) -> str:
else: else:
return string return string
def _get_string_operator_leaves(self, leaves: Iterable[Leaf]) -> List[Leaf]:
    """Return stripped copies of the leading string-operator leaves of *leaves*.

    Walks forward from the front of *leaves* while each leaf is either one
    of the symbolic STRING_OPERATORS or a NAME token (e.g. 'in' / 'not'),
    and builds a fresh Leaf for each with surrounding whitespace stripped.
    Relies on the caller having already matched the line shape, so a
    non-operator leaf (the string itself) is assumed to terminate the scan.
    """
    leaf_list = list(leaves)
    operator_leaves: List[Leaf] = []
    idx = 0
    # Deliberately the same unguarded scan as before: a string leaf is
    # expected to stop the loop before idx runs off the end.
    while leaf_list[idx].type in self.STRING_OPERATORS + [token.NAME]:
        stripped = Leaf(leaf_list[idx].type, str(leaf_list[idx]).strip())
        operator_leaves.append(stripped)
        idx += 1
    return operator_leaves
class StringParenWrapper(CustomSplitMapMixin, BaseStringSplitter): class StringParenWrapper(CustomSplitMapMixin, BaseStringSplitter):
""" """

View File

@ -406,6 +406,40 @@ def _legacy_listen_examples():
} }
) )
assert str(suffix_arr) == (
"['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert str(suffix_arr) != (
"['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert str(suffix_arr) <= (
"['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert str(suffix_arr) >= (
"['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert str(suffix_arr) < (
"['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert str(suffix_arr) > (
"['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert str(suffix_arr) in "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', 'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
assert str(suffix_arr) not in "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', 'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
# output # output
@ -906,3 +940,53 @@ def _legacy_listen_examples():
"since": since, "since": since,
} }
) )
assert (
str(suffix_arr)
== "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert (
str(suffix_arr)
!= "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert (
str(suffix_arr)
<= "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert (
str(suffix_arr)
>= "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert (
str(suffix_arr)
< "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert (
str(suffix_arr)
> "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert (
str(suffix_arr)
in "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', 'grykangaroo$',"
" 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$', 'rykangaroo$',"
" 'ykangaroo$']"
)
assert (
str(suffix_arr)
not in "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', 'grykangaroo$',"
" 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$',"
" 'rykangaroo$', 'ykangaroo$']"
)