Permit standalone form feed characters at the module level (#4021)

Co-authored-by: Stephen Morton <git@tungol.org>
Co-authored-by: Jelle Zijlstra <jelle.zijlstra@gmail.com>
This commit is contained in:
tungol 2023-11-20 20:44:33 -08:00 committed by GitHub
parent ec4a1525ee
commit 89e28ea66f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 318 additions and 35 deletions

View File

@ -12,7 +12,7 @@
### Preview style ### Preview style
<!-- Changes that affect Black's preview style --> - Standalone form feed characters at the module level are no longer removed (#4021)
- Additional cases of immediately nested tuples, lists, and dictionaries are now - Additional cases of immediately nested tuples, lists, and dictionaries are now
indented less (#4012) indented less (#4012)

View File

@ -149,7 +149,7 @@ Utilities
.. autofunction:: black.numerics.normalize_numeric_literal .. autofunction:: black.numerics.normalize_numeric_literal
.. autofunction:: black.linegen.normalize_prefix .. autofunction:: black.comments.normalize_trailing_prefix
.. autofunction:: black.strings.normalize_string_prefix .. autofunction:: black.strings.normalize_string_prefix
@ -168,3 +168,5 @@ Utilities
.. autofunction:: black.strings.sub_twice .. autofunction:: black.strings.sub_twice
.. autofunction:: black.nodes.whitespace .. autofunction:: black.nodes.whitespace
.. autofunction:: black.nodes.make_simple_prefix

View File

@ -296,3 +296,14 @@ s = ( # Top comment
# Bottom comment # Bottom comment
) )
``` ```
### Form feed characters
_Black_ will now retain form feed characters on otherwise empty lines at the module
level. Only one form feed is retained for a group of consecutive empty lines. Where
there are two empty lines in a row, the form feed will be placed on the second line.
_Black_ already retained form feed literals inside a comment or inside a string. This
remains the case.

View File

@ -10,6 +10,7 @@
WHITESPACE, WHITESPACE,
container_of, container_of,
first_leaf_of, first_leaf_of,
make_simple_prefix,
preceding_leaf, preceding_leaf,
syms, syms,
) )
@ -44,6 +45,7 @@ class ProtoComment:
value: str # content of the comment value: str # content of the comment
newlines: int # how many newlines before the comment newlines: int # how many newlines before the comment
consumed: int # how many characters of the original leaf's prefix did we consume consumed: int # how many characters of the original leaf's prefix did we consume
form_feed: bool # is there a form feed before the comment
def generate_comments(leaf: LN) -> Iterator[Leaf]: def generate_comments(leaf: LN) -> Iterator[Leaf]:
@ -65,8 +67,12 @@ def generate_comments(leaf: LN) -> Iterator[Leaf]:
Inline comments are emitted as regular token.COMMENT leaves. Standalone Inline comments are emitted as regular token.COMMENT leaves. Standalone
are emitted with a fake STANDALONE_COMMENT token identifier. are emitted with a fake STANDALONE_COMMENT token identifier.
""" """
total_consumed = 0
for pc in list_comments(leaf.prefix, is_endmarker=leaf.type == token.ENDMARKER): for pc in list_comments(leaf.prefix, is_endmarker=leaf.type == token.ENDMARKER):
yield Leaf(pc.type, pc.value, prefix="\n" * pc.newlines) total_consumed = pc.consumed
prefix = make_simple_prefix(pc.newlines, pc.form_feed)
yield Leaf(pc.type, pc.value, prefix=prefix)
normalize_trailing_prefix(leaf, total_consumed)
@lru_cache(maxsize=4096) @lru_cache(maxsize=4096)
@ -79,11 +85,14 @@ def list_comments(prefix: str, *, is_endmarker: bool) -> List[ProtoComment]:
consumed = 0 consumed = 0
nlines = 0 nlines = 0
ignored_lines = 0 ignored_lines = 0
for index, line in enumerate(re.split("\r?\n", prefix)): form_feed = False
consumed += len(line) + 1 # adding the length of the split '\n' for index, full_line in enumerate(re.split("\r?\n", prefix)):
line = line.lstrip() consumed += len(full_line) + 1 # adding the length of the split '\n'
line = full_line.lstrip()
if not line: if not line:
nlines += 1 nlines += 1
if "\f" in full_line:
form_feed = True
if not line.startswith("#"): if not line.startswith("#"):
# Escaped newlines outside of a comment are not really newlines at # Escaped newlines outside of a comment are not really newlines at
# all. We treat a single-line comment following an escaped newline # all. We treat a single-line comment following an escaped newline
@ -99,13 +108,33 @@ def list_comments(prefix: str, *, is_endmarker: bool) -> List[ProtoComment]:
comment = make_comment(line) comment = make_comment(line)
result.append( result.append(
ProtoComment( ProtoComment(
type=comment_type, value=comment, newlines=nlines, consumed=consumed type=comment_type,
value=comment,
newlines=nlines,
consumed=consumed,
form_feed=form_feed,
) )
) )
form_feed = False
nlines = 0 nlines = 0
return result return result
def normalize_trailing_prefix(leaf: LN, total_consumed: int) -> None:
    """Rewrite the part of `leaf.prefix` that follows the consumed comments.

    The first `total_consumed` characters of the prefix are skipped. If the
    text after them contains a backslash, the prefix is cleared. Otherwise the
    prefix is rebuilt by `make_simple_prefix` from the number of newlines in
    that text, keeping a form feed only when the text contains one and also
    ends with a newline.
    """
    leftover = leaf.prefix[total_consumed:]
    if "\\" in leftover:
        # A backslash in the leftover text: drop the prefix entirely.
        leaf.prefix = ""
        return

    keep_form_feed = leftover.endswith("\n") and "\f" in leftover
    leaf.prefix = make_simple_prefix(leftover.count("\n"), keep_form_feed)
def make_comment(content: str) -> str: def make_comment(content: str) -> str:
"""Return a consistently formatted comment from the given `content` string. """Return a consistently formatted comment from the given `content` string.

View File

@ -149,7 +149,8 @@ def visit_default(self, node: LN) -> Iterator[Line]:
self.current_line.append(comment) self.current_line.append(comment)
yield from self.line() yield from self.line()
normalize_prefix(node, inside_brackets=any_open_brackets) if any_open_brackets:
node.prefix = ""
if self.mode.string_normalization and node.type == token.STRING: if self.mode.string_normalization and node.type == token.STRING:
node.value = normalize_string_prefix(node.value) node.value = normalize_string_prefix(node.value)
node.value = normalize_string_quotes(node.value) node.value = normalize_string_quotes(node.value)
@ -1035,8 +1036,6 @@ def bracket_split_build_line(
result.inside_brackets = True result.inside_brackets = True
result.depth += 1 result.depth += 1
if leaves: if leaves:
# Since body is a new indent level, remove spurious leading whitespace.
normalize_prefix(leaves[0], inside_brackets=True)
# Ensure a trailing comma for imports and standalone function arguments, but # Ensure a trailing comma for imports and standalone function arguments, but
# be careful not to add one after any comments or within type annotations. # be careful not to add one after any comments or within type annotations.
no_commas = ( no_commas = (
@ -1106,7 +1105,7 @@ def split_wrapper(
line: Line, features: Collection[Feature], mode: Mode line: Line, features: Collection[Feature], mode: Mode
) -> Iterator[Line]: ) -> Iterator[Line]:
for split_line in split_func(line, features, mode): for split_line in split_func(line, features, mode):
normalize_prefix(split_line.leaves[0], inside_brackets=True) split_line.leaves[0].prefix = ""
yield split_line yield split_line
return split_wrapper return split_wrapper
@ -1250,24 +1249,6 @@ def append_to_line(leaf: Leaf) -> Iterator[Line]:
yield current_line yield current_line
def normalize_prefix(leaf: Leaf, *, inside_brackets: bool) -> None:
    """Reduce `leaf.prefix` to bare newlines, or clear it altogether.

    Inside brackets the prefix is always cleared. Outside brackets the prefix
    is split on "#": if the text before the first "#" contains a backslash the
    prefix is cleared; otherwise it becomes one newline per newline found after
    the last "#", minus one when the prefix contained a "#" at all.
    """
    if inside_brackets:
        leaf.prefix = ""
        return

    pieces = leaf.prefix.split("#")
    if "\\" in pieces[0]:
        leaf.prefix = ""
        return

    newline_total = pieces[-1].count("\n")
    if len(pieces) > 1:
        # Discount one newline when the prefix held a "#".
        newline_total -= 1
    leaf.prefix = "\n" * newline_total
def normalize_invisible_parens( # noqa: C901 def normalize_invisible_parens( # noqa: C901
node: Node, parens_after: Set[str], *, mode: Mode, features: Collection[Feature] node: Node, parens_after: Set[str], *, mode: Mode, features: Collection[Feature]
) -> None: ) -> None:

View File

@ -31,6 +31,7 @@
is_type_comment, is_type_comment,
is_type_ignore_comment, is_type_ignore_comment,
is_with_or_async_with_stmt, is_with_or_async_with_stmt,
make_simple_prefix,
replace_child, replace_child,
syms, syms,
whitespace, whitespace,
@ -520,12 +521,12 @@ class LinesBlock:
before: int = 0 before: int = 0
content_lines: List[str] = field(default_factory=list) content_lines: List[str] = field(default_factory=list)
after: int = 0 after: int = 0
form_feed: bool = False
def all_lines(self) -> List[str]: def all_lines(self) -> List[str]:
empty_line = str(Line(mode=self.mode)) empty_line = str(Line(mode=self.mode))
return ( prefix = make_simple_prefix(self.before, self.form_feed, empty_line)
[empty_line * self.before] + self.content_lines + [empty_line * self.after] return [prefix] + self.content_lines + [empty_line * self.after]
)
@dataclass @dataclass
@ -550,6 +551,12 @@ def maybe_empty_lines(self, current_line: Line) -> LinesBlock:
This is for separating `def`, `async def` and `class` with extra empty This is for separating `def`, `async def` and `class` with extra empty
lines (two on module-level). lines (two on module-level).
""" """
form_feed = (
Preview.allow_form_feeds in self.mode
and current_line.depth == 0
and bool(current_line.leaves)
and "\f\n" in current_line.leaves[0].prefix
)
before, after = self._maybe_empty_lines(current_line) before, after = self._maybe_empty_lines(current_line)
previous_after = self.previous_block.after if self.previous_block else 0 previous_after = self.previous_block.after if self.previous_block else 0
before = ( before = (
@ -575,6 +582,7 @@ def maybe_empty_lines(self, current_line: Line) -> LinesBlock:
original_line=current_line, original_line=current_line,
before=before, before=before,
after=after, after=after,
form_feed=form_feed,
) )
# Maintain the semantic_leading_comment state. # Maintain the semantic_leading_comment state.

View File

@ -194,6 +194,7 @@ class Preview(Enum):
allow_empty_first_line_before_new_block_or_comment = auto() allow_empty_first_line_before_new_block_or_comment = auto()
single_line_format_skip_with_multiple_comments = auto() single_line_format_skip_with_multiple_comments = auto()
long_case_block_line_splitting = auto() long_case_block_line_splitting = auto()
allow_form_feeds = auto()
class Deprecated(UserWarning): class Deprecated(UserWarning):

View File

@ -407,6 +407,13 @@ def whitespace(leaf: Leaf, *, complex_subscript: bool, mode: Mode) -> str: # no
return SPACE return SPACE
def make_simple_prefix(nl_count: int, form_feed: bool, empty_line: str = "\n") -> str:
    """Build a prefix of `nl_count` empty lines, optionally carrying a form feed.

    Without `form_feed` the result is `empty_line` repeated `nl_count` times.
    With `form_feed` the result is `nl_count - 1` copies of `empty_line`, then a
    form feed character, then one final `empty_line`.
    """
    if not form_feed:
        return empty_line * nl_count

    leading = empty_line * (nl_count - 1)
    return f"{leading}\f{empty_line}"
def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]: def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
"""Return the first leaf that precedes `node`, if any.""" """Return the first leaf that precedes `node`, if any."""
while node: while node:

View File

@ -4,8 +4,9 @@
""" """
import json import json
import re
import tempfile import tempfile
from typing import Any, Optional from typing import Any, List, Optional
from click import echo, style from click import echo, style
from mypy_extensions import mypyc_attr from mypy_extensions import mypyc_attr
@ -55,12 +56,28 @@ def ipynb_diff(a: str, b: str, a_name: str, b_name: str) -> str:
return "".join(diff_lines) return "".join(diff_lines)
_line_pattern = re.compile(r"(.*?(?:\r\n|\n|\r|$))")
def _splitlines_no_ff(source: str) -> List[str]:
"""Split a string into lines ignoring form feed and other chars.
This mimics how the Python parser splits source code.
A simplified version of the function with the same name in Lib/ast.py
"""
result = [match[0] for match in _line_pattern.finditer(source)]
if result[-1] == "":
result.pop(-1)
return result
def diff(a: str, b: str, a_name: str, b_name: str) -> str: def diff(a: str, b: str, a_name: str, b_name: str) -> str:
"""Return a unified diff string between strings `a` and `b`.""" """Return a unified diff string between strings `a` and `b`."""
import difflib import difflib
a_lines = a.splitlines(keepends=True) a_lines = _splitlines_no_ff(a)
b_lines = b.splitlines(keepends=True) b_lines = _splitlines_no_ff(b)
diff_lines = [] diff_lines = []
for line in difflib.unified_diff( for line in difflib.unified_diff(
a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5 a_lines, b_lines, fromfile=a_name, tofile=b_name, n=5

View File

@ -222,6 +222,8 @@ def _partially_consume_prefix(self, prefix: str, column: int) -> Tuple[str, str]
elif char == "\n": elif char == "\n":
# unexpected empty line # unexpected empty line
current_column = 0 current_column = 0
elif char == "\f":
current_column = 0
else: else:
# indent is finished # indent is finished
wait_for_nl = True wait_for_nl = True

View File

@ -0,0 +1,225 @@
# flags: --preview
# Warning! This file contains form feeds (ASCII 0x0C, often represented by \f or ^L).
# These may be invisible in your editor: ensure you can see them before making changes here.
# There's one at the start that'll get stripped
# Comment and statement processing is different enough that we'll test variations of both
# contexts here
#
#
#
#
#
#
#
#
#
#
\
#
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
# form feed after a dedent
def foo():
pass
pass
# form feeds are prohibited inside blocks, or on a line with nonwhitespace
def bar( a = 1 ,b : bool = False ) :
pass
class Baz:
def __init__(self):
pass
def something(self):
pass
#
pass
pass #
a = 1
#
pass
a = 1
a = [
]
# as internal whitespace of a comment is allowed but why
"form feed literal in a string is okay "
# form feeds at the very end get removed.
# output
# Warning! This file contains form feeds (ASCII 0x0C, often represented by \f or ^L).
# These may be invisible in your editor: ensure you can see them before making changes here.
# There's one at the start that'll get stripped
# Comment and statement processing is different enough that we'll test variations of both
# contexts here
#
#
#
#
#
#
#
#
#
#
#
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
pass
# form feed after a dedent
def foo():
pass
pass
# form feeds are prohibited inside blocks, or on a line with nonwhitespace
def bar(a=1, b: bool = False):
pass
class Baz:
def __init__(self):
pass
def something(self):
pass
#
pass
pass #
a = 1
#
pass
a = 1
a = []
# as internal whitespace of a comment is allowed but why
"form feed literal in a string is okay "
# form feeds at the very end get removed.