Re-indent the contents of docstrings (#1053)

* Re-indent the contents of docstrings when indentation changes

Keeping the contents of docstrings completely unchanged when
re-indenting (from 2-space intents to 4, for example) can cause
incorrect docstring indentation:

```
class MyClass:
  """Multiline
  class docstring
  """

  def method(self):
    """Multiline
    method docstring
    """
    pass
```
...becomes:
```
class MyClass:
    """Multiline
  class docstring
  """

    def method(self):
        """Multiline
    method docstring
    """
        pass
```

This uses the PEP 257 algorithm for determining docstring indentation,
and adjusts the contents of docstrings to match their new indentation
after `black` is applied.

A small normalization is necessary to `assert_equivalent` because the
trees are technically no longer precisely equivalent -- some constant
strings have changed.  When comparing two ASTs, whitespace after
newlines within constant strings is thus folded into a single space.

Co-authored-by: Luka Zakrajšek <luka@bancek.net>

* Extract the inner `_v` method to decrease complexity

This reduces the cyclomatic complexity to a level that makes flake8 happy.

* Blacken blib2to3's docstring which had an over-indent

Co-authored-by: Luka Zakrajšek <luka@bancek.net>
Co-authored-by: Zsolt Dollenstein <zsol.zsol@gmail.com>
This commit is contained in:
Alex Vandiver 2020-05-08 06:08:15 -07:00 committed by GitHub
parent 9938c92fd7
commit a4c11a75e1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 261 additions and 49 deletions

163
black.py
View File

@ -1955,6 +1955,18 @@ def visit_factor(self, node: Node) -> Iterator[Line]:
node.insert_child(index, Node(syms.atom, [lpar, operand, rpar]))
yield from self.visit_default(node)
def visit_STRING(self, leaf: Leaf) -> Iterator[Line]:
# Check if it's a docstring
if prev_siblings_are(
leaf.parent, [None, token.NEWLINE, token.INDENT, syms.simple_stmt]
) and is_multiline_string(leaf):
prefix = " " * self.current_line.depth
docstring = fix_docstring(leaf.value[3:-3], prefix)
leaf.value = leaf.value[0:3] + docstring + leaf.value[-3:]
normalize_string_quotes(leaf)
yield from self.visit_default(leaf)
def __post_init__(self) -> None:
"""You are in a twisty little maze of passages."""
v = self.visit_stmt
@ -2236,6 +2248,22 @@ def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
return None
def prev_siblings_are(node: Optional[LN], tokens: List[Optional[NodeType]]) -> bool:
"""Return if the `node` and its previous siblings match types against the provided
list of tokens; the provided `node`has its type matched against the last element in
the list. `None` can be used as the first element to declare that the start of the
list is anchored at the start of its parent's children."""
if not tokens:
return True
if tokens[-1] is None:
return node is None
if not node:
return False
if node.type != tokens[-1]:
return False
return prev_siblings_are(node.prev_sibling, tokens[:-1])
def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
"""Return the child of `ancestor` that contains `descendant`."""
node: Optional[LN] = descendant
@ -5804,54 +5832,66 @@ def _fixup_ast_constants(
return node
def _stringify_ast(
node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0
) -> Iterator[str]:
"""Simple visitor generating strings to compare ASTs by content."""
node = _fixup_ast_constants(node)
yield f"{' ' * depth}{node.__class__.__name__}("
for field in sorted(node._fields): # noqa: F402
# TypeIgnore has only one field 'lineno' which breaks this comparison
type_ignore_classes = (ast3.TypeIgnore, ast27.TypeIgnore)
if sys.version_info >= (3, 8):
type_ignore_classes += (ast.TypeIgnore,)
if isinstance(node, type_ignore_classes):
break
try:
value = getattr(node, field)
except AttributeError:
continue
yield f"{' ' * (depth+1)}{field}="
if isinstance(value, list):
for item in value:
# Ignore nested tuples within del statements, because we may insert
# parentheses and they change the AST.
if (
field == "targets"
and isinstance(node, (ast.Delete, ast3.Delete, ast27.Delete))
and isinstance(item, (ast.Tuple, ast3.Tuple, ast27.Tuple))
):
for item in item.elts:
yield from _stringify_ast(item, depth + 2)
elif isinstance(item, (ast.AST, ast3.AST, ast27.AST)):
yield from _stringify_ast(item, depth + 2)
elif isinstance(value, (ast.AST, ast3.AST, ast27.AST)):
yield from _stringify_ast(value, depth + 2)
else:
# Constant strings may be indented across newlines, if they are
# docstrings; fold spaces after newlines when comparing
if (
isinstance(node, ast.Constant)
and field == "value"
and isinstance(value, str)
):
normalized = re.sub(r"\n[ \t]+", "\n ", value)
else:
normalized = value
yield f"{' ' * (depth+2)}{normalized!r}, # {value.__class__.__name__}"
yield f"{' ' * depth}) # /{node.__class__.__name__}"
def assert_equivalent(src: str, dst: str) -> None:
"""Raise AssertionError if `src` and `dst` aren't equivalent."""
def _v(node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0) -> Iterator[str]:
"""Simple visitor generating strings to compare ASTs by content."""
node = _fixup_ast_constants(node)
yield f"{' ' * depth}{node.__class__.__name__}("
for field in sorted(node._fields): # noqa: F402
# TypeIgnore has only one field 'lineno' which breaks this comparison
type_ignore_classes = (ast3.TypeIgnore, ast27.TypeIgnore)
if sys.version_info >= (3, 8):
type_ignore_classes += (ast.TypeIgnore,)
if isinstance(node, type_ignore_classes):
break
try:
value = getattr(node, field)
except AttributeError:
continue
yield f"{' ' * (depth+1)}{field}="
if isinstance(value, list):
for item in value:
# Ignore nested tuples within del statements, because we may insert
# parentheses and they change the AST.
if (
field == "targets"
and isinstance(node, (ast.Delete, ast3.Delete, ast27.Delete))
and isinstance(item, (ast.Tuple, ast3.Tuple, ast27.Tuple))
):
for item in item.elts:
yield from _v(item, depth + 2)
elif isinstance(item, (ast.AST, ast3.AST, ast27.AST)):
yield from _v(item, depth + 2)
elif isinstance(value, (ast.AST, ast3.AST, ast27.AST)):
yield from _v(value, depth + 2)
else:
yield f"{' ' * (depth+2)}{value!r}, # {value.__class__.__name__}"
yield f"{' ' * depth}) # /{node.__class__.__name__}"
try:
src_ast = parse_ast(src)
except Exception as exc:
@ -5870,8 +5910,8 @@ def _v(node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0) -> Iterator[st
f" helpful: {log}"
) from None
src_ast_str = "\n".join(_v(src_ast))
dst_ast_str = "\n".join(_v(dst_ast))
src_ast_str = "\n".join(_stringify_ast(src_ast))
dst_ast_str = "\n".join(_stringify_ast(dst_ast))
if src_ast_str != dst_ast_str:
log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst"))
raise AssertionError(
@ -6236,5 +6276,32 @@ def patched_main() -> None:
main()
def fix_docstring(docstring: str, prefix: str) -> str:
# https://www.python.org/dev/peps/pep-0257/#handling-docstring-indentation
if not docstring:
return ""
# Convert tabs to spaces (following the normal Python rules)
# and split into a list of lines:
lines = docstring.expandtabs().splitlines()
# Determine minimum indentation (first line doesn't count):
indent = sys.maxsize
for line in lines[1:]:
stripped = line.lstrip()
if stripped:
indent = min(indent, len(line) - len(stripped))
# Remove indentation (first line is special):
trimmed = [lines[0].strip()]
if indent < sys.maxsize:
last_line_idx = len(lines) - 2
for i, line in enumerate(lines[1:]):
stripped_line = line[indent:].rstrip()
if stripped_line or i == last_line_idx:
trimmed.append(prefix + stripped_line)
else:
trimmed.append("")
# Return a single string:
return "\n".join(trimmed)
if __name__ == "__main__":
patched_main()

View File

@ -962,7 +962,7 @@ def generate_matches(
(count, results) tuples where:
count: the entire sequence of patterns matches nodes[:count];
results: dict containing named submatches.
"""
"""
if not patterns:
yield 0, {}
else:

138
tests/data/docstring.py Normal file
View File

@ -0,0 +1,138 @@
class MyClass:
"""Multiline
class docstring
"""
def method(self):
"""Multiline
method docstring
"""
pass
def foo():
"""This is a docstring with
some lines of text here
"""
return
def bar():
'''This is another docstring
with more lines of text
'''
return
def baz():
'''"This" is a string with some
embedded "quotes"'''
return
def troz():
'''Indentation with tabs
is just as OK
'''
return
def zort():
"""Another
multiline
docstring
"""
pass
def poit():
"""
Lorem ipsum dolor sit amet.
Consectetur adipiscing elit:
- sed do eiusmod tempor incididunt ut labore
- dolore magna aliqua
- enim ad minim veniam
- quis nostrud exercitation ullamco laboris nisi
- aliquip ex ea commodo consequat
"""
pass
def over_indent():
"""
This has a shallow indent
- But some lines are deeper
- And the closing quote is too deep
"""
pass
# output
class MyClass:
"""Multiline
class docstring
"""
def method(self):
"""Multiline
method docstring
"""
pass
def foo():
"""This is a docstring with
some lines of text here
"""
return
def bar():
"""This is another docstring
with more lines of text
"""
return
def baz():
'''"This" is a string with some
embedded "quotes"'''
return
def troz():
"""Indentation with tabs
is just as OK
"""
return
def zort():
"""Another
multiline
docstring
"""
pass
def poit():
"""
Lorem ipsum dolor sit amet.
Consectetur adipiscing elit:
- sed do eiusmod tempor incididunt ut labore
- dolore magna aliqua
- enim ad minim veniam
- quis nostrud exercitation ullamco laboris nisi
- aliquip ex ea commodo consequat
"""
pass
def over_indent():
"""
This has a shallow indent
- But some lines are deeper
- And the closing quote is too deep
"""
pass

View File

@ -391,6 +391,13 @@ def test_string_quotes(self) -> None:
black.assert_stable(source, not_normalized, mode=mode)
@patch("black.dump_to_file", dump_to_stderr)
def test_docstring(self) -> None:
source, expected = read_data("docstring")
actual = fs(source)
self.assertFormatEqual(expected, actual)
black.assert_equivalent(source, actual)
black.assert_stable(source, actual, black.FileMode())
def test_long_strings(self) -> None:
"""Tests for splitting long strings."""
source, expected = read_data("long_strings")