Return NothingChanged if non-Python cell magic is detected, to avoid tokenize error (#2630)

Fixes https://github.com/psf/black/issues/2627 , a non-Python cell magic such as `%%writeline` can legitimately contain "incorrect" indentation, however this causes `tokenize-rt` to return an error. To avoid this, `validate_cell` should early detect cell magics (just like it detects `TransformerManager` transformations). Test added too, in the shape of a "badly indented" `%%writefile` within `test_non_python_magics`. Co-authored-by: Jelle Zijlstra <jelle.zijlstra@gmail.com> Co-authored-by: Marco Edward Gorelli <marcogorelli@protonmail.com>
2021-11-29 18:07:35 -05:00 · 2021-11-29 18:07:35 -05:00 · a066a2bc8b
commit a066a2bc8b
parent a18ee4018f
5 changed files with 21 additions and 18 deletions
--- a/CHANGES.md
+++ b/CHANGES.md
@ -4,6 +4,9 @@
 ### _Black_
 - Cell magics are now only processed if they are known Python cell magics. Earlier, all
  cell magics were tokenized, leading to possible indentation errors e.g. with
  `%%writefile`. (#2630)
 - Fixed Python 3.10 support on platforms without ProcessPoolExecutor (#2631)
 - Fixed `match` statements with open sequence subjects, like `match a, b:` (#2639)
 - Fixed assignment to environment variables in Jupyter Notebooks (#2642)
--- a/docs/faq.md
+++ b/docs/faq.md
@ -47,6 +47,7 @@ _Black_ is timid about formatting Jupyter Notebooks. Cells containing any of the
 following will not be formatted:
 - automagics (e.g. `pip install black`)
 - non-Python cell magics (e.g. `%%writeline`)
 - multiline magics, e.g.:
  ```python
--- a/src/black/init.py
+++ b/src/black/init.py
@ -57,6 +57,7 @@
    remove_trailing_semicolon,
    put_trailing_semicolon_back,
    TRANSFORMED_MAGICS,
    PYTHON_CELL_MAGICS,
    jupyter_dependencies_are_installed,
 )
@ -943,7 +944,9 @@ def format_file_contents(src_contents: str, *, fast: bool, mode: Mode) -> FileCo
 def validate_cell(src: str) -> None:
-    """Check that cell does not already contain TransformerManager transformations.
+    """Check that cell does not already contain TransformerManager transformations,
    or non-Python cell magics, which might cause tokenizer_rt to break because of
    indentations.
    If a cell contains ``!ls``, then it'll be transformed to
    ``get_ipython().system('ls')``. However, if the cell originally contained
@ -959,6 +962,8 @@ def validate_cell(src: str) -> None:
    """
    if any(transformed_magic in src for transformed_magic in TRANSFORMED_MAGICS):
        raise NothingChanged
    if src[:2] == "%%" and src.split()[0][2:] not in PYTHON_CELL_MAGICS:
        raise NothingChanged
 def format_cell(src: str, *, fast: bool, mode: Mode) -> str:
--- a/src/black/handle_ipynb_magics.py
+++ b/src/black/handle_ipynb_magics.py
@ -37,20 +37,15 @@
        "ESCAPED_NL",
    )
 )
-NON_PYTHON_CELL_MAGICS = frozenset(
+PYTHON_CELL_MAGICS = frozenset(
    (
-        "bash",
+        "capture",
-        "html",
+        "prun",
-        "javascript",
+        "pypy",
-        "js",
+        "python",
-        "latex",
+        "python3",
-        "markdown",
+        "time",
-        "perl",
+        "timeit",
        "ruby",
        "script",
        "sh",
        "svg",
        "writefile",
    )
 )
 TOKEN_HEX = secrets.token_hex
@ -230,8 +225,6 @@ def replace_cell_magics(src: str) -> Tuple[str, List[Replacement]]:
    cell_magic_finder.visit(tree)
    if cell_magic_finder.cell_magic is None:
        return src, replacements
    if cell_magic_finder.cell_magic.name in NON_PYTHON_CELL_MAGICS:
        raise NothingChanged
    header = cell_magic_finder.cell_magic.header
    mask = get_token(src, header)
    replacements.append(Replacement(mask=mask, src=header))
--- a/tests/test_ipynb.py
+++ b/tests/test_ipynb.py
@ -106,6 +106,7 @@ def test_magic(src: str, expected: str) -> None:
    (
        "%%bash\n2+2",
        "%%html --isolated\n2+2",
        "%%writefile e.txt\n  meh\n meh",
    ),
 )
 def test_non_python_magics(src: str) -> None:
@ -132,9 +133,9 @@ def test_magic_noop() -> None:
 def test_cell_magic_with_magic() -> None:
-    src = "%%t -n1\nls =!ls"
+    src = "%%timeit -n1\nls =!ls"
    result = format_cell(src, fast=True, mode=JUPYTER_MODE)
-    expected = "%%t -n1\nls = !ls"
+    expected = "%%timeit -n1\nls = !ls"
    assert result == expected