Lazily import parallelized format modules
`black.reformat_many` depends on a lot of slow-to-import modules. When formatting just a single file, the time spent importing those modules is wasted. So I moved `black.reformat_many` and its helpers to `black.concurrency`, which is now *only* imported if there's more than one file to reformat. This way, running Black over a single file is snappier. Here are the numbers before and after this patch, running `python -m black --version`: - interpreted: 411 ms +- 9 ms -> 342 ms +- 7 ms: 1.20x faster - compiled: 365 ms +- 15 ms -> 304 ms +- 7 ms: 1.20x faster Co-authored-by: Fabio Zadrozny <fabiofz@gmail.com>
This commit is contained in:
parent
c47b91f513
commit
e269f44b25
@ -87,6 +87,8 @@
|
||||
|
||||
<!-- Changes that improve Black's performance. -->
|
||||
|
||||
- Reduce Black's startup time when formatting a single file by 15-30% (#3211)
|
||||
|
||||
## 22.6.0
|
||||
|
||||
### Style
|
||||
|
@ -52,7 +52,7 @@ Formatting
|
||||
|
||||
.. autofunction:: black.reformat_one
|
||||
|
||||
.. autofunction:: black.schedule_formatting
|
||||
.. autofunction:: black.concurrency.schedule_formatting
|
||||
|
||||
File operations
|
||||
---------------
|
||||
@ -173,7 +173,7 @@ Utilities
|
||||
|
||||
.. autofunction:: black.linegen.should_split_line
|
||||
|
||||
.. autofunction:: black.shutdown
|
||||
.. autofunction:: black.concurrency.shutdown
|
||||
|
||||
.. autofunction:: black.strings.sub_twice
|
||||
|
||||
|
@ -1,10 +1,8 @@
|
||||
import asyncio
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
import signal
|
||||
import sys
|
||||
import tokenize
|
||||
import traceback
|
||||
@ -13,10 +11,8 @@
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from json.decoder import JSONDecodeError
|
||||
from multiprocessing import Manager, freeze_support
|
||||
from pathlib import Path
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Dict,
|
||||
Generator,
|
||||
@ -32,15 +28,19 @@
|
||||
Union,
|
||||
)
|
||||
|
||||
if sys.version_info >= (3, 8):
|
||||
from typing import Final
|
||||
else:
|
||||
from typing_extensions import Final
|
||||
|
||||
import click
|
||||
from click.core import ParameterSource
|
||||
from mypy_extensions import mypyc_attr
|
||||
from pathspec.patterns.gitwildmatch import GitWildMatchPatternError
|
||||
|
||||
from _black_version import version as __version__
|
||||
from black.cache import Cache, filter_cached, get_cache_info, read_cache, write_cache
|
||||
from black.cache import Cache, get_cache_info, read_cache, write_cache
|
||||
from black.comments import normalize_fmt_off
|
||||
from black.concurrency import cancel, maybe_install_uvloop, shutdown
|
||||
from black.const import (
|
||||
DEFAULT_EXCLUDES,
|
||||
DEFAULT_INCLUDES,
|
||||
@ -91,10 +91,8 @@
|
||||
from blib2to3.pgen2 import token
|
||||
from blib2to3.pytree import Leaf, Node
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from concurrent.futures import Executor
|
||||
|
||||
COMPILED = Path(__file__).suffix in (".pyd", ".so")
|
||||
DEFAULT_WORKERS: Final = os.cpu_count()
|
||||
|
||||
# types
|
||||
FileContent = str
|
||||
@ -125,8 +123,6 @@ def from_configuration(
|
||||
# Legacy name, left for integrations.
|
||||
FileMode = Mode
|
||||
|
||||
DEFAULT_WORKERS = os.cpu_count()
|
||||
|
||||
|
||||
def read_pyproject_toml(
|
||||
ctx: click.Context, param: click.Parameter, value: Optional[str]
|
||||
@ -592,6 +588,8 @@ def main( # noqa: C901
|
||||
report=report,
|
||||
)
|
||||
else:
|
||||
from black.concurrency import reformat_many
|
||||
|
||||
reformat_many(
|
||||
sources=sources,
|
||||
fast=fast,
|
||||
@ -776,132 +774,6 @@ def reformat_one(
|
||||
report.failed(src, str(exc))
|
||||
|
||||
|
||||
# diff-shades depends on being able to monkeypatch this function to operate. I know
# it's not ideal, but this shouldn't cause any issues ... hopefully. ~ichard26
@mypyc_attr(patchable=True)
def reformat_many(
    sources: Set[Path],
    fast: bool,
    write_back: WriteBack,
    mode: Mode,
    report: "Report",
    workers: Optional[int],
) -> None:
    """Reformat multiple files using a ProcessPoolExecutor."""
    from concurrent.futures import Executor, ProcessPoolExecutor, ThreadPoolExecutor

    worker_count = DEFAULT_WORKERS if workers is None else workers
    if sys.platform == "win32":
        # Work around https://bugs.python.org/issue26903
        assert worker_count is not None
        worker_count = min(worker_count, 60)

    executor: Executor
    try:
        executor = ProcessPoolExecutor(max_workers=worker_count)
    except (ImportError, NotImplementedError, OSError):
        # The underlying system lacks usable multiprocessing (e.g. AWS Lambda
        # or Termux); degrade gracefully to a ThreadPoolExecutor with a single
        # worker — extra threads would buy nothing under the Global
        # Interpreter Lock.
        executor = ThreadPoolExecutor(max_workers=1)

    event_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(event_loop)
    try:
        event_loop.run_until_complete(
            schedule_formatting(
                sources=sources,
                fast=fast,
                write_back=write_back,
                mode=mode,
                report=report,
                loop=event_loop,
                executor=executor,
            )
        )
    finally:
        try:
            shutdown(event_loop)
        finally:
            asyncio.set_event_loop(None)
        if executor is not None:
            executor.shutdown()
|
||||
|
||||
|
||||
async def schedule_formatting(
    sources: Set[Path],
    fast: bool,
    write_back: WriteBack,
    mode: Mode,
    report: "Report",
    loop: asyncio.AbstractEventLoop,
    executor: "Executor",
) -> None:
    """Run formatting of `sources` in parallel using the provided `executor`.

    (Use ProcessPoolExecutors for actual parallelism.)

    `write_back`, `fast`, and `mode` options are passed to
    :func:`format_file_in_place`.
    """
    cache: Cache = {}
    diff_modes = (WriteBack.DIFF, WriteBack.COLOR_DIFF)
    if write_back not in diff_modes:
        cache = read_cache(mode)
        sources, cached = filter_cached(cache, sources)
        for src in sorted(cached):
            report.done(src, Changed.CACHED)
    if not sources:
        return

    cancelled_tasks = []
    cacheable_srcs = []
    lock = None
    if write_back in diff_modes:
        # Diff output from different worker processes must not interleave, so
        # every worker shares one managed lock.
        manager = Manager()
        lock = manager.Lock()
    tasks = {
        asyncio.ensure_future(
            loop.run_in_executor(
                executor, format_file_in_place, src, fast, mode, write_back, lock
            )
        ): src
        for src in sorted(sources)
    }
    pending = tasks.keys()
    try:
        for sig in (signal.SIGINT, signal.SIGTERM):
            loop.add_signal_handler(sig, cancel, pending)
    except NotImplementedError:
        # There are no good alternatives for these on Windows.
        pass
    while pending:
        done, _ = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
        for task in done:
            src = tasks.pop(task)
            if task.cancelled():
                cancelled_tasks.append(task)
                continue
            exc = task.exception()
            if exc:
                report.failed(src, str(exc))
                continue
            changed = Changed.YES if task.result() else Changed.NO
            # A successful write-back or a verified-clean check means the file
            # can be answered from the cache on the next run.
            if write_back is WriteBack.YES or (
                write_back is WriteBack.CHECK and changed is Changed.NO
            ):
                cacheable_srcs.append(src)
            report.done(src, changed)
    if cancelled_tasks:
        if sys.version_info >= (3, 7):
            await asyncio.gather(*cancelled_tasks, return_exceptions=True)
        else:
            await asyncio.gather(*cancelled_tasks, loop=loop, return_exceptions=True)
    if cacheable_srcs:
        write_cache(cache, cacheable_srcs, mode)
|
||||
|
||||
|
||||
def format_file_in_place(
|
||||
src: Path,
|
||||
fast: bool,
|
||||
@ -1506,8 +1378,11 @@ def patch_click() -> None:
|
||||
|
||||
|
||||
def patched_main() -> None:
    """CLI entry point that prepares the runtime before delegating to main().

    Installs uvloop when available, enables multiprocessing support for
    frozen Windows executables, and patches click before running Black.
    """
    maybe_install_uvloop()

    frozen_on_windows = sys.platform == "win32" and getattr(sys, "frozen", False)
    if frozen_on_windows:
        from multiprocessing import freeze_support

        freeze_support()

    patch_click()
    main()
|
||||
|
||||
|
@ -1,9 +1,25 @@
|
||||
"""
|
||||
Formatting many files at once via multiprocessing. Contains entrypoint and utilities.
|
||||
|
||||
NOTE: this module is only imported if we need to format several files at once.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import signal
|
||||
import sys
|
||||
from typing import Any, Iterable
|
||||
from concurrent.futures import Executor, ProcessPoolExecutor, ThreadPoolExecutor
|
||||
from multiprocessing import Manager
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable, Optional, Set
|
||||
|
||||
from mypy_extensions import mypyc_attr
|
||||
|
||||
from black import DEFAULT_WORKERS, WriteBack, format_file_in_place
|
||||
from black.cache import Cache, filter_cached, read_cache, write_cache
|
||||
from black.mode import Mode
|
||||
from black.output import err
|
||||
from black.report import Changed, Report
|
||||
|
||||
|
||||
def maybe_install_uvloop() -> None:
|
||||
@ -11,7 +27,6 @@ def maybe_install_uvloop() -> None:
|
||||
|
||||
This is called only from command-line entry points to avoid
|
||||
interfering with the parent process if Black is used as a library.
|
||||
|
||||
"""
|
||||
try:
|
||||
import uvloop
|
||||
@ -55,3 +70,129 @@ def shutdown(loop: asyncio.AbstractEventLoop) -> None:
|
||||
cf_logger = logging.getLogger("concurrent.futures")
|
||||
cf_logger.setLevel(logging.CRITICAL)
|
||||
loop.close()
|
||||
|
||||
|
||||
# diff-shades depends on being able to monkeypatch this function to operate. I know
# it's not ideal, but this shouldn't cause any issues ... hopefully. ~ichard26
@mypyc_attr(patchable=True)
def reformat_many(
    sources: Set[Path],
    fast: bool,
    write_back: WriteBack,
    mode: Mode,
    report: Report,
    workers: Optional[int],
) -> None:
    """Reformat multiple files using a ProcessPoolExecutor.

    :param sources: the set of files to reformat.
    :param fast: if true, skip the safety checks after reformatting.
    :param write_back: how results are emitted (in place, diff, check, ...).
    :param mode: formatting options forwarded to the formatter.
    :param report: collects per-file done/failed results.
    :param workers: number of parallel workers; defaults to the CPU count.
    """
    maybe_install_uvloop()

    worker_count = workers if workers is not None else DEFAULT_WORKERS
    if worker_count is None:
        # DEFAULT_WORKERS comes from os.cpu_count(), which can return None on
        # exotic platforms. Fall back to one worker instead of relying on an
        # `assert` (stripped under -O) and crashing in min() below.
        worker_count = 1
    if sys.platform == "win32":
        # Work around https://bugs.python.org/issue26903
        worker_count = min(worker_count, 60)

    executor: Executor
    try:
        executor = ProcessPoolExecutor(max_workers=worker_count)
    except (ImportError, NotImplementedError, OSError):
        # we arrive here if the underlying system does not support multi-processing
        # like in AWS Lambda or Termux, in which case we gracefully fallback to
        # a ThreadPoolExecutor with just a single worker (more workers would not do us
        # any good due to the Global Interpreter Lock)
        executor = ThreadPoolExecutor(max_workers=1)

    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(
            schedule_formatting(
                sources=sources,
                fast=fast,
                write_back=write_back,
                mode=mode,
                report=report,
                loop=loop,
                executor=executor,
            )
        )
    finally:
        try:
            shutdown(loop)
        finally:
            asyncio.set_event_loop(None)
        # `executor` is always bound by the time we reach this point (either
        # branch of the try/except above assigns it), so shut it down
        # unconditionally to release worker processes/threads.
        executor.shutdown()
|
||||
|
||||
|
||||
async def schedule_formatting(
    sources: Set[Path],
    fast: bool,
    write_back: WriteBack,
    mode: Mode,
    report: "Report",
    loop: asyncio.AbstractEventLoop,
    executor: "Executor",
) -> None:
    """Run formatting of `sources` in parallel using the provided `executor`.

    (Use ProcessPoolExecutors for actual parallelism.)

    `write_back`, `fast`, and `mode` options are passed to
    :func:`format_file_in_place`.
    """
    cache: Cache = {}
    if write_back not in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
        cache = read_cache(mode)
        sources, cached = filter_cached(cache, sources)
        for already_done in sorted(cached):
            report.done(already_done, Changed.CACHED)
    if not sources:
        return

    cancelled = []
    files_to_cache = []
    lock = None
    if write_back in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
        # Workers emitting diffs must not interleave their output, so they all
        # share a single lock from a (kept-alive) multiprocessing Manager.
        manager = Manager()
        lock = manager.Lock()
    future_map = {
        asyncio.ensure_future(
            loop.run_in_executor(
                executor, format_file_in_place, src, fast, mode, write_back, lock
            )
        ): src
        for src in sorted(sources)
    }
    unfinished = future_map.keys()
    try:
        loop.add_signal_handler(signal.SIGINT, cancel, unfinished)
        loop.add_signal_handler(signal.SIGTERM, cancel, unfinished)
    except NotImplementedError:
        # There are no good alternatives for these on Windows.
        pass
    while unfinished:
        finished, _ = await asyncio.wait(
            unfinished, return_when=asyncio.FIRST_COMPLETED
        )
        for task in finished:
            src = future_map.pop(task)
            if task.cancelled():
                cancelled.append(task)
            elif task.exception():
                report.failed(src, str(task.exception()))
            else:
                if task.result():
                    changed = Changed.YES
                else:
                    changed = Changed.NO
                # Files written back or verified clean are cacheable.
                cacheable = write_back is WriteBack.YES or (
                    write_back is WriteBack.CHECK and changed is Changed.NO
                )
                if cacheable:
                    files_to_cache.append(src)
                report.done(src, changed)
    if cancelled:
        if sys.version_info >= (3, 7):
            await asyncio.gather(*cancelled, return_exceptions=True)
        else:
            await asyncio.gather(*cancelled, loop=loop, return_exceptions=True)
    if files_to_cache:
        write_cache(cache, files_to_cache, mode)
|
||||
|
@ -1763,7 +1763,9 @@ def test_output_locking_when_writeback_diff(self, color: bool) -> None:
|
||||
src = (workspace / f"test{tag}.py").resolve()
|
||||
with src.open("w") as fobj:
|
||||
fobj.write("print('hello')")
|
||||
with patch("black.Manager", wraps=multiprocessing.Manager) as mgr:
|
||||
with patch(
|
||||
"black.concurrency.Manager", wraps=multiprocessing.Manager
|
||||
) as mgr:
|
||||
cmd = ["--diff", str(workspace)]
|
||||
if color:
|
||||
cmd.append("--color")
|
||||
@ -1810,7 +1812,7 @@ def test_filter_cached(self) -> None:
|
||||
str(cached): black.get_cache_info(cached),
|
||||
str(cached_but_changed): (0.0, 0),
|
||||
}
|
||||
todo, done = black.filter_cached(
|
||||
todo, done = black.cache.filter_cached(
|
||||
cache, {uncached, cached, cached_but_changed}
|
||||
)
|
||||
assert todo == {uncached, cached_but_changed}
|
||||
|
Loading…
Reference in New Issue
Block a user