speed up cache by approximately 42x by avoiding pathlib (#1953)

This commit is contained in:
Anthony Sottile 2021-02-04 13:03:42 -08:00 committed by GitHub
parent df4dd38a9a
commit 3fca540d05
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 32 additions and 23 deletions

View File

@@ -26,6 +26,8 @@
- use lowercase hex strings (#1692) - use lowercase hex strings (#1692)
- speed up caching by avoiding pathlib (#1950)
#### _Packaging_ #### _Packaging_
- Self-contained native _Black_ binaries are now provided for releases via GitHub - Self-contained native _Black_ binaries are now provided for releases via GitHub

View File

@@ -93,7 +93,7 @@
Timestamp = float Timestamp = float
FileSize = int FileSize = int
CacheInfo = Tuple[Timestamp, FileSize] CacheInfo = Tuple[Timestamp, FileSize]
Cache = Dict[Path, CacheInfo] Cache = Dict[str, CacheInfo]
out = partial(click.secho, bold=True, err=True) out = partial(click.secho, bold=True, err=True)
err = partial(click.secho, fg="red", err=True) err = partial(click.secho, fg="red", err=True)
@@ -724,7 +724,8 @@ def reformat_one(
if write_back not in (WriteBack.DIFF, WriteBack.COLOR_DIFF): if write_back not in (WriteBack.DIFF, WriteBack.COLOR_DIFF):
cache = read_cache(mode) cache = read_cache(mode)
res_src = src.resolve() res_src = src.resolve()
if res_src in cache and cache[res_src] == get_cache_info(res_src): res_src_s = str(res_src)
if res_src_s in cache and cache[res_src_s] == get_cache_info(res_src):
changed = Changed.CACHED changed = Changed.CACHED
if changed is not Changed.CACHED and format_file_in_place( if changed is not Changed.CACHED and format_file_in_place(
src, fast=fast, write_back=write_back, mode=mode src, fast=fast, write_back=write_back, mode=mode
@@ -6781,8 +6782,8 @@ def filter_cached(cache: Cache, sources: Iterable[Path]) -> Tuple[Set[Path], Set
""" """
todo, done = set(), set() todo, done = set(), set()
for src in sources: for src in sources:
src = src.resolve() res_src = src.resolve()
if cache.get(src) != get_cache_info(src): if cache.get(str(res_src)) != get_cache_info(res_src):
todo.add(src) todo.add(src)
else: else:
done.add(src) done.add(src)
@@ -6794,7 +6795,10 @@ def write_cache(cache: Cache, sources: Iterable[Path], mode: Mode) -> None:
cache_file = get_cache_file(mode) cache_file = get_cache_file(mode)
try: try:
CACHE_DIR.mkdir(parents=True, exist_ok=True) CACHE_DIR.mkdir(parents=True, exist_ok=True)
new_cache = {**cache, **{src.resolve(): get_cache_info(src) for src in sources}} new_cache = {
**cache,
**{str(src.resolve()): get_cache_info(src) for src in sources},
}
with tempfile.NamedTemporaryFile(dir=str(cache_file.parent), delete=False) as f: with tempfile.NamedTemporaryFile(dir=str(cache_file.parent), delete=False) as f:
pickle.dump(new_cache, f, protocol=4) pickle.dump(new_cache, f, protocol=4)
os.replace(f.name, cache_file) os.replace(f.name, cache_file)

View File

@@ -1003,7 +1003,7 @@ def test_cache_broken_file(self) -> None:
fobj.write("print('hello')") fobj.write("print('hello')")
self.invokeBlack([str(src)]) self.invokeBlack([str(src)])
cache = black.read_cache(mode) cache = black.read_cache(mode)
self.assertIn(src, cache) self.assertIn(str(src), cache)
def test_cache_single_file_already_cached(self) -> None: def test_cache_single_file_already_cached(self) -> None:
mode = DEFAULT_MODE mode = DEFAULT_MODE
@@ -1035,8 +1035,8 @@ def test_cache_multiple_files(self) -> None:
with two.open("r") as fobj: with two.open("r") as fobj:
self.assertEqual(fobj.read(), 'print("hello")\n') self.assertEqual(fobj.read(), 'print("hello")\n')
cache = black.read_cache(mode) cache = black.read_cache(mode)
self.assertIn(one, cache) self.assertIn(str(one), cache)
self.assertIn(two, cache) self.assertIn(str(two), cache)
def test_no_cache_when_writeback_diff(self) -> None: def test_no_cache_when_writeback_diff(self) -> None:
mode = DEFAULT_MODE mode = DEFAULT_MODE
@@ -1116,8 +1116,8 @@ def test_write_cache_read_cache(self) -> None:
src.touch() src.touch()
black.write_cache({}, [src], mode) black.write_cache({}, [src], mode)
cache = black.read_cache(mode) cache = black.read_cache(mode)
self.assertIn(src, cache) self.assertIn(str(src), cache)
self.assertEqual(cache[src], black.get_cache_info(src)) self.assertEqual(cache[str(src)], black.get_cache_info(src))
def test_filter_cached(self) -> None: def test_filter_cached(self) -> None:
with TemporaryDirectory() as workspace: with TemporaryDirectory() as workspace:
@@ -1128,7 +1128,10 @@ def test_filter_cached(self) -> None:
uncached.touch() uncached.touch()
cached.touch() cached.touch()
cached_but_changed.touch() cached_but_changed.touch()
cache = {cached: black.get_cache_info(cached), cached_but_changed: (0.0, 0)} cache = {
str(cached): black.get_cache_info(cached),
str(cached_but_changed): (0.0, 0),
}
todo, done = black.filter_cached( todo, done = black.filter_cached(
cache, {uncached, cached, cached_but_changed} cache, {uncached, cached, cached_but_changed}
) )
@@ -1156,8 +1159,8 @@ def test_failed_formatting_does_not_get_cached(self) -> None:
fobj.write('print("hello")\n') fobj.write('print("hello")\n')
self.invokeBlack([str(workspace)], exit_code=123) self.invokeBlack([str(workspace)], exit_code=123)
cache = black.read_cache(mode) cache = black.read_cache(mode)
self.assertNotIn(failing, cache) self.assertNotIn(str(failing), cache)
self.assertIn(clean, cache) self.assertIn(str(clean), cache)
def test_write_cache_write_fail(self) -> None: def test_write_cache_write_fail(self) -> None:
mode = DEFAULT_MODE mode = DEFAULT_MODE
@@ -1210,9 +1213,9 @@ def test_read_cache_line_lengths(self) -> None:
path.touch() path.touch()
black.write_cache({}, [path], mode) black.write_cache({}, [path], mode)
one = black.read_cache(mode) one = black.read_cache(mode)
self.assertIn(path, one) self.assertIn(str(path), one)
two = black.read_cache(short_mode) two = black.read_cache(short_mode)
self.assertNotIn(path, two) self.assertNotIn(str(path), two)
def test_single_file_force_pyi(self) -> None: def test_single_file_force_pyi(self) -> None:
pyi_mode = replace(DEFAULT_MODE, is_pyi=True) pyi_mode = replace(DEFAULT_MODE, is_pyi=True)
@@ -1226,9 +1229,9 @@ def test_single_file_force_pyi(self) -> None:
actual = fh.read() actual = fh.read()
# verify cache with --pyi is separate # verify cache with --pyi is separate
pyi_cache = black.read_cache(pyi_mode) pyi_cache = black.read_cache(pyi_mode)
self.assertIn(path, pyi_cache) self.assertIn(str(path), pyi_cache)
normal_cache = black.read_cache(DEFAULT_MODE) normal_cache = black.read_cache(DEFAULT_MODE)
self.assertNotIn(path, normal_cache) self.assertNotIn(str(path), normal_cache)
self.assertFormatEqual(expected, actual) self.assertFormatEqual(expected, actual)
black.assert_equivalent(contents, actual) black.assert_equivalent(contents, actual)
black.assert_stable(contents, actual, pyi_mode) black.assert_stable(contents, actual, pyi_mode)
@@ -1255,8 +1258,8 @@ def test_multi_file_force_pyi(self) -> None:
pyi_cache = black.read_cache(pyi_mode) pyi_cache = black.read_cache(pyi_mode)
normal_cache = black.read_cache(reg_mode) normal_cache = black.read_cache(reg_mode)
for path in paths: for path in paths:
self.assertIn(path, pyi_cache) self.assertIn(str(path), pyi_cache)
self.assertNotIn(path, normal_cache) self.assertNotIn(str(path), normal_cache)
def test_pipe_force_pyi(self) -> None: def test_pipe_force_pyi(self) -> None:
source, expected = read_data("force_pyi") source, expected = read_data("force_pyi")
@@ -1280,9 +1283,9 @@ def test_single_file_force_py36(self) -> None:
actual = fh.read() actual = fh.read()
# verify cache with --target-version is separate # verify cache with --target-version is separate
py36_cache = black.read_cache(py36_mode) py36_cache = black.read_cache(py36_mode)
self.assertIn(path, py36_cache) self.assertIn(str(path), py36_cache)
normal_cache = black.read_cache(reg_mode) normal_cache = black.read_cache(reg_mode)
self.assertNotIn(path, normal_cache) self.assertNotIn(str(path), normal_cache)
self.assertEqual(actual, expected) self.assertEqual(actual, expected)
@event_loop() @event_loop()
@@ -1307,8 +1310,8 @@ def test_multi_file_force_py36(self) -> None:
pyi_cache = black.read_cache(py36_mode) pyi_cache = black.read_cache(py36_mode)
normal_cache = black.read_cache(reg_mode) normal_cache = black.read_cache(reg_mode)
for path in paths: for path in paths:
self.assertIn(path, pyi_cache) self.assertIn(str(path), pyi_cache)
self.assertNotIn(path, normal_cache) self.assertNotIn(str(path), normal_cache)
def test_pipe_force_py36(self) -> None: def test_pipe_force_py36(self) -> None:
source, expected = read_data("force_py36") source, expected = read_data("force_py36")