Make source handling use sets instead of lists

Also, sort cached file output to be (more) deterministic.
This commit is contained in:
Łukasz Langa 2018-06-04 12:50:24 -07:00
parent e5452a6b67
commit ed91dd4de6
2 changed files with 17 additions and 19 deletions

View File

@ -278,7 +278,7 @@ def main(
py36=py36, pyi=pyi, skip_string_normalization=skip_string_normalization py36=py36, pyi=pyi, skip_string_normalization=skip_string_normalization
) )
report = Report(check=check, quiet=quiet, verbose=verbose) report = Report(check=check, quiet=quiet, verbose=verbose)
sources: List[Path] = [] sources: Set[Path] = set()
try: try:
include_regex = re.compile(include) include_regex = re.compile(include)
except re.error: except re.error:
@ -293,12 +293,12 @@ def main(
for s in src: for s in src:
p = Path(s) p = Path(s)
if p.is_dir(): if p.is_dir():
sources.extend( sources.update(
gen_python_files_in_dir(p, root, include_regex, exclude_regex, report) gen_python_files_in_dir(p, root, include_regex, exclude_regex, report)
) )
elif p.is_file() or s == "-": elif p.is_file() or s == "-":
# if a file was explicitly given, we don't care about its extension # if a file was explicitly given, we don't care about its extension
sources.append(p) sources.add(p)
else: else:
err(f"invalid path: {s}") err(f"invalid path: {s}")
if len(sources) == 0: if len(sources) == 0:
@ -309,7 +309,7 @@ def main(
elif len(sources) == 1: elif len(sources) == 1:
reformat_one( reformat_one(
src=sources[0], src=sources.pop(),
line_length=line_length, line_length=line_length,
fast=fast, fast=fast,
write_back=write_back, write_back=write_back,
@ -384,7 +384,7 @@ def reformat_one(
async def schedule_formatting( async def schedule_formatting(
sources: List[Path], sources: Set[Path],
line_length: int, line_length: int,
fast: bool, fast: bool,
write_back: WriteBack, write_back: WriteBack,
@ -404,7 +404,7 @@ async def schedule_formatting(
if write_back != WriteBack.DIFF: if write_back != WriteBack.DIFF:
cache = read_cache(line_length, mode) cache = read_cache(line_length, mode)
sources, cached = filter_cached(cache, sources) sources, cached = filter_cached(cache, sources)
for src in cached: for src in sorted(cached):
report.done(src, Changed.CACHED) report.done(src, Changed.CACHED)
cancelled = [] cancelled = []
formatted = [] formatted = []
@ -3304,26 +3304,24 @@ def get_cache_info(path: Path) -> CacheInfo:
return stat.st_mtime, stat.st_size return stat.st_mtime, stat.st_size
def filter_cached( def filter_cached(cache: Cache, sources: Iterable[Path]) -> Tuple[Set[Path], Set[Path]]:
cache: Cache, sources: Iterable[Path] """Split an iterable of paths in `sources` into two sets.
) -> Tuple[List[Path], List[Path]]:
"""Split a list of paths into two.
The first list contains paths of files that were modified on disk or are not in the The first contains paths of files that were modified on disk or are not in the
cache. The other list contains paths to non-modified files. cache. The other contains paths to non-modified files.
""" """
todo, done = [], [] todo, done = set(), set()
for src in sources: for src in sources:
src = src.resolve() src = src.resolve()
if cache.get(src) != get_cache_info(src): if cache.get(src) != get_cache_info(src):
todo.append(src) todo.add(src)
else: else:
done.append(src) done.add(src)
return todo, done return todo, done
def write_cache( def write_cache(
cache: Cache, sources: List[Path], line_length: int, mode: FileMode cache: Cache, sources: Iterable[Path], line_length: int, mode: FileMode
) -> None: ) -> None:
"""Update the cache file.""" """Update the cache file."""
cache_file = get_cache_file(line_length, mode) cache_file = get_cache_file(line_length, mode)

View File

@ -856,10 +856,10 @@ def test_filter_cached(self) -> None:
cached_but_changed.touch() cached_but_changed.touch()
cache = {cached: black.get_cache_info(cached), cached_but_changed: (0.0, 0)} cache = {cached: black.get_cache_info(cached), cached_but_changed: (0.0, 0)}
todo, done = black.filter_cached( todo, done = black.filter_cached(
cache, [uncached, cached, cached_but_changed] cache, {uncached, cached, cached_but_changed}
) )
self.assertEqual(todo, [uncached, cached_but_changed]) self.assertEqual(todo, {uncached, cached_but_changed})
self.assertEqual(done, [cached]) self.assertEqual(done, {cached})
def test_write_cache_creates_directory_if_needed(self) -> None: def test_write_cache_creates_directory_if_needed(self) -> None:
mode = black.FileMode.AUTO_DETECT mode = black.FileMode.AUTO_DETECT