Make --exclude only apply to recursively found files (#1591)

Ever since --force-exclude was added, --exclude started to touch files
that were given to Black through the CLI too. This is not documented
behaviour and neither expected as --exclude and --force-exclude now
behave the same!

Before this commit, get_sources() when encountering a file that was passed
explicitly through the CLI would pass a single Path object list to
gen_python_files(). This causes bad behaviour since that function
doesn't treat the exclude and force_exclude regexes differently. Which
is fine for recursively found files, but *not* for files given through
the CLI.

Now when get_sources() iterates through srcs and encounters
a file, it checks if the force_exclude regex matches, if not, then the
file will be added to the computed sources set.

A new function had to be created since before you can do regex matching,
the path must be normalized. The full process of normalizing the path is
somewhat long as there is special error handling. I didn't want to
duplicate this logic in get_sources() and gen_python_files() so that's
why there is a new helper function.
This commit is contained in:
Richard Si 2020-08-12 23:07:19 -04:00 committed by GitHub
parent 149b38d674
commit 97c11f22aa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 112 additions and 50 deletions

View File

@ -583,9 +583,7 @@ def get_sources(
root = find_project_root(src)
sources: Set[Path] = set()
path_empty(src, "No Path provided. Nothing to do 😴", quiet, verbose, ctx)
exclude_regexes = [exclude_regex]
if force_exclude_regex is not None:
exclude_regexes.append(force_exclude_regex)
gitignore = get_gitignore(root)
for s in src:
p = Path(s)
@ -595,19 +593,30 @@ def get_sources(
p.iterdir(),
root,
include_regex,
exclude_regexes,
exclude_regex,
force_exclude_regex,
report,
get_gitignore(root),
gitignore,
)
)
elif s == "-":
sources.add(p)
elif p.is_file():
sources.update(
gen_python_files(
[p], root, None, exclude_regexes, report, get_gitignore(root)
)
)
normalized_path = normalize_path_maybe_ignore(p, root, report)
if normalized_path is None:
continue
normalized_path = "/" + normalized_path
# Hard-exclude any files that matches the `--force-exclude` regex.
if force_exclude_regex:
force_exclude_match = force_exclude_regex.search(normalized_path)
else:
force_exclude_match = None
if force_exclude_match and force_exclude_match.group(0):
report.path_ignored(p, "matches the --force-exclude regular expression")
continue
sources.add(p)
else:
err(f"invalid path: {s}")
return sources
@ -5757,16 +5766,40 @@ def get_gitignore(root: Path) -> PathSpec:
return PathSpec.from_lines("gitwildmatch", lines)
def normalize_path_maybe_ignore(
path: Path, root: Path, report: "Report"
) -> Optional[str]:
"""Normalize `path`. May return `None` if `path` was ignored.
`report` is where "path ignored" output goes.
"""
try:
normalized_path = path.resolve().relative_to(root).as_posix()
except OSError as e:
report.path_ignored(path, f"cannot be read because {e}")
return None
except ValueError:
if path.is_symlink():
report.path_ignored(path, f"is a symbolic link that points outside {root}")
return None
raise
return normalized_path
def gen_python_files(
paths: Iterable[Path],
root: Path,
include: Optional[Pattern[str]],
exclude_regexes: Iterable[Pattern[str]],
exclude: Pattern[str],
force_exclude: Optional[Pattern[str]],
report: "Report",
gitignore: PathSpec,
) -> Iterator[Path]:
"""Generate all files under `path` whose paths are not excluded by the
`exclude` regex, but are included by the `include` regex.
`exclude_regex` or `force_exclude` regexes, but are included by the `include` regex.
Symbolic links pointing outside of the `root` directory are ignored.
@ -5774,43 +5807,41 @@ def gen_python_files(
"""
assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
for child in paths:
# Then ignore with `exclude` option.
try:
normalized_path = child.resolve().relative_to(root).as_posix()
except OSError as e:
report.path_ignored(child, f"cannot be read because {e}")
normalized_path = normalize_path_maybe_ignore(child, root, report)
if normalized_path is None:
continue
except ValueError:
if child.is_symlink():
report.path_ignored(
child, f"is a symbolic link that points outside {root}"
)
continue
raise
# First ignore files matching .gitignore
if gitignore.match_file(normalized_path):
report.path_ignored(child, "matches the .gitignore file content")
continue
# Then ignore with `--exclude` and `--force-exclude` options.
normalized_path = "/" + normalized_path
if child.is_dir():
normalized_path += "/"
is_excluded = False
for exclude in exclude_regexes:
exclude_match = exclude.search(normalized_path) if exclude else None
if exclude_match and exclude_match.group(0):
report.path_ignored(child, "matches the --exclude regular expression")
is_excluded = True
break
if is_excluded:
continue
force_exclude_match = (
force_exclude.search(normalized_path) if force_exclude else None
)
if force_exclude_match and force_exclude_match.group(0):
report.path_ignored(child, "matches the --force-exclude regular expression")
continue
if child.is_dir():
yield from gen_python_files(
child.iterdir(), root, include, exclude_regexes, report, gitignore
child.iterdir(),
root,
include,
exclude,
force_exclude,
report,
gitignore,
)
elif child.is_file():

View File

@ -110,6 +110,20 @@ def skip_if_exception(e: str) -> Iterator[None]:
raise
class FakeContext(click.Context):
"""A fake click Context for when calling functions that need it."""
def __init__(self) -> None:
self.default_map: Dict[str, Any] = {}
class FakeParameter(click.Parameter):
"""A fake click Parameter for when calling functions that need it."""
def __init__(self) -> None:
pass
class BlackRunner(CliRunner):
"""Modify CliRunner so that stderr is not merged with stdout.
@ -1551,7 +1565,32 @@ def test_include_exclude(self) -> None:
this_abs = THIS_DIR.resolve()
sources.extend(
black.gen_python_files(
path.iterdir(), this_abs, include, [exclude], report, gitignore
path.iterdir(), this_abs, include, exclude, None, report, gitignore
)
)
self.assertEqual(sorted(expected), sorted(sources))
@patch("black.find_project_root", lambda *args: THIS_DIR.resolve())
def test_exclude_for_issue_1572(self) -> None:
# Exclude shouldn't touch files that were explicitly given to Black through the
# CLI. Exclude is supposed to only apply to the recursive discovery of files.
# https://github.com/psf/black/issues/1572
path = THIS_DIR / "data" / "include_exclude_tests"
include = ""
exclude = r"/exclude/|a\.py"
src = str(path / "b/exclude/a.py")
report = black.Report()
expected = [Path(path / "b/exclude/a.py")]
sources = list(
black.get_sources(
ctx=FakeContext(),
src=(src,),
quiet=True,
verbose=False,
include=include,
exclude=exclude,
force_exclude=None,
report=report,
)
)
self.assertEqual(sorted(expected), sorted(sources))
@ -1572,7 +1611,7 @@ def test_gitignore_exclude(self) -> None:
this_abs = THIS_DIR.resolve()
sources.extend(
black.gen_python_files(
path.iterdir(), this_abs, include, [exclude], report, gitignore
path.iterdir(), this_abs, include, exclude, None, report, gitignore
)
)
self.assertEqual(sorted(expected), sorted(sources))
@ -1600,7 +1639,8 @@ def test_empty_include(self) -> None:
path.iterdir(),
this_abs,
empty,
[re.compile(black.DEFAULT_EXCLUDES)],
re.compile(black.DEFAULT_EXCLUDES),
None,
report,
gitignore,
)
@ -1627,7 +1667,8 @@ def test_empty_exclude(self) -> None:
path.iterdir(),
this_abs,
re.compile(black.DEFAULT_INCLUDES),
[empty],
empty,
None,
report,
gitignore,
)
@ -1684,7 +1725,7 @@ def test_symlink_out_of_root_directory(self) -> None:
try:
list(
black.gen_python_files(
path.iterdir(), root, include, exclude, report, gitignore
path.iterdir(), root, include, exclude, None, report, gitignore
)
)
except ValueError as ve:
@ -1698,7 +1739,7 @@ def test_symlink_out_of_root_directory(self) -> None:
with self.assertRaises(ValueError):
list(
black.gen_python_files(
path.iterdir(), root, include, exclude, report, gitignore
path.iterdir(), root, include, exclude, None, report, gitignore
)
)
path.iterdir.assert_called()
@ -1777,16 +1818,6 @@ def test_parse_pyproject_toml(self) -> None:
def test_read_pyproject_toml(self) -> None:
test_toml_file = THIS_DIR / "test.toml"
# Fake a click context and parameter so mypy stays happy
class FakeContext(click.Context):
def __init__(self) -> None:
self.default_map: Dict[str, Any] = {}
class FakeParameter(click.Parameter):
def __init__(self) -> None:
pass
fake_ctx = FakeContext()
black.read_pyproject_toml(
fake_ctx, FakeParameter(), str(test_toml_file),