Fix ignoring input files for symlink reasons (#4222)
This relates to #4015, #4161 and the behaviour of os.getcwd() Black is a big user of pathlib and as such loves doing `.resolve()`, since for a long time it was the only good way of getting an absolute path in pathlib. However, this has two problems: The first minor problem is performance, e.g. in #3751 I (safely) got rid of a bunch of `.resolve()` which made Black 40% faster on cached runs. The second more important problem is that always resolving symlinks results in unintuitive exclusion behaviour. For instance, a gitignored symlink should never alter formatting of your actual code. This kind of thing was reported by users a few times. In #3846, I improved the exclusion rule logic for symlinks in `gen_python_files` and everything was good. But `gen_python_files` isn't enough, there's also `get_sources`, which handles user specified paths directly (instead of files Black discovers). So in #4015, I made a very similar change to #3846 for `get_sources`, and this is where some problems began. The core issue was the line: ``` root_relative_path = path.absolute().relative_to(root).as_posix() ``` The first issue is that despite root being computed from user inputs, we call `.resolve()` while computing it (likely unecessarily). Which means that `path` may not actually be relative to `root`. So I started off this PR trying to fix that, when I ran into the second issue. Which is that `os.getcwd()` (as called by `os.path.abspath` or `Path.absolute` or `Path.cwd`) also often resolves symlinks! ``` >>> import os >>> os.environ.get("PWD") '/Users/shantanu/dev/black/symlink/bug' >>> os.getcwd() '/Users/shantanu/dev/black/actual/bug' ``` This also meant that the breakage often would not show up when input relative paths. This doesn't affect `gen_python_files` / #3846 because things are always absolute and known to be relative to `root`. Anyway, it looks like #4161 fixed the crash by just swallowing the error and ignoring the file. Instead, we should just try to compute the actual relative path. I think this PR should be quite safe, but we could also consider reverting some of the previous changes; the associated issues weren't too popular. At the same time, I think there's still behaviour that can be improved and I kind of want to make larger changes, but maybe I'll save that for if we do something like #3952 Hopefully fixes #4205, fixes #4209, actual fix for #4077
This commit is contained in:
parent
a20100395c
commit
23dfc5b2c3
@ -24,6 +24,7 @@
|
||||
|
||||
### Configuration
|
||||
|
||||
- Fix issue where _Black_ would ignore input files in the presence of symlinks (#4222)
|
||||
- _Black_ now ignores `pyproject.toml` that is missing a `tool.black` section when
|
||||
discovering project root and configuration. Since _Black_ continues to use version
|
||||
control as an indicator of project root, this is expected to primarily change behavior
|
||||
|
@ -44,12 +44,12 @@
|
||||
STDIN_PLACEHOLDER,
|
||||
)
|
||||
from black.files import (
|
||||
best_effort_relative_path,
|
||||
find_project_root,
|
||||
find_pyproject_toml,
|
||||
find_user_pyproject_toml,
|
||||
gen_python_files,
|
||||
get_gitignore,
|
||||
get_root_relative_path,
|
||||
parse_pyproject_toml,
|
||||
path_is_excluded,
|
||||
resolves_outside_root_or_cannot_stat,
|
||||
@ -734,6 +734,7 @@ def get_sources(
|
||||
"""Compute the set of files to be formatted."""
|
||||
sources: Set[Path] = set()
|
||||
|
||||
assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
|
||||
using_default_exclude = exclude is None
|
||||
exclude = re_compile_maybe_verbose(DEFAULT_EXCLUDES) if exclude is None else exclude
|
||||
gitignore: Optional[Dict[Path, PathSpec]] = None
|
||||
@ -749,11 +750,12 @@ def get_sources(
|
||||
|
||||
# Compare the logic here to the logic in `gen_python_files`.
|
||||
if is_stdin or path.is_file():
|
||||
root_relative_path = get_root_relative_path(path, root, report)
|
||||
|
||||
if root_relative_path is None:
|
||||
if resolves_outside_root_or_cannot_stat(path, root, report):
|
||||
if verbose:
|
||||
out(f'Skipping invalid source: "{path}"', fg="red")
|
||||
continue
|
||||
|
||||
root_relative_path = best_effort_relative_path(path, root).as_posix()
|
||||
root_relative_path = "/" + root_relative_path
|
||||
|
||||
# Hard-exclude any files that matches the `--force-exclude` regex.
|
||||
@ -763,11 +765,6 @@ def get_sources(
|
||||
)
|
||||
continue
|
||||
|
||||
if resolves_outside_root_or_cannot_stat(path, root, report):
|
||||
if verbose:
|
||||
out(f'Skipping invalid source: "{path}"', fg="red")
|
||||
continue
|
||||
|
||||
if is_stdin:
|
||||
path = Path(f"{STDIN_PLACEHOLDER}{str(path)}")
|
||||
|
||||
|
@ -48,6 +48,11 @@ def _load_toml(path: Union[Path, str]) -> Dict[str, Any]:
|
||||
return tomllib.load(f)
|
||||
|
||||
|
||||
@lru_cache
|
||||
def _cached_resolve(path: Path) -> Path:
|
||||
return path.resolve()
|
||||
|
||||
|
||||
@lru_cache
|
||||
def find_project_root(
|
||||
srcs: Sequence[str], stdin_filename: Optional[str] = None
|
||||
@ -67,9 +72,9 @@ def find_project_root(
|
||||
if stdin_filename is not None:
|
||||
srcs = tuple(stdin_filename if s == "-" else s for s in srcs)
|
||||
if not srcs:
|
||||
srcs = [str(Path.cwd().resolve())]
|
||||
srcs = [str(_cached_resolve(Path.cwd()))]
|
||||
|
||||
path_srcs = [Path(Path.cwd(), src).resolve() for src in srcs]
|
||||
path_srcs = [_cached_resolve(Path(Path.cwd(), src)) for src in srcs]
|
||||
|
||||
# A list of lists of parents for each 'src'. 'src' is included as a
|
||||
# "parent" of itself if it is a directory
|
||||
@ -236,7 +241,7 @@ def find_user_pyproject_toml() -> Path:
|
||||
else:
|
||||
config_root = os.environ.get("XDG_CONFIG_HOME", "~/.config")
|
||||
user_config_path = Path(config_root).expanduser() / "black"
|
||||
return user_config_path.resolve()
|
||||
return _cached_resolve(user_config_path)
|
||||
|
||||
|
||||
@lru_cache
|
||||
@ -266,27 +271,31 @@ def resolves_outside_root_or_cannot_stat(
|
||||
try:
|
||||
if sys.version_info < (3, 8, 6):
|
||||
path = path.absolute() # https://bugs.python.org/issue33660
|
||||
resolved_path = path.resolve()
|
||||
return get_root_relative_path(resolved_path, root, report) is None
|
||||
resolved_path = _cached_resolve(path)
|
||||
except OSError as e:
|
||||
if report:
|
||||
report.path_ignored(path, f"cannot be read because {e}")
|
||||
return True
|
||||
|
||||
|
||||
def get_root_relative_path(
|
||||
path: Path,
|
||||
root: Path,
|
||||
report: Optional[Report] = None,
|
||||
) -> Optional[str]:
|
||||
"""Returns the file path relative to the 'root' directory"""
|
||||
try:
|
||||
root_relative_path = path.absolute().relative_to(root).as_posix()
|
||||
resolved_path.relative_to(root)
|
||||
except ValueError:
|
||||
if report:
|
||||
report.path_ignored(path, f"is a symbolic link that points outside {root}")
|
||||
return None
|
||||
return root_relative_path
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def best_effort_relative_path(path: Path, root: Path) -> Path:
|
||||
# Precondition: resolves_outside_root_or_cannot_stat(path, root) is False
|
||||
try:
|
||||
return path.absolute().relative_to(root)
|
||||
except ValueError:
|
||||
pass
|
||||
root_parent = next((p for p in path.parents if _cached_resolve(p) == root), None)
|
||||
if root_parent is not None:
|
||||
return path.relative_to(root_parent)
|
||||
# something adversarial, fallback to path guaranteed by precondition
|
||||
return _cached_resolve(path).relative_to(root)
|
||||
|
||||
|
||||
def _path_is_ignored(
|
||||
@ -339,7 +348,8 @@ def gen_python_files(
|
||||
|
||||
assert root.is_absolute(), f"INTERNAL ERROR: `root` must be absolute but is {root}"
|
||||
for child in paths:
|
||||
root_relative_path = child.absolute().relative_to(root).as_posix()
|
||||
assert child.is_absolute()
|
||||
root_relative_path = child.relative_to(root).as_posix()
|
||||
|
||||
# First ignore files matching .gitignore, if passed
|
||||
if gitignore_dict and _path_is_ignored(
|
||||
|
@ -2567,32 +2567,32 @@ def test_symlinks(self) -> None:
|
||||
gitignore = PathSpec.from_lines("gitwildmatch", [])
|
||||
|
||||
regular = MagicMock()
|
||||
regular.absolute.return_value = root / "regular.py"
|
||||
regular.relative_to.return_value = Path("regular.py")
|
||||
regular.resolve.return_value = root / "regular.py"
|
||||
regular.is_dir.return_value = False
|
||||
regular.is_file.return_value = True
|
||||
|
||||
outside_root_symlink = MagicMock()
|
||||
outside_root_symlink.absolute.return_value = root / "symlink.py"
|
||||
outside_root_symlink.relative_to.return_value = Path("symlink.py")
|
||||
outside_root_symlink.resolve.return_value = Path("/nowhere")
|
||||
outside_root_symlink.is_dir.return_value = False
|
||||
outside_root_symlink.is_file.return_value = True
|
||||
|
||||
ignored_symlink = MagicMock()
|
||||
ignored_symlink.absolute.return_value = root / ".mypy_cache" / "symlink.py"
|
||||
ignored_symlink.relative_to.return_value = Path(".mypy_cache") / "symlink.py"
|
||||
ignored_symlink.is_dir.return_value = False
|
||||
ignored_symlink.is_file.return_value = True
|
||||
|
||||
# A symlink that has an excluded name, but points to an included name
|
||||
symlink_excluded_name = MagicMock()
|
||||
symlink_excluded_name.absolute.return_value = root / "excluded_name"
|
||||
symlink_excluded_name.relative_to.return_value = Path("excluded_name")
|
||||
symlink_excluded_name.resolve.return_value = root / "included_name.py"
|
||||
symlink_excluded_name.is_dir.return_value = False
|
||||
symlink_excluded_name.is_file.return_value = True
|
||||
|
||||
# A symlink that has an included name, but points to an excluded name
|
||||
symlink_included_name = MagicMock()
|
||||
symlink_included_name.absolute.return_value = root / "included_name.py"
|
||||
symlink_included_name.relative_to.return_value = Path("included_name.py")
|
||||
symlink_included_name.resolve.return_value = root / "excluded_name"
|
||||
symlink_included_name.is_dir.return_value = False
|
||||
symlink_included_name.is_file.return_value = True
|
||||
@ -2626,39 +2626,100 @@ def test_symlinks(self) -> None:
|
||||
outside_root_symlink.resolve.assert_called_once()
|
||||
ignored_symlink.resolve.assert_not_called()
|
||||
|
||||
def test_get_sources_symlink_and_force_exclude(self) -> None:
|
||||
with TemporaryDirectory() as tempdir:
|
||||
tmp = Path(tempdir).resolve()
|
||||
actual = tmp / "actual"
|
||||
actual.mkdir()
|
||||
symlink = tmp / "symlink"
|
||||
symlink.symlink_to(actual)
|
||||
|
||||
actual_proj = actual / "project"
|
||||
actual_proj.mkdir()
|
||||
(actual_proj / "module.py").write_text("print('hello')", encoding="utf-8")
|
||||
|
||||
symlink_proj = symlink / "project"
|
||||
|
||||
with change_directory(symlink_proj):
|
||||
assert_collected_sources(
|
||||
src=["module.py"],
|
||||
root=symlink_proj.resolve(),
|
||||
expected=["module.py"],
|
||||
)
|
||||
|
||||
absolute_module = symlink_proj / "module.py"
|
||||
assert_collected_sources(
|
||||
src=[absolute_module],
|
||||
root=symlink_proj.resolve(),
|
||||
expected=[absolute_module],
|
||||
)
|
||||
|
||||
# a few tricky tests for force_exclude
|
||||
flat_symlink = symlink_proj / "symlink_module.py"
|
||||
flat_symlink.symlink_to(actual_proj / "module.py")
|
||||
assert_collected_sources(
|
||||
src=[flat_symlink],
|
||||
root=symlink_proj.resolve(),
|
||||
force_exclude=r"/symlink_module.py",
|
||||
expected=[],
|
||||
)
|
||||
|
||||
target = actual_proj / "target"
|
||||
target.mkdir()
|
||||
(target / "another.py").write_text("print('hello')", encoding="utf-8")
|
||||
(symlink_proj / "nested").symlink_to(target)
|
||||
|
||||
assert_collected_sources(
|
||||
src=[symlink_proj / "nested" / "another.py"],
|
||||
root=symlink_proj.resolve(),
|
||||
force_exclude=r"nested",
|
||||
expected=[],
|
||||
)
|
||||
assert_collected_sources(
|
||||
src=[symlink_proj / "nested" / "another.py"],
|
||||
root=symlink_proj.resolve(),
|
||||
force_exclude=r"target",
|
||||
expected=[symlink_proj / "nested" / "another.py"],
|
||||
)
|
||||
|
||||
def test_get_sources_with_stdin_symlink_outside_root(
|
||||
self,
|
||||
) -> None:
|
||||
path = THIS_DIR / "data" / "include_exclude_tests"
|
||||
stdin_filename = str(path / "b/exclude/a.py")
|
||||
outside_root_symlink = Path("/target_directory/a.py")
|
||||
root = Path("target_dir/").resolve().absolute()
|
||||
with patch("pathlib.Path.resolve", return_value=outside_root_symlink):
|
||||
assert_collected_sources(
|
||||
root=Path("target_directory/"),
|
||||
root=root,
|
||||
src=["-"],
|
||||
expected=[],
|
||||
stdin_filename=stdin_filename,
|
||||
)
|
||||
|
||||
@patch("black.find_project_root", lambda *args: (THIS_DIR.resolve(), None))
|
||||
def test_get_sources_with_stdin(self) -> None:
|
||||
src = ["-"]
|
||||
expected = ["-"]
|
||||
assert_collected_sources(src, expected, include="", exclude=r"/exclude/|a\.py")
|
||||
assert_collected_sources(
|
||||
src,
|
||||
root=THIS_DIR.resolve(),
|
||||
expected=expected,
|
||||
include="",
|
||||
exclude=r"/exclude/|a\.py",
|
||||
)
|
||||
|
||||
@patch("black.find_project_root", lambda *args: (THIS_DIR.resolve(), None))
|
||||
def test_get_sources_with_stdin_filename(self) -> None:
|
||||
src = ["-"]
|
||||
stdin_filename = str(THIS_DIR / "data/collections.py")
|
||||
expected = [f"__BLACK_STDIN_FILENAME__{stdin_filename}"]
|
||||
assert_collected_sources(
|
||||
src,
|
||||
expected,
|
||||
root=THIS_DIR.resolve(),
|
||||
expected=expected,
|
||||
exclude=r"/exclude/a\.py",
|
||||
stdin_filename=stdin_filename,
|
||||
)
|
||||
|
||||
@patch("black.find_project_root", lambda *args: (THIS_DIR.resolve(), None))
|
||||
def test_get_sources_with_stdin_filename_and_exclude(self) -> None:
|
||||
# Exclude shouldn't exclude stdin_filename since it is mimicking the
|
||||
# file being passed directly. This is the same as
|
||||
@ -2669,12 +2730,12 @@ def test_get_sources_with_stdin_filename_and_exclude(self) -> None:
|
||||
expected = [f"__BLACK_STDIN_FILENAME__{stdin_filename}"]
|
||||
assert_collected_sources(
|
||||
src,
|
||||
expected,
|
||||
root=THIS_DIR.resolve(),
|
||||
expected=expected,
|
||||
exclude=r"/exclude/|a\.py",
|
||||
stdin_filename=stdin_filename,
|
||||
)
|
||||
|
||||
@patch("black.find_project_root", lambda *args: (THIS_DIR.resolve(), None))
|
||||
def test_get_sources_with_stdin_filename_and_extend_exclude(self) -> None:
|
||||
# Extend exclude shouldn't exclude stdin_filename since it is mimicking the
|
||||
# file being passed directly. This is the same as
|
||||
@ -2685,12 +2746,12 @@ def test_get_sources_with_stdin_filename_and_extend_exclude(self) -> None:
|
||||
expected = [f"__BLACK_STDIN_FILENAME__{stdin_filename}"]
|
||||
assert_collected_sources(
|
||||
src,
|
||||
expected,
|
||||
root=THIS_DIR.resolve(),
|
||||
expected=expected,
|
||||
extend_exclude=r"/exclude/|a\.py",
|
||||
stdin_filename=stdin_filename,
|
||||
)
|
||||
|
||||
@patch("black.find_project_root", lambda *args: (THIS_DIR.resolve(), None))
|
||||
def test_get_sources_with_stdin_filename_and_force_exclude(self) -> None:
|
||||
# Force exclude should exclude the file when passing it through
|
||||
# stdin_filename
|
||||
@ -2698,23 +2759,28 @@ def test_get_sources_with_stdin_filename_and_force_exclude(self) -> None:
|
||||
stdin_filename = str(path / "b/exclude/a.py")
|
||||
assert_collected_sources(
|
||||
src=["-"],
|
||||
root=THIS_DIR.resolve(),
|
||||
expected=[],
|
||||
force_exclude=r"/exclude/|a\.py",
|
||||
stdin_filename=stdin_filename,
|
||||
)
|
||||
|
||||
@patch("black.find_project_root", lambda *args: (THIS_DIR.resolve(), None))
|
||||
def test_get_sources_with_stdin_filename_and_force_exclude_and_symlink(
|
||||
self,
|
||||
) -> None:
|
||||
# Force exclude should exclude a symlink based on the symlink, not its target
|
||||
path = THIS_DIR / "data" / "include_exclude_tests"
|
||||
stdin_filename = str(path / "symlink.py")
|
||||
with TemporaryDirectory() as tempdir:
|
||||
tmp = Path(tempdir).resolve()
|
||||
(tmp / "exclude").mkdir()
|
||||
(tmp / "exclude" / "a.py").write_text("print('hello')", encoding="utf-8")
|
||||
(tmp / "symlink.py").symlink_to(tmp / "exclude" / "a.py")
|
||||
|
||||
stdin_filename = str(tmp / "symlink.py")
|
||||
expected = [f"__BLACK_STDIN_FILENAME__{stdin_filename}"]
|
||||
target = path / "b/exclude/a.py"
|
||||
with patch("pathlib.Path.resolve", return_value=target):
|
||||
with change_directory(tmp):
|
||||
assert_collected_sources(
|
||||
src=["-"],
|
||||
root=tmp,
|
||||
expected=expected,
|
||||
force_exclude=r"exclude/a\.py",
|
||||
stdin_filename=stdin_filename,
|
||||
|
Loading…
Reference in New Issue
Block a user