diff --git a/.gitignore b/.gitignore index 8a932add..ac985751 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,4 @@ venv* *.cdx.json release_notes.txt .hypothesis +.claude/worktrees/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 25e13c78..43c6291d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -66,10 +66,9 @@ repos: name: vulture language: python entry: vulture - args: ["--min-confidence=80"] - types: [python] - files: ^dfetch/ - require_serial: true + args: ["--min-confidence=80", "dfetch/"] + pass_filenames: false + always_run: true - id: pyroma (package friendliness) name: Pyroma entry: pyroma diff --git a/CHANGELOG.rst b/CHANGELOG.rst index dc32139a..32c02622 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,7 +1,8 @@ Release 0.14.0 (unreleased) =========================== -* Strip ``user:password@`` userinfo from ``remote_url`` before writing it to ``.dfetch_data.yaml``, so a manifest URL with an embedded credential no longer leaks the credential into the committed metadata file (DFT-13) +* Respect the superproject's line-ending preference (#1206) +* Strip ``user:password@`` userinfo before storing metadata (#1206) * Warn when a project URL uses a plaintext transport scheme (#1229) * Documentation and threat-model clarifications for existing release attestation support (#1208) * Report SVN externals fetched during update (#1220) diff --git a/dfetch/commands/update.py b/dfetch/commands/update.py index 1abe63fa..294700a0 100644 --- a/dfetch/commands/update.py +++ b/dfetch/commands/update.py @@ -95,6 +95,7 @@ def _ignored(dst: str = destination) -> list[str]: dfetch.project.create_sub_project(project).update( force=args.force, ignored_files_callback=_ignored, + eol_preferences_callback=superproject.eol_preferences, ) if not args.no_recommendations and os.path.isdir( diff --git a/dfetch/commands/update_patch.py b/dfetch/commands/update_patch.py index 5eca48bc..5f4a9f06 100644 --- a/dfetch/commands/update_patch.py +++ b/dfetch/commands/update_patch.py @@ -126,6 +126,7 @@ def _ignored(dst: str = destination) -> list[str]: force=True, ignored_files_callback=_ignored, patch_count=len(subproject.patch) - 1, + eol_preferences_callback=superproject.eol_preferences, ) # generate reverse patch @@ -147,7 +148,10 @@ def _ignored(dst: str = destination) -> list[str]: # force update again to fetched version from metadata but with applying patch subproject.update( - force=True, ignored_files_callback=_ignored, patch_count=-1 + force=True, + ignored_files_callback=_ignored, + patch_count=-1, + eol_preferences_callback=superproject.eol_preferences, ) if exceptions: diff --git a/dfetch/project/archivesubproject.py b/dfetch/project/archivesubproject.py index 86336214..2c3004da 100644 --- a/dfetch/project/archivesubproject.py +++ b/dfetch/project/archivesubproject.py @@ -157,7 +157,9 @@ def wanted_version(self) -> Version: return Version(revision=self._project_entry.hash) return Version(revision=self.remote) - def _fetch_impl(self, version: Version) -> tuple[Version, list[Dependency]]: + def _fetch_impl( + self, version: Version, eol_hint: str | None = None + ) -> tuple[Version, list[Dependency]]: """Download and extract the archive to the local destination. 1. Download the archive to a temporary file. @@ -170,6 +172,7 @@ def _fetch_impl(self, version: Version) -> tuple[Version, list[Dependency]]: Returns: The version that was actually fetched (hash string or URL). """ + del eol_hint # archives are extracted byte-for-byte revision = version.revision pathlib.Path(self.local_path).mkdir(parents=True, exist_ok=True) diff --git a/dfetch/project/gitsubproject.py b/dfetch/project/gitsubproject.py index c66640ac..45f62b9b 100644 --- a/dfetch/project/gitsubproject.py +++ b/dfetch/project/gitsubproject.py @@ -1,7 +1,7 @@ """Git specific implementation.""" -import pathlib from functools import lru_cache +from pathlib import Path from dfetch.log import get_logger from dfetch.manifest.project import ProjectEntry @@ -67,12 +67,14 @@ def list_tool_info() -> None: ) SubProject._log_tool("git", "") - def _fetch_impl(self, version: Version) -> tuple[Version, list[Dependency]]: + def _fetch_impl( + self, version: Version, eol_hint: str | None = None + ) -> tuple[Version, list[Dependency]]: """Get the revision of the remote and place it at the local path.""" rev_or_branch_or_tag = self._determine_what_to_fetch(version) # When exporting a file, the destination directory must already exist - pathlib.Path(self.local_path).mkdir(parents=True, exist_ok=True) + Path(self.local_path).mkdir(parents=True, exist_ok=True) license_globs = [f"/{name.lower()}" for name in LICENSE_GLOBS] + [ f"/{name.upper()}" for name in LICENSE_GLOBS @@ -86,6 +88,7 @@ def _fetch_impl(self, version: Version) -> tuple[Version, list[Dependency]]: src=self.source, must_keeps=license_globs + [".gitmodules"], ignore=self.ignore, + eol=eol_hint, ) ) @@ -108,7 +111,7 @@ def _fetch_impl(self, version: Version) -> tuple[Version, list[Dependency]]: targets = {local_repo.METADATA_DIR, local_repo.GIT_MODULES_FILE} - for path in pathlib.Path(self.local_path).rglob("*"): + for path in Path(self.local_path).rglob("*"): if path.name in targets: safe_rm(path) diff --git a/dfetch/project/gitsuperproject.py b/dfetch/project/gitsuperproject.py index b46c439a..3d4343c1 100644 --- a/dfetch/project/gitsuperproject.py +++ b/dfetch/project/gitsuperproject.py @@ -72,6 +72,10 @@ def get_file_revision(self, path: str | pathlib.Path) -> str: """Get the revision of the given file.""" return str(self._repo.get_last_file_hash(str(path))) + def eol_preferences(self, paths: Sequence[str]) -> dict[str, str]: + """Get the line ending requested per path by this repo's gitattributes.""" + return self._repo.eol_attributes(paths) + @staticmethod def import_projects() -> Sequence[ProjectEntry]: """Import projects from underlying superproject.""" diff --git a/dfetch/project/subproject.py b/dfetch/project/subproject.py index 898c9543..9b750e17 100644 --- a/dfetch/project/subproject.py +++ b/dfetch/project/subproject.py @@ -92,6 +92,9 @@ def update( force: bool = False, ignored_files_callback: Callable[[], Sequence[str]] | None = None, patch_count: int = -1, + eol_preferences_callback: ( + Callable[[Sequence[str]], dict[str, str]] | None + ) = None, ) -> None: """Update this subproject if required. @@ -104,6 +107,10 @@ def update( hash). Calling it at both points ensures the stored hash and the check-time hash use the same skiplist, preventing false "local changes" reports. patch_count (int, optional): Number of patches to apply (-1 means all). + eol_preferences_callback (Callable, optional): Given a list of paths, returns + the line ending ("lf" or "crlf") the superproject requests per path (e.g. + from its gitattributes). Used to resolve the destination's preference, + which the VCS backend applies natively while fetching. """ to_fetch = self.update_is_required(force) @@ -131,7 +138,9 @@ def update( ): if warning := plaintext_warning(self.__project.remote_url): logger.print_warning_line(self.__project.name, warning) - actually_fetched, dependency = self._fetch_impl(to_fetch) + actually_fetched, dependency = self._fetch_impl( + to_fetch, self._destination_eol_hint(eol_preferences_callback) + ) self._log_project(f"Fetched {actually_fetched}") applied_patches = self._apply_patches(patch_count) @@ -153,6 +162,22 @@ def update( logger.debug(f"Writing repo metadata to: {self.__metadata.path}") self.__metadata.dump() + def _destination_eol_hint( + self, callback: Callable[[Sequence[str]], dict[str, str]] | None + ) -> str | None: + """Get the directory-wide line-ending preference for the destination, if any. + + Resolved with a hypothetical path inside the destination, so global and + directory-level rules can match. The VCS backend applies it natively + while fetching (git renormalisation, ``svn export --native-eol``). + """ + if not callback: + return None + exact = pathlib.PurePath(self.local_path).as_posix() + probe = f"{exact}/_" + result = callback([exact, probe]) + return result.get(probe) or result.get(exact) + def _apply_patches(self, count: int = -1) -> list[str]: """Apply the patches.""" cwd = pathlib.Path(".").resolve() @@ -429,8 +454,18 @@ def _are_there_local_changes(self, files_to_ignore: Sequence[str]) -> bool: ) @abstractmethod - def _fetch_impl(self, version: Version) -> tuple[Version, list[Dependency]]: - """Fetch the given version of the subproject, should be implemented by the child class.""" + def _fetch_impl( + self, version: Version, eol_hint: str | None = None + ) -> tuple[Version, list[Dependency]]: + """Fetch the given version of the subproject, should be implemented by the child class. + + Args: + version: The version to fetch. + eol_hint: Directory-wide line ending ("lf" or "crlf") the superproject + requests for the destination. Backends apply it natively while + fetching where the VCS supports it (git renormalisation, + ``svn export --native-eol``). + """ @abstractmethod def get_default_branch(self) -> str: diff --git a/dfetch/project/superproject.py b/dfetch/project/superproject.py index 4d5cce56..41dc655b 100644 --- a/dfetch/project/superproject.py +++ b/dfetch/project/superproject.py @@ -103,6 +103,15 @@ def _get_useremail_fallback(self) -> str: def get_file_revision(self, path: str | pathlib.Path) -> str: """Get the revision of the given file.""" + def eol_preferences(self, paths: Sequence[str]) -> dict[str, str]: + """Get the line ending ("lf" or "crlf") this project requests per path. + + Only paths with an explicit preference are returned. By default a + superproject has none; VCS-specific superprojects may override this. + """ + del paths # unused arg + return {} + @staticmethod @abstractmethod def import_projects() -> Sequence[ProjectEntry]: diff --git a/dfetch/project/svnsubproject.py b/dfetch/project/svnsubproject.py index e44ac070..e7e1bf48 100644 --- a/dfetch/project/svnsubproject.py +++ b/dfetch/project/svnsubproject.py @@ -113,10 +113,16 @@ def _remove_ignored_files(self) -> None: if not (file_or_dir.is_file() and is_license_file(file_or_dir.name)): safe_rm(file_or_dir) - def _fetch_impl(self, version: Version) -> tuple[Version, list[Dependency]]: + def _fetch_impl( + self, version: Version, eol_hint: str | None = None + ) -> tuple[Version, list[Dependency]]: """Get the revision of the remote and place it at the local path.""" branch, branch_path, revision = self._determine_what_to_fetch(version) + # Let svn itself produce the requested line endings; this applies to + # files marked 'svn:eol-style=native', other files keep their bytes. + native_eol = {"lf": "LF", "crlf": "CRLF"}.get(eol_hint or "", "") + complete_path = "/".join( filter(None, [self.remote, branch_path.strip(), self.source]) ).strip("/") @@ -128,7 +134,7 @@ def _fetch_impl(self, version: Version) -> tuple[Version, list[Dependency]]: complete_path, file_pattern = self._parse_file_pattern(complete_path) - SvnRepo.export(complete_path, revision, self.local_path) + SvnRepo.export(complete_path, revision, self.local_path, native_eol) if file_pattern: for file in find_non_matching_files(self.local_path, (file_pattern,)): @@ -147,7 +153,9 @@ def _fetch_impl(self, version: Version) -> tuple[Version, list[Dependency]]: if os.path.isdir(self.local_path) else os.path.dirname(self.local_path) ) - SvnRepo.export(f"{root_branch_path}/{license_files[0]}", revision, dest) + SvnRepo.export( + f"{root_branch_path}/{license_files[0]}", revision, dest, native_eol + ) if self.ignore: self._remove_ignored_files() diff --git a/dfetch/project/svnsuperproject.py b/dfetch/project/svnsuperproject.py index c3d708bb..e1f50744 100644 --- a/dfetch/project/svnsuperproject.py +++ b/dfetch/project/svnsuperproject.py @@ -44,6 +44,10 @@ def get_sub_project(self, project: ProjectEntry) -> SubProject | None: """Get the subproject in the same vcs type as the superproject.""" return SvnSubProject(project) + def eol_preferences(self, paths: Sequence[str]) -> dict[str, str]: + """Get the line ending requested per path by this repo's svn:auto-props.""" + return {path: eol for path in paths if (eol := self._repo.eol_style_for(path))} + def ignored_files(self, path: str) -> Sequence[str]: """Return a list of files that can be ignored in a given path.""" resolved_path = resolve_absolute_path(path) diff --git a/dfetch/util/cmdline.py b/dfetch/util/cmdline.py index b9a11a58..edecc4e1 100644 --- a/dfetch/util/cmdline.py +++ b/dfetch/util/cmdline.py @@ -40,12 +40,15 @@ def run_on_cmdline( logger: logging.Logger, cmd: list[str], env: Mapping[str, str] | None = None, + input_data: bytes | None = None, ) -> "subprocess.CompletedProcess[Any]": """Run a command and log the output, and raise if something goes wrong.""" logger.debug(f"Running {cmd}") try: - proc = subprocess.run(cmd, env=env, capture_output=True, check=True) # nosec + proc = subprocess.run( # nosec + cmd, env=env, input=input_data, capture_output=True, check=True + ) except subprocess.CalledProcessError as exc: raise SubprocessCommandError( exc.cmd, diff --git a/dfetch/vcs/git.py b/dfetch/vcs/git.py index 0bc762b4..1cd3bd18 100644 --- a/dfetch/vcs/git.py +++ b/dfetch/vcs/git.py @@ -319,6 +319,24 @@ def check_version_exists( return exists +def _parse_eol_attributes(output: str) -> dict[str, str]: + """Parse ``git check-attr -z text eol`` output into a path-to-eol mapping. + + The ``-z`` output is a flat sequence of NUL-separated + `` `` records. Only paths with an effective + ``eol`` of ``lf`` or ``crlf`` that are not marked ``-text`` are returned. + """ + attrs: dict[str, dict[str, str]] = {} + fields = output.split("\0") + for path, name, value in zip(fields[0::3], fields[1::3], fields[2::3]): + attrs.setdefault(path, {})[name] = value + return { + path: values["eol"] + for path, values in attrs.items() + if values.get("eol") in ("lf", "crlf") and values.get("text") != "unset" + } + + class GitLocalRepo: """A git repository.""" @@ -348,6 +366,78 @@ def is_git(self) -> bool: except (SubprocessCommandError, RuntimeError): return False + def eol_attributes(self, paths: Sequence[str]) -> dict[str, str]: + """Resolve the effective 'eol' gitattribute for each given path. + + Args: + paths: Paths relative to this repository's root. + + Returns: + A mapping of path to ``"lf"`` or ``"crlf"`` for every path whose + attributes request a specific line ending. Paths marked ``-text`` + and paths without an ``eol`` attribute are omitted. Empty when + git is unavailable or this is not a git repository. + """ + if not paths: + return {} + with in_directory(self._path): + try: + result = run_on_cmdline( + logger, + ["git", "check-attr", "-z", "--stdin", "text", "eol"], + input_data="\0".join(paths).encode() + b"\0", + ) + except (SubprocessCommandError, RuntimeError): + return {} + return _parse_eol_attributes(result.stdout.decode()) + + def _configure_eol(self, eol: str) -> None: + """Write the line-ending policy into the local repo before fetch. + + Enables text=auto detection via .git/info/attributes so git treats + text files consistently, then sets core.eol as the default checkout + ending. Deliberately omits eol= from info/attributes so that + per-file attributes in the remote's .gitattributes (e.g. *.bat + eol=crlf) take precedence over the superproject's blanket preference. + """ + if eol not in ("lf", "crlf"): + raise ValueError(f"Invalid eol value {eol!r}: must be 'lf' or 'crlf'") + info_dir = Path(".git") / "info" + info_dir.mkdir(exist_ok=True) + (info_dir / "attributes").write_text("* text=auto\n", encoding="utf-8") + run_on_cmdline(logger, ["git", "config", "core.eol", eol]) + run_on_cmdline(logger, ["git", "config", "core.autocrlf", "false"]) + + def _renormalize_eol(self) -> None: + """Rewrite the working tree so the configured eol attribute takes effect. + + ``git reset --hard`` only converts LF blobs to CRLF on the way out; + blobs stored with CRLF are left untouched. Renormalize the index and + check the files out again so git itself rewrites them with the + requested endings. Only files already present in the working tree are + written, keeping sparse checkouts sparse. + """ + run_on_cmdline(logger, ["git", "add", "--renormalize", "."]) + changed = ( + run_on_cmdline(logger, ["git", "diff", "--cached", "--name-only"]) + .stdout.decode() + .strip() + ) + if not changed: + return + tracked = run_on_cmdline(logger, ["git", "ls-files", "-z"]).stdout.decode() + on_disk = [path for path in tracked.split("\0") if os.path.isfile(path)] + if not on_disk: + return + tree = run_on_cmdline(logger, ["git", "write-tree"]).stdout.decode().strip() + # read-tree drops the index stat-cache, so checkout-index rewrites files + run_on_cmdline(logger, ["git", "read-tree", tree]) + run_on_cmdline( + logger, + ["git", "checkout-index", "-f", "-z", "--stdin"], + input_data="\0".join(on_disk).encode() + b"\0", + ) + def _configure_sparse_checkout( self, src: str | None, @@ -384,6 +474,9 @@ def checkout_version( options.src, options.must_keeps or [], options.ignore ) + if options.eol is not None: + self._configure_eol(options.eol) + run_on_cmdline( logger, ["git", "fetch", "--depth", "1", "origin", options.version], @@ -391,6 +484,9 @@ def checkout_version( ) run_on_cmdline(logger, ["git", "reset", "--hard", "FETCH_HEAD"]) + if options.eol is not None: + self._renormalize_eol() + run_on_cmdline( logger, ["git", "submodule", "update", "--init", "--recursive"], diff --git a/dfetch/vcs/git_types.py b/dfetch/vcs/git_types.py index 188e05dc..9b517d0e 100644 --- a/dfetch/vcs/git_types.py +++ b/dfetch/vcs/git_types.py @@ -26,3 +26,4 @@ class CheckoutOptions: src: str | None = None must_keeps: Sequence[str] | None = None ignore: Sequence[str] | None = None + eol: str | None = None diff --git a/dfetch/vcs/svn.py b/dfetch/vcs/svn.py index df563d9f..de7deed1 100644 --- a/dfetch/vcs/svn.py +++ b/dfetch/vcs/svn.py @@ -1,9 +1,11 @@ """Svn repository.""" import contextlib +import fnmatch import functools import os import pathlib +import posixpath import re from collections.abc import Callable, Generator, Mapping, Sequence from pathlib import Path @@ -95,6 +97,46 @@ def get_svn_version() -> tuple[str, str]: return (str(tool), str(version)) +def _self_and_ancestors(directory: str) -> Generator[str, None, None]: + """Yield *directory* and each of its ancestors, ending at ".". + + Args: + directory: A relative directory path using forward slashes. + """ + current = directory or "." + while current not in ("", "/", "."): + yield current + current = posixpath.dirname(current) + yield "." + + +def _match_auto_props_eol_style(props: str, filename: str) -> str | None: + """Find the ``svn:eol-style`` the given auto-props text requests for *filename*. + + Args: + props: ``svn propget svn:auto-props --show-inherited-props`` output: + pattern lines such as ``*.c = svn:eol-style=LF;svn:keywords=Id``, + where the first line of a block can be prefixed with `` -``. + filename: The file name to match the patterns against. + + Returns: + The value of the last matching pattern, mirroring how deeper + directories override shallower ones, or None if nothing matches. + """ + style = None + for line in props.splitlines(): + if " - " in line: + line = line.split(" - ", 1)[1] + pattern, _, values = line.partition("=") + if not values or not fnmatch.fnmatch(filename, pattern.strip()): + continue + for prop in values.split(";"): + name, _, value = prop.partition("=") + if name.strip() == "svn:eol-style": + style = value.strip() + return style + + class External(NamedTuple): """Information about a svn external.""" @@ -224,6 +266,46 @@ def is_svn(self) -> bool: except (SubprocessCommandError, RuntimeError): return False + def eol_style_for(self, path: str) -> str | None: + """Resolve the line ending ``svn:auto-props`` requests for *path*, if any. + + Reads the ``svn:auto-props`` property (own and inherited) of the + deepest existing versioned ancestor of *path* and matches its patterns + against the file name — mirroring how svn applies auto-props to newly + added files. + + Args: + path: A path relative to this repository's root. + + Returns: + ``"lf"`` or ``"crlf"``, or None when no preference applies. + """ + props = self._inherited_auto_props(posixpath.dirname(path)) + style = _match_auto_props_eol_style(props, posixpath.basename(path)) + return {"LF": "lf", "CRLF": "crlf"}.get(style or "") + + def _inherited_auto_props(self, directory: str) -> str: + """Get auto-props of the deepest existing versioned ancestor of *directory*.""" + with in_directory(self._path): + for candidate in _self_and_ancestors(directory): + if not os.path.isdir(candidate): + continue + try: + result = _run_svn_raw( + [ + "propget", + "svn:auto-props", + "--show-inherited-props", + candidate, + ] + ) + except SshHostKeyError: + raise + except (SubprocessCommandError, RuntimeError): + continue + return result.decode() + return "" + def externals(self) -> list[External]: """Get list of externals.""" with in_directory(self._path): @@ -400,7 +482,7 @@ def untracked_files(path: str, ignore: Sequence[str]) -> list[str]: return files @staticmethod - def export(url: str, rev: str = "", dst: str = ".") -> None: + def export(url: str, rev: str = "", dst: str = ".", native_eol: str = "") -> None: """Export the given revision from url to destination. Args: @@ -408,14 +490,22 @@ def export(url: str, rev: str = "", dst: str = ".") -> None: rev: Bare revision number (digits only) or empty string for HEAD. Must not include flag names such as ``--revision``. dst: Local destination path. + native_eol: Line ending ("LF" or "CRLF") to use for files with the + ``svn:eol-style=native`` property, or empty for the platform default. Raises: - ValueError: If *rev* is non-empty and contains non-digit characters. + ValueError: If *rev* is non-empty and contains non-digit characters, + or *native_eol* is not one of "", "LF" or "CRLF". """ if rev and not rev.isdigit(): raise ValueError(f"SVN revision must be digits only, got: {rev!r}") + if native_eol not in ("", "LF", "CRLF"): + raise ValueError(f"SVN native-eol must be LF or CRLF, got: {native_eol!r}") _run_svn( - ["export", "--force"] + (["--revision", rev] if rev else []) + [url, dst], + ["export", "--force"] + + (["--revision", rev] if rev else []) + + (["--native-eol", native_eol] if native_eol else []) + + [url, dst], url=url, ) diff --git a/doc/conf.py b/doc/conf.py index 0e181dae..04a4cfb3 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -125,6 +125,7 @@ "autosectionlabel.howto/updating-projects", "autosectionlabel.explanation/threat_model_supply_chain", "autosectionlabel.explanation/threat_model_usage", + "autosectionlabel.explanation/line-endings", ] # Options for sphinx-autoissues diff --git a/doc/explanation/line-endings.rst b/doc/explanation/line-endings.rst new file mode 100644 index 00000000..901c5ca0 --- /dev/null +++ b/doc/explanation/line-endings.rst @@ -0,0 +1,113 @@ + + +.. _line-endings: + +Line Endings +============ + +Line endings affect the on-disk bytes of every text file. For vendored code +this matters in two ways: tools and compilers on Windows may require CRLF, and +*Dfetch* hashes the directory after fetching to detect local modifications. A +file with LF endings has a different hash from the same file with CRLF endings, +so fixing the line-ending style at fetch time prevents ``dfetch check`` from +reporting false local modifications. + + +Where the preference comes from +-------------------------------- + +*Dfetch* reads the line-ending preference from the **superproject** that owns +the manifest. There is no separate dfetch setting. + +.. list-table:: + :header-rows: 1 + :widths: 25 75 + + * - Superproject type + - Where the preference comes from + * - Git + - ``.gitattributes`` in the superproject root, + e.g. ``* text=auto eol=lf`` + * - SVN + - ``svn:auto-props`` on the superproject working copy, + e.g. ``* = svn:eol-style=LF`` + +The preference is resolved for a hypothetical file inside the dependency's +destination directory, so directory-level rules such as +``vendor/mylib/* text=auto eol=lf`` are picked up correctly. + +If no preference is found, *Dfetch* leaves line endings as the remote provides +them. + + +What happens on fetch +---------------------- + +The table below shows what ends up on disk for each combination of remote +content and superproject preference. + +.. list-table:: + :header-rows: 1 + :widths: 25 30 45 + + * - Remote has + - Superproject wants + - Result on disk + * - LF + - lf + - LF (no conversion needed) + * - LF + - crlf + - CRLF (converted during checkout) + * - CRLF + - lf + - LF (renormalized after checkout) + * - CRLF + - crlf + - CRLF (no conversion needed) + * - CRLF in ``*.bat`` with remote ``*.bat eol=crlf`` + - lf + - CRLF (remote per-file attribute wins, see below) + * - any + - (none) + - unchanged + + +Remote per-file attributes take precedence +------------------------------------------- + +The superproject preference is a default, not an override. If the remote +declares per-file line-ending rules in its own ``.gitattributes``, those rules +win for the files they cover. + +A common case is Windows batch files. A remote might have:: + + # .gitattributes in the remote repo + * text=auto + *.bat eol=crlf + *.cmd eol=crlf + +Even when the superproject requests ``eol=lf``, ``.bat`` and ``.cmd`` files +keep their CRLF endings. Files that have no explicit ``eol=`` attribute in the +remote fall back to the superproject preference. + +Binary files are never converted regardless of any attribute. + + +Effect on the integrity hash +----------------------------- + +The hash in ``.dfetch_data.yaml`` is computed from the files **after** +line-ending conversion. Running ``dfetch update`` twice on the same remote +version always produces the same hash because the on-disk content is identical. + +Changing the superproject's line-ending preference and running ``dfetch update`` +again will produce a different hash. The vendored files have genuinely changed, +so this is expected. Running ``dfetch update`` after a preference change +re-aligns the hash with the new on-disk content. + + +Examples +-------- + +.. scenario-include:: ../features/superproject-line-ending-attrs.feature diff --git a/doc/howto/updating-projects.rst b/doc/howto/updating-projects.rst index a89b7ac4..ce00ce7e 100644 --- a/doc/howto/updating-projects.rst +++ b/doc/howto/updating-projects.rst @@ -121,3 +121,55 @@ submodule are recorded in ``.dfetch_data.yaml`` and are visible in :ref:`Report`. .. scenario-include:: ../features/fetch-git-repo-with-submodule.feature + +.. _updating-line-endings: + +Line endings +------------ + +If your superproject is a git repository and its ``.gitattributes`` file +contains a global or per-directory ``eol`` rule, *DFetch* lets the version +control system that performs the fetch produce matching line endings, so the +vendored files match your project's enforced style on every platform: + +.. code-block:: text + + # force LF everywhere (common on cross-platform projects) + * text=auto eol=lf + + # force CRLF everywhere (less common, but valid) + * text=auto eol=crlf + +*DFetch* uses ``git check-attr`` to resolve the effective setting for each +dependency's destination directory, so per-directory rules are honoured as +well as global ones. The conversion itself is done natively by the VCS that +fetches the project, so even large dependencies convert at full speed: + +- Git dependencies are checked out with the requested ``eol`` configured and + renormalised by git itself; git's own text detection decides which files + are text, so binary files are never converted. +- SVN dependencies are exported with ``svn export --native-eol``, which + applies the requested ending to every file carrying the + ``svn:eol-style=native`` property; files without that property keep their + bytes exactly as stored. + +If your superproject is an SVN repository, declare the preference with SVN's +own mechanism instead — the ``svn:auto-props`` property: + +.. code-block:: console + + svn propset svn:auto-props '* = svn:eol-style=LF' . + +*DFetch* reads the property (including values inherited from parent +directories) for each dependency's destination and applies it the same way. + +Archive dependencies are extracted byte-for-byte. If no preference is set, +the platform and VCS defaults apply unchanged. + +The scenarios below cover both Git and SVN subprojects, and verify all four +combinations of remote content (LF or CRLF) against each superproject +``eol`` setting — expressed concisely via Gherkin ``Examples`` tables. They +also verify that a superproject using another VCS (such as SVN) leaves the +fetched line endings untouched: + +.. scenario-include:: ../features/superproject-line-ending-attrs.feature diff --git a/doc/index.rst b/doc/index.rst index c9c5b077..1e1a1a07 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -102,6 +102,7 @@ upstream. See :ref:`vendoring` for background on the problem this solves. explanation/alternatives explanation/architecture explanation/security + explanation/line-endings .. only:: latex diff --git a/doc/static/uml/update.puml b/doc/static/uml/update.puml index e8f2a8a1..2adf93c6 100644 --- a/doc/static/uml/update.puml +++ b/doc/static/uml/update.puml @@ -62,6 +62,10 @@ partition "Perform Update" { revision; endif + if (EOL setting given?) then (yes) + :Configure EOL from superproject; + endif + :Fetch target: revision/tag/branch; diff --git a/features/steps/generic_steps.py b/features/steps/generic_steps.py index 886fc7ce..643ee92c 100644 --- a/features/steps/generic_steps.py +++ b/features/steps/generic_steps.py @@ -425,3 +425,17 @@ def step_impl(_, path): def step_impl(_, path): """Assert that a directory has not been created.""" assert not os.path.exists(path), f"Directory {path} exists but should not!" + + +@then("'{path}' has {ending} line endings") +def step_impl(_, path, ending): + with open(path, "rb") as f: + content = f.read() + if ending.upper() == "CRLF": + assert b"\r\n" in content, f"No CRLF found in {path}" + assert b"\n" not in content.replace(b"\r\n", b""), f"Bare LF found in {path}" + elif ending.upper() == "LF": + assert b"\n" in content, f"No LF found in {path}" + assert b"\r" not in content, f"CR found in {path}" + else: + raise ValueError(f"Unknown line ending type: {ending!r}") diff --git a/features/steps/git_steps.py b/features/steps/git_steps.py index be2b2eac..12c293c0 100644 --- a/features/steps/git_steps.py +++ b/features/steps/git_steps.py @@ -159,6 +159,39 @@ def step_impl(context, path, name): commit_all("A change") +@given('a git-repository "{name}" with {ending} content') +def step_impl(context, name, ending): + terminator = {"LF": "\n", "CRLF": "\r\n"}[ending] + remote_path = os.path.join(context.remotes_dir, name) + pathlib.Path(remote_path).mkdir(parents=True, exist_ok=True) + with in_directory(remote_path): + create_repo() + subprocess.check_call(["git", "config", "core.autocrlf", "false"]) + pathlib.Path("README.md").write_bytes( + f"Generated file for {name}{terminator}".encode("utf-8") + ) + commit_all("Initial commit") + tag("v1") + + +@given( + 'a git-repository "{name}" with a {ending} "{filename}" and "{gitattr}" gitattributes' +) +def step_impl(context, name, ending, filename, gitattr): + terminator = {"LF": "\n", "CRLF": "\r\n"}[ending] + remote_path = os.path.join(context.remotes_dir, name) + pathlib.Path(remote_path).mkdir(parents=True, exist_ok=True) + with in_directory(remote_path): + create_repo() + subprocess.check_call(["git", "config", "core.autocrlf", "false"]) + pathlib.Path(".gitattributes").write_text(gitattr + "\n", encoding="utf-8") + pathlib.Path(filename).write_bytes( + f"Generated file for {name}{terminator}".encode("utf-8") + ) + commit_all("Initial commit") + tag("v1") + + @given("all files in {directory} are committed") @when("all files in {directory} are committed") def step_impl(_, directory): diff --git a/features/steps/svn_steps.py b/features/steps/svn_steps.py index f8bd5f64..53b98eb7 100644 --- a/features/steps/svn_steps.py +++ b/features/steps/svn_steps.py @@ -115,6 +115,29 @@ def step_impl(context, name): add_and_commit("Added files") +@given("svn:auto-props in {directory} requests {eol} line endings") +def step_impl(_, directory, eol): + with in_directory(directory): + subprocess.check_call( + [ + "svn", + "propset", + "svn:auto-props", + f"* = svn:eol-style={eol.upper()}", + ".", + ] + ) + + +@given('a local svn repo "{directory}" with the manifest') +def step_impl(context, directory): + repo_path = create_svn_server_and_repo(context, directory) + + with in_directory(repo_path): + generate_manifest(context) + add_and_commit("Initial commit") + + @given("a fetched and committed MySvnProject with the manifest") def step_impl(context): repo_path = create_svn_server_and_repo(context, "MySvnProject") @@ -165,3 +188,22 @@ def step_impl(context, name, ext_path, source_name): with in_directory(name): with in_directory("trunk"): add_externals([{"url": source_url, "path": ext_path, "revision": ""}]) + + +@given('a svn-server "{name}" with {ending} content') +def step_impl(context, name, ending): + terminator = {"LF": "\n", "CRLF": "\r\n"}[ending] + repo_path = create_svn_server_and_repo(context, name) + with in_directory(repo_path): + create_stdlayout() + with in_directory("trunk"): + pathlib.Path("README.md").write_bytes( + f"Generated file for {name}{terminator}".encode("utf-8") + ) + subprocess.check_call(["svn", "update", "."]) + subprocess.check_call(["svn", "add", "--force", "."]) + subprocess.check_call( + ["svn", "propset", "svn:eol-style", "native", "trunk/README.md"] + ) + subprocess.check_call(["svn", "ci", "-m", '"Initial commit"']) + create_tag("v1") diff --git a/features/superproject-line-ending-attrs.feature b/features/superproject-line-ending-attrs.feature new file mode 100644 index 00000000..e37c9a20 --- /dev/null +++ b/features/superproject-line-ending-attrs.feature @@ -0,0 +1,157 @@ +@update +Feature: Superproject .gitattributes line endings respected on fetch + + When a superproject enforces specific line endings — through + ``.gitattributes`` (for example ``* text=auto eol=lf``) in a git + superproject, or ``svn:auto-props`` (for example ``* = svn:eol-style=LF``) + in an SVN superproject — *DFetch* respects that setting when fetching + dependencies, so the vendored files always match the project's enforced + style. + + Scenario Outline: Git superproject forces on git subproject with remote content + Given a git-repository "SomeProject.git" with content + And a local git repo "MyProject" with the manifest + """ + manifest: + version: '0.0' + + projects: + - name: SomeProject + url: some-remote-server/SomeProject.git + tag: v1 + """ + And ".gitattributes" in MyProject is created and committed with + """ + * text=auto eol= + """ + When I run "dfetch update" in MyProject + Then 'MyProject/SomeProject/README.md' has line endings + + Examples: + | remote_eol | eol | + | LF | crlf | + | CRLF | lf | + | CRLF | crlf | + | LF | lf | + + Scenario Outline: SVN superproject forces on git subproject with remote content + An SVN superproject declares its line-ending preference with svn's own + mechanism: the ``svn:auto-props`` property (e.g. ``* = svn:eol-style=LF``). + + Given a git-repository "SomeProject.git" with content + And a local svn repo "MyProject" with the manifest + """ + manifest: + version: '0.0' + + projects: + - name: SomeProject + url: some-remote-server/SomeProject.git + tag: v1 + """ + And svn:auto-props in MyProject requests line endings + When I run "dfetch update" in MyProject + Then 'MyProject/SomeProject/README.md' has line endings + + Examples: + | remote_eol | eol | + | LF | crlf | + | CRLF | lf | + | CRLF | crlf | + | LF | lf | + + Scenario Outline: SVN superproject forces on SVN subproject with remote content + Given a svn-server "SomeSvnProject" with content + And a local svn repo "MyProject" with the manifest + """ + manifest: + version: '0.0' + + projects: + - name: SomeSvnProject + url: some-remote-server/SomeSvnProject + tag: v1 + """ + And svn:auto-props in MyProject requests line endings + When I run "dfetch update" in MyProject + Then 'MyProject/SomeSvnProject/README.md' has line endings + + Examples: + | remote_eol | eol | + | LF | crlf | + | CRLF | lf | + | CRLF | crlf | + | LF | lf | + + Scenario: SVN superproject without a preference leaves line endings unchanged + Without a line-ending preference (``.gitattributes`` in a git + superproject, ``svn:auto-props`` in an SVN superproject), fetched files + keep the line endings the remote provides. A stray ``.gitattributes`` + file next to the manifest of an SVN superproject is not used. + + Given a git-repository "SomeProject.git" with CRLF content + And a local svn repo "MyProject" with the manifest + """ + manifest: + version: '0.0' + + projects: + - name: SomeProject + url: some-remote-server/SomeProject.git + tag: v1 + """ + And ".gitattributes" in MyProject is changed with + """ + * text=auto eol=lf + """ + When I run "dfetch update" in MyProject + Then 'MyProject/SomeProject/README.md' has CRLF line endings + + Scenario Outline: Git superproject forces on SVN subproject with remote content + Given a svn-server "SomeSvnProject" with content + And a local git repo "MyProject" with the manifest + """ + manifest: + version: '0.0' + + projects: + - name: SomeSvnProject + url: some-remote-server/SomeSvnProject + tag: v1 + """ + And ".gitattributes" in MyProject is created and committed with + """ + * text=auto eol= + """ + When I run "dfetch update" in MyProject + Then 'MyProject/SomeSvnProject/README.md' has line endings + + Examples: + | remote_eol | eol | + | LF | crlf | + | CRLF | lf | + | CRLF | crlf | + | LF | lf | + + Scenario: Remote gitattributes eol=crlf for a file type overrides the superproject lf preference + A remote repo that marks ``*.bat`` files as ``eol=crlf`` in its own + ``.gitattributes`` keeps those line endings even when the superproject + globally requests ``lf``, so Windows batch files are never corrupted. + + Given a git-repository "SomeProject.git" with a CRLF "script.bat" and "*.bat eol=crlf" gitattributes + And a local git repo "MyProject" with the manifest + """ + manifest: + version: '0.0' + + projects: + - name: SomeProject + url: some-remote-server/SomeProject.git + tag: v1 + """ + And ".gitattributes" in MyProject is created and committed with + """ + * text=auto eol=lf + """ + When I run "dfetch update" in MyProject + Then 'MyProject/SomeProject/script.bat' has CRLF line endings diff --git a/tests/test_git_vcs.py b/tests/test_git_vcs.py index 33b08d14..4b6c1ca1 100644 --- a/tests/test_git_vcs.py +++ b/tests/test_git_vcs.py @@ -433,3 +433,65 @@ def test_build_git_ssh_command(name, env_ssh, git_config_ssh, expected): mock_logger.warning.assert_called() else: mock_logger.warning.assert_not_called() + + +# --------------------------------------------------------------------------- +# GitLocalRepo.eol_attributes — check-attr based lookup +# --------------------------------------------------------------------------- + + +def _check_attr_output(records): + """Build `git check-attr -z` output bytes from (path, attr, value) tuples.""" + return b"".join( + b"%s\0%s\0%s\0" % (p.encode(), a.encode(), v.encode()) for p, a, v in records + ) + + +def test_eol_attributes_returns_eol_per_path(tmp_path): + """Paths with an effective lf/crlf eol attribute are returned.""" + output = _check_attr_output( + [ + ("a.txt", "text", "auto"), + ("a.txt", "eol", "lf"), + ("b.bat", "text", "unspecified"), + ("b.bat", "eol", "crlf"), + ] + ) + with patch("dfetch.vcs.git.run_on_cmdline") as mock_run: + mock_run.return_value.stdout = output + result = GitLocalRepo(tmp_path).eol_attributes(["a.txt", "b.bat"]) + + assert result == {"a.txt": "lf", "b.bat": "crlf"} + + +def test_eol_attributes_skips_non_text_and_unspecified(tmp_path): + """Paths marked -text or without an eol attribute are omitted.""" + output = _check_attr_output( + [ + ("img.png", "text", "unset"), + ("img.png", "eol", "lf"), + ("plain.txt", "text", "auto"), + ("plain.txt", "eol", "unspecified"), + ] + ) + with patch("dfetch.vcs.git.run_on_cmdline") as mock_run: + mock_run.return_value.stdout = output + result = GitLocalRepo(tmp_path).eol_attributes(["img.png", "plain.txt"]) + + assert result == {} + + +def test_eol_attributes_without_git_returns_empty(tmp_path): + """A missing git binary must not break the update.""" + with patch( + "dfetch.vcs.git.run_on_cmdline", + side_effect=RuntimeError("git not available on system, please install"), + ): + assert GitLocalRepo(tmp_path).eol_attributes(["a.txt"]) == {} + + +def test_eol_attributes_no_paths_skips_git_call(tmp_path): + """An empty path list returns early without invoking git.""" + with patch("dfetch.vcs.git.run_on_cmdline") as mock_run: + assert GitLocalRepo(tmp_path).eol_attributes([]) == {} + mock_run.assert_not_called() diff --git a/tests/test_subproject.py b/tests/test_subproject.py index fcf645d6..e4b391c9 100644 --- a/tests/test_subproject.py +++ b/tests/test_subproject.py @@ -17,7 +17,9 @@ class ConcreteSubProject(SubProject): _wanted_version: Version - def _fetch_impl(self, version: Version) -> tuple[Version, list[Dependency]]: + def _fetch_impl( + self, version: Version, eol_hint: str | None = None + ) -> tuple[Version, list[Dependency]]: return Version(), [] def _latest_revision_on_branch(self, branch): @@ -171,6 +173,46 @@ def test_update_uses_ignored_files_callback_for_stored_hash(): assert "old_file.txt" not in hash_call_skiplist +def test_update_eol_hint_propagated_for_file_destination(): + """EOL hint from an exact-path gitattributes rule reaches _fetch_impl. + + When the destination is a single file, _destination_eol_hint must check + the exact path (not only the probe child path) so that rules like + ``vendor/lib.c eol=lf`` are picked up and forwarded to the VCS backend. + """ + eol_callback = MagicMock(return_value={"vendor/lib.c": "lf"}) + + with patch("dfetch.project.subproject.os.path.exists") as mock_exists: + with patch("dfetch.project.subproject.Metadata.from_file") as mock_meta_file: + with patch("dfetch.project.subproject.hash_directory"): + with patch("dfetch.project.subproject.safe_rm"): + with patch("dfetch.project.subproject.Metadata.dump"): + with patch.object( + ConcreteSubProject, + "_fetch_impl", + return_value=(Version(), []), + ) as mock_fetch_impl: + mock_exists.return_value = True + mock_meta_file.return_value.version = Version( + revision="abc" + ) + + subproject = ConcreteSubProject( + ProjectEntry( + {"name": "vendor/lib.c", "dst": "vendor/lib.c"} + ) + ) + subproject._wanted_version = Version(revision="new") + + subproject.update( + force=True, eol_preferences_callback=eol_callback + ) + + mock_fetch_impl.assert_called_once() + _, eol_hint = mock_fetch_impl.call_args[0] + assert eol_hint == "lf" + + @pytest.mark.parametrize( "name, project_version, on_disk_version, expect_return, expect_project_version", [ diff --git a/tests/test_svn_vcs.py b/tests/test_svn_vcs.py index 39960176..e2f90569 100644 --- a/tests/test_svn_vcs.py +++ b/tests/test_svn_vcs.py @@ -4,7 +4,7 @@ import pytest -from dfetch.vcs.svn import SvnRepo +from dfetch.vcs.svn import SvnRepo, _match_auto_props_eol_style def test_export_with_revision_passes_correct_args(): @@ -39,6 +39,82 @@ def test_export_without_revision_omits_revision_args(): ] +def test_export_with_native_eol_passes_correct_args(): + """export() with a native eol must pass --native-eol to svn.""" + with patch("dfetch.vcs.svn.run_on_cmdline") as mock_run: + SvnRepo.export("svn://example.com/repo", dst="/tmp/out", native_eol="LF") + mock_run.assert_called_once() + assert mock_run.call_args[0][1] == [ + "svn", + "--non-interactive", + "export", + "--force", + "--native-eol", + "LF", + "svn://example.com/repo", + "/tmp/out", + ] + + +def test_export_rejects_invalid_native_eol(): + """Only LF and CRLF are valid native eol values; never invoke svn otherwise.""" + with patch("dfetch.vcs.svn.run_on_cmdline") as mock_run: + with pytest.raises(ValueError): + SvnRepo.export("svn://example.com/repo", native_eol="--something") + mock_run.assert_not_called() + + +@pytest.mark.parametrize( + "name, props, filename, expected", + [ + ("global rule", "* = svn:eol-style=LF", "any.txt", "LF"), + ("propget path prefix", ". - * = svn:eol-style=CRLF", "any.txt", "CRLF"), + ("extension match", "*.bat = svn:eol-style=CRLF", "run.bat", "CRLF"), + ("extension mismatch", "*.bat = svn:eol-style=CRLF", "run.sh", None), + ( + "last match wins", + "* = svn:eol-style=LF\n*.bat = svn:eol-style=CRLF", + "run.bat", + "CRLF", + ), + ( + "multiple props per pattern", + "* = svn:keywords=Id;svn:eol-style=LF", + "a.c", + "LF", + ), + ("no eol-style prop", "* = svn:keywords=Id", "a.c", None), + ("empty props", "", "a.c", None), + ], +) +def test_match_auto_props_eol_style(name, props, filename, expected): + """Auto-props patterns must resolve to the correct svn:eol-style.""" + assert _match_auto_props_eol_style(props, filename) == expected, name + + +def test_eol_style_for_maps_to_lowercase(tmp_path): + """eol_style_for must translate svn:eol-style to 'lf'/'crlf'.""" + with patch("dfetch.vcs.svn.run_on_cmdline") as mock_run: + mock_run.return_value.stdout = b". - * = svn:eol-style=LF" + assert SvnRepo(tmp_path).eol_style_for("ext/mylib/_") == "lf" + + +def test_eol_style_for_native_gives_no_preference(tmp_path): + """svn:eol-style=native means platform default, not a dfetch preference.""" + with patch("dfetch.vcs.svn.run_on_cmdline") as mock_run: + mock_run.return_value.stdout = b". - * = svn:eol-style=native" + assert SvnRepo(tmp_path).eol_style_for("ext/mylib/_") is None + + +def test_eol_style_for_without_svn_returns_none(tmp_path): + """A missing svn binary must not break the update.""" + with patch( + "dfetch.vcs.svn.run_on_cmdline", + side_effect=RuntimeError("svn not available on system, please install"), + ): + assert SvnRepo(tmp_path).eol_style_for("ext/mylib/_") is None + + def test_export_rejects_non_digit_revision(): """Space-containing or flag-like revision strings must raise ValueError. diff --git a/tests/test_update.py b/tests/test_update.py index f6078185..f9967ca7 100644 --- a/tests/test_update.py +++ b/tests/test_update.py @@ -77,6 +77,7 @@ def test_forced_update(): mocked_create.return_value.update.assert_called_once_with( force=True, ignored_files_callback=ANY, + eol_preferences_callback=ANY, ) cb = mocked_create.return_value.update.call_args.kwargs[