diff --git a/.gitignore b/.gitignore index f6e3170..fbe0a42 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,6 @@ .DS_Store .git.broken -.claude src/sprite_mask/__pycache__/ __pycache__/ *.py[cod] @@ -17,6 +16,8 @@ dev/ build/ dist/ *.egg-info/ +.cl* +.cod* docs/_build/ validation/ @@ -37,3 +38,5 @@ tests/test_data/**/ref_cache/ # conflicts them on Windows. tests/test_data/1000g_10sample_highcov_subset/1000g_10samples_highcov.vcf.gz tests/test_data/1000g_10sample_highcov_subset/1000g_10samples_highcov.vcf.gz.tbi + +uv.* diff --git a/docs/arguments.rst b/docs/arguments.rst index e6ec44b..41f5c1c 100644 --- a/docs/arguments.rst +++ b/docs/arguments.rst @@ -93,9 +93,10 @@ BAM/CRAM mode options **--reference PATH** FASTA reference for CRAM inputs. -**--strict-depth** - Use precise per-base depth counting. Slower, but avoids the - approximations made by ``mosdepth --fast-mode``. +**--fast-mode** + Opt into ``mosdepth --fast-mode``. Strict per-base depth counting is the + default, so the former ``--strict-depth`` flag is no longer needed. Fast + mode is quicker, but allows mosdepth's fast-mode approximations. Examples ======== diff --git a/src/sprite_mask/cli.py b/src/sprite_mask/cli.py index c82a68d..b8ef816 100644 --- a/src/sprite_mask/cli.py +++ b/src/sprite_mask/cli.py @@ -71,7 +71,7 @@ def _cmd_from_alignments(args: argparse.Namespace) -> int: max_dp=args.max_dp, exclude_flag=args.exclude_flag, reference=Path(args.reference) if args.reference else None, - strict_depth=args.strict_depth, + fast_mode=args.fast_mode, keep_work=args.keep_work, force=args.force, dry_run=args.dry_run, @@ -167,7 +167,12 @@ def _build_from_alignments_parser(subparsers: argparse._SubParsersAction) -> Non p.add_argument("--exclude-flag", type=int, help="SAM FLAG bits to exclude reads") p.add_argument("--reference", help="FASTA reference for CRAM inputs") p.add_argument( - "--strict-depth", action="store_true", help="precise per-base depth counting (slower)" + "--fast-mode", + action="store_true", + help=( + "opt into mosdepth --fast-mode; strict per-base depth counting " + "is the default" + ), ) p.set_defaults(subcommand=_cmd_from_alignments) diff --git a/src/sprite_mask/config.py b/src/sprite_mask/config.py index e484b98..2d0fd9f 100644 --- a/src/sprite_mask/config.py +++ b/src/sprite_mask/config.py @@ -17,7 +17,7 @@ class AlignmentRunConfig: max_dp: int | None = None exclude_flag: int | None = None reference: Path | None = None - strict_depth: bool = False + fast_mode: bool = False keep_work: bool = False force: bool = False dry_run: bool = False diff --git a/src/sprite_mask/mosdepth.py b/src/sprite_mask/mosdepth.py index f49056a..725f817 100644 --- a/src/sprite_mask/mosdepth.py +++ b/src/sprite_mask/mosdepth.py @@ -68,7 +68,7 @@ def build_mosdepth_command(sample: Sample, config: AlignmentRunConfig, prefix: P "--quantize", quantize, ] - if not config.strict_depth: + if config.fast_mode: command.append("--fast-mode") if config.min_mapq is not None: command.extend(["--mapq", str(config.min_mapq)]) diff --git a/tests/test_cli.py b/tests/test_cli.py index cd73c51..4a0dbd2 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,5 +1,6 @@ from __future__ import annotations +import argparse import gzip import json import re @@ -22,6 +23,20 @@ def test_root_help_starts_with_banner_and_version() -> None: assert help_text.startswith(f"{HELP_BANNER}\nsprite {__version__}\n\nusage:") +def test_from_alignments_help_describes_fast_mode_replacement() -> None: + parser = build_parser() + subparsers = next( + action for action in parser._actions if isinstance(action, argparse._SubParsersAction) + ) + + help_text = subparsers.choices["from-alignments"].format_help() + normalized_help = " ".join(help_text.split()) + + assert "--fast-mode" in help_text + assert "strict per-base depth counting is the default" in normalized_help + assert "--strict-depth" not in help_text + + def assert_sprite_progress(log_output: str, message: str) -> None: matching_lines = [line for line in log_output.splitlines() if message in line] assert matching_lines @@ -143,7 +158,7 @@ def fake_run_workflow(config: object) -> WorkflowOutputs: "1796", "--reference", str(tmp_path / "ref.fa"), - "--strict-depth", + "--fast-mode", "--keep-work", "--force", ] @@ -160,7 +175,7 @@ def fake_run_workflow(config: object) -> WorkflowOutputs: assert seen_config.min_mapq == 20 assert seen_config.exclude_flag == 1796 assert seen_config.reference == tmp_path / "ref.fa" - assert seen_config.strict_depth is True + assert seen_config.fast_mode is True assert seen_config.keep_work is True assert seen_config.force is True diff --git a/tests/test_workflow_commands.py b/tests/test_workflow_commands.py index 223cbaf..6a40108 100644 --- a/tests/test_workflow_commands.py +++ b/tests/test_workflow_commands.py @@ -11,7 +11,7 @@ from sprite_mask.workflow import _required_tools, workflow_output_paths -def test_build_mosdepth_command_defaults_to_fast_mode(tmp_path: Path) -> None: +def test_build_mosdepth_command_default_omits_fast_mode(tmp_path: Path) -> None: sample = Sample("s1", "popA", tmp_path / "s1.bam") config = AlignmentRunConfig( samples_path=tmp_path / "samples.tsv", @@ -32,7 +32,6 @@ def test_build_mosdepth_command_defaults_to_fast_mode(tmp_path: Path) -> None: "--no-per-base", "--quantize", "0:30:", - "--fast-mode", "--mapq", "20", "--flag", @@ -44,18 +43,18 @@ def test_build_mosdepth_command_defaults_to_fast_mode(tmp_path: Path) -> None: ] -def test_build_mosdepth_command_strict_depth_omits_fast_mode(tmp_path: Path) -> None: +def test_build_mosdepth_command_fast_mode_adds_fast_mode(tmp_path: Path) -> None: sample = Sample("s1", "popA", tmp_path / "s1.bam") config = AlignmentRunConfig( samples_path=tmp_path / "samples.tsv", min_dp=30, out_dir=tmp_path / "out", - strict_depth=True, + fast_mode=True, ) command = build_mosdepth_command(sample, config, tmp_path / "work" / "s1.d30") - assert "--fast-mode" not in command + assert "--fast-mode" in command def test_build_mosdepth_command_max_dp_uses_three_bin_quantize(tmp_path: Path) -> None: diff --git a/tests/test_workflow_e2e.py b/tests/test_workflow_e2e.py index 687aa46..69f1811 100644 --- a/tests/test_workflow_e2e.py +++ b/tests/test_workflow_e2e.py @@ -72,6 +72,87 @@ def target_sites(self) -> int: ("chr12", 24_999_999, 25_049_999), ("chr20", 9_999_999, 10_049_999), ), + expected_sample_counts={ + "HG00096": 27_521, + "HG00097": 20_585, + "HG00099": 58_048, + "HG00100": 14_469, + "HG00101": 28_605, + "HG00102": 18_908, + "HG00103": 22_460, + "HG00105": 8_039, + "HG00106": 47_679, + "HG00107": 22_583, + "NA18486": 21_089, + "NA18488": 10_702, + "NA18489": 190_337, + "NA18498": 8_661, + "NA18499": 24_514, + "NA18501": 12_409, + "NA18502": 9_021, + "NA18504": 12_821, + "NA18505": 50_961, + "NA18507": 24_492, + }, + expected_sample_summary={ + 1: 36_250, + 2: 49_967, + 3: 42_286, + 4: 28_271, + 5: 17_221, + 6: 9_096, + 7: 4_991, + 8: 3_030, + 9: 1_822, + 10: 1_134, + 11: 693, + 12: 439, + 13: 291, + 14: 171, + 15: 96, + 16: 76, + 17: 178, + 18: 129, + 19: 130, + 20: 32, + }, + expected_population_summary={ + "GBR": { + 0: 57_972, + 1: 64_395, + 2: 40_867, + 3: 19_713, + 4: 7_959, + 5: 2_947, + 6: 1_177, + 7: 617, + 8: 299, + 9: 285, + 10: 72, + }, + "YRI": { + 0: 2_579, + 1: 90_131, + 2: 62_499, + 3: 25_836, + 4: 9_429, + 5: 3_091, + 6: 1_252, + 7: 789, + 8: 232, + 9: 342, + 10: 123, + }, + }, + expected_population_counts={"GBR": 10, "YRI": 10}, + min_count_rows=30_000, +) + + +FAST_MODE_INTEGRATION_FIXTURE = FixtureCase( + name="1000g_20sample_highcov_4chrom_subset", + fixture=REPO_ROOT / "tests" / "test_data" / "1000g_20sample_highcov_4chrom_subset", + target_intervals=INTEGRATION_FIXTURE.target_intervals, expected_sample_counts={ "HG00096": 29_770, "HG00097": 22_129, @@ -144,8 +225,8 @@ def target_sites(self) -> int: 10: 177, }, }, - expected_population_counts={"GBR": 10, "YRI": 10}, - min_count_rows=30_000, + expected_population_counts=INTEGRATION_FIXTURE.expected_population_counts, + min_count_rows=INTEGRATION_FIXTURE.min_count_rows, ) @@ -154,6 +235,28 @@ def test_cli_workflow_multichromosome_fixture_outputs_expected_counts(tmp_path: require_fixture_and_tools(fixture_case) out_dir, work_dir = run_sprite_mask(tmp_path, fixture_case, keep_work=True) + assert_population_count_output_matches_fixture(out_dir, fixture_case) + assert_expected_work_files_exist(work_dir, fixture_case) + + +def test_cli_workflow_fast_mode_preserves_previous_fixture_counts(tmp_path: Path) -> None: + fixture_case = FAST_MODE_INTEGRATION_FIXTURE + require_fixture_and_tools(fixture_case) + out_dir, work_dir = run_sprite_mask( + tmp_path, + fixture_case, + keep_work=True, + fast_mode=True, + ) + + assert_population_count_output_matches_fixture(out_dir, fixture_case) + assert_expected_work_files_exist(work_dir, fixture_case) + + +def assert_population_count_output_matches_fixture( + out_dir: Path, + fixture_case: FixtureCase, +) -> None: population_count_bed_gz = out_dir / "sprite.bed.gz" population_count_bed_index = Path(f"{population_count_bed_gz}.tbi") @@ -189,8 +292,6 @@ def test_cli_workflow_multichromosome_fixture_outputs_expected_counts(tmp_path: expected_target_sites=fixture_case.target_sites, ) - assert_expected_work_files_exist(work_dir, fixture_case) - def test_cli_workflow_smoke_fixture_writes_only_indexed_population_bed(tmp_path: Path) -> None: fixture_case = SMOKE_FIXTURE @@ -231,6 +332,7 @@ def run_sprite_mask( keep_work: bool, threads: int = 2, jobs: int = 2, + fast_mode: bool = False, ) -> tuple[Path, Path]: out_dir = tmp_path / "results" work_dir = tmp_path / "work" @@ -254,6 +356,8 @@ def run_sprite_mask( "--jobs", str(jobs), ] + if fast_mode: + command.append("--fast-mode") if keep_work: command.append("--keep-work")