Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/autoformat.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
format:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@v7
with:
token: ${{ secrets.GITHUB_TOKEN }}

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v6
- uses: actions/checkout@v7

- uses: actions/setup-python@v6
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]

steps:
- uses: actions/checkout@v6
- uses: actions/checkout@v7

- name: Install uv
uses: astral-sh/setup-uv@v8.2.0
Expand Down
59 changes: 43 additions & 16 deletions pySEQTarget/analysis/_risk_estimates.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,12 @@
from scipy import stats


def _compute_rd_rr(comp, has_bootstrap, z=None, group_cols=None):
def _ci_label(bootstrap_CI):
"""Format the CI level as a column-name fragment, e.g. 0.95 -> '95%'."""
return f"{bootstrap_CI * 100:g}%"


def _compute_rd_rr(comp, has_bootstrap, z=None, group_cols=None, ci_label="95%"):
"""
Compute Risk Difference and Risk Ratio from a comparison dataframe.
Fallback used when paired bootstrap data is unavailable (e.g. subgroups).
Expand All @@ -14,45 +19,53 @@ def _compute_rd_rr(comp, has_bootstrap, z=None, group_cols=None):

if has_bootstrap:
rd_se = (pl.col("se_x").pow(2) + pl.col("se_y").pow(2)).sqrt()
rd_lci_lab = f"RD {ci_label} LCI"
rd_uci_lab = f"RD {ci_label} UCI"
rd_comp = comp.with_columns(
[
(pl.col("risk_x") - pl.col("risk_y")).alias("Risk Difference"),
(pl.col("risk_x") - pl.col("risk_y") - z * rd_se).alias("RD 95% LCI"),
(pl.col("risk_x") - pl.col("risk_y") + z * rd_se).alias("RD 95% UCI"),
(pl.col("risk_x") - pl.col("risk_y") - z * rd_se).alias(rd_lci_lab),
(pl.col("risk_x") - pl.col("risk_y") + z * rd_se).alias(rd_uci_lab),
rd_se.alias("RD SE"),
]
)
rd_comp = rd_comp.drop(["risk_x", "risk_y", "se_x", "se_y"])
col_order = group_cols + [
"A_x",
"A_y",
"Risk Difference",
"RD 95% LCI",
"RD 95% UCI",
rd_lci_lab,
rd_uci_lab,
"RD SE",
]
rd_comp = rd_comp.select([c for c in col_order if c in rd_comp.columns])

rr_log_se = (
(pl.col("se_x") / pl.col("risk_x")).pow(2)
+ (pl.col("se_y") / pl.col("risk_y")).pow(2)
).sqrt()
rr_lci_lab = f"RR {ci_label} LCI"
rr_uci_lab = f"RR {ci_label} UCI"
rr_comp = comp.with_columns(
[
(pl.col("risk_x") / pl.col("risk_y")).alias("Risk Ratio"),
((pl.col("risk_x") / pl.col("risk_y")) * (-z * rr_log_se).exp()).alias(
"RR 95% LCI"
rr_lci_lab
),
((pl.col("risk_x") / pl.col("risk_y")) * (z * rr_log_se).exp()).alias(
"RR 95% UCI"
rr_uci_lab
),
rr_log_se.alias("log(RR) SE"),
]
)
rr_comp = rr_comp.drop(["risk_x", "risk_y", "se_x", "se_y"])
col_order = group_cols + [
"A_x",
"A_y",
"Risk Ratio",
"RR 95% LCI",
"RR 95% UCI",
rr_lci_lab,
rr_uci_lab,
"log(RR) SE",
]
rr_comp = rr_comp.select([c for c in col_order if c in rr_comp.columns])
else:
Expand Down Expand Up @@ -129,9 +142,11 @@ def _risk_estimates(self):
if has_bootstrap:
alpha = 1 - self.bootstrap_CI
z = stats.norm.ppf(1 - alpha / 2)
ci_label = _ci_label(self.bootstrap_CI)
else:
z = None
alpha = None
ci_label = _ci_label(self.bootstrap_CI)

rd_comparisons = []
rr_comparisons = []
Expand Down Expand Up @@ -180,6 +195,18 @@ def _risk_estimates(self):

n_valid_rr = len(valid_rr)

# Bootstrap SEs, retained regardless of CI method: the risk
# difference SE is on the natural scale, the risk ratio SE
# on the log scale (the scale ratio measures are pooled on
# for inverse-variance meta-analysis: combine the log Risk
# Ratio with log(RR) SE, then exponentiate).
rd_se = float(paired["RD"].std())
log_rr_se = (
float(valid_rr["RR"].log().std())
if n_valid_rr >= 2
else float("nan")
)

if self.bootstrap_CI_method == "percentile":
rd_lci = float(paired["RD"].quantile(alpha / 2))
rd_uci = float(paired["RD"].quantile(1 - alpha / 2))
Expand All @@ -190,11 +217,9 @@ def _risk_estimates(self):
rr_lci = float("nan")
rr_uci = float("nan")
else:
rd_se = float(paired["RD"].std())
rd_lci = rd_point - z * rd_se
rd_uci = rd_point + z * rd_se
if n_valid_rr >= 2 and rr_point > 0:
log_rr_se = float(valid_rr["RR"].log().std())
rr_lci = math.exp(math.log(rr_point) - z * log_rr_se)
rr_uci = math.exp(math.log(rr_point) + z * log_rr_se)
else:
Expand All @@ -207,8 +232,9 @@ def _risk_estimates(self):
"A_x": [tx_x],
"A_y": [tx_y],
"Risk Difference": [rd_point],
"RD 95% LCI": [rd_lci],
"RD 95% UCI": [rd_uci],
f"RD {ci_label} LCI": [rd_lci],
f"RD {ci_label} UCI": [rd_uci],
"RD SE": [rd_se],
}
)
rr_comp = pl.DataFrame(
Expand All @@ -217,8 +243,9 @@ def _risk_estimates(self):
"A_x": [tx_x],
"A_y": [tx_y],
"Risk Ratio": [rr_point],
"RR 95% LCI": [rr_lci],
"RR 95% UCI": [rr_uci],
f"RR {ci_label} LCI": [rr_lci],
f"RR {ci_label} UCI": [rr_uci],
"log(RR) SE": [log_rr_se],
}
)
else:
Expand Down Expand Up @@ -246,7 +273,7 @@ def _risk_estimates(self):
comp = comp.join(se_y, how="cross")

rd_comp, rr_comp = _compute_rd_rr(
comp, has_bootstrap, z, group_cols
comp, has_bootstrap, z, group_cols, ci_label
)
rd_cols = rd_comp.columns
rr_cols = rr_comp.columns
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "pySEQTarget"
version = "0.13.8"
version = "0.13.9"
description = "Sequentially Nested Target Trial Emulation"
readme = "README.md"
license = {text = "MIT"}
Expand Down
64 changes: 64 additions & 0 deletions tests/test_survival.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,3 +293,67 @@ def test_subgroup_compevent():
s.fit()
s.survival()
return


def _risk_estimates(ci, method, paired_subgroup=None):
opts = dict(
km_curves=True,
bootstrap_nboot=4,
seed=42,
bootstrap_CI=ci,
bootstrap_CI_method=method,
)
if paired_subgroup:
opts["subgroup_colname"] = paired_subgroup
s = SEQuential(
load_data("SEQdata"),
id_col="ID",
time_col="time",
eligible_col="eligible",
treatment_col="tx_init",
outcome_col="outcome",
time_varying_cols=["N", "L", "P"],
fixed_cols=["sex"],
method="ITT",
parameters=SEQopts(**opts),
)
s.expand()
s.bootstrap()
s.fit()
s.survival()
return s.risk_estimates


@pytest.mark.parametrize("method", ["se", "percentile"])
def test_risk_ci_columns_labelled_with_requested_level(method):
# The CI columns must be labelled with the requested level, not a hardcoded
# 95% (the interval itself was already computed at the right level).
est = _risk_estimates(0.9, method)
rd, rr = est["risk_difference"], est["risk_ratio"]
assert "RD 90% LCI" in rd.columns and "RD 90% UCI" in rd.columns
assert "RR 90% LCI" in rr.columns and "RR 90% UCI" in rr.columns
assert "RD 95% LCI" not in rd.columns
assert "RR 95% LCI" not in rr.columns


@pytest.mark.parametrize("method", ["se", "percentile"])
def test_risk_se_columns_present_for_both_methods(method):
# Bootstrap SEs are reported regardless of CI method: RD SE (natural scale)
# and log(RR) SE (log scale) for inverse-variance meta-analysis pooling.
est = _risk_estimates(0.95, method)
rd, rr = est["risk_difference"], est["risk_ratio"]
assert "RD SE" in rd.columns
assert "log(RR) SE" in rr.columns
assert rd["RD SE"].null_count() == 0
assert (rd["RD SE"] >= 0).all()


def test_risk_se_columns_present_subgroup_delta_path():
# Subgroups use the independent delta-method fallback (_compute_rd_rr),
# which must also emit the SE columns and the requested CI label.
est = _risk_estimates(0.9, "se", paired_subgroup="sex")
rd, rr = est["risk_difference"], est["risk_ratio"]
assert "RD SE" in rd.columns
assert "log(RR) SE" in rr.columns
assert "RD 90% LCI" in rd.columns
assert "RR 90% LCI" in rr.columns
Loading