cschlaffner · silenus092 · Apr 29, 2026 · Apr 29, 2026 · Apr 29, 2026 · Apr 29, 2026
diff --git a/backend/protzilla/constants/option_types.py b/backend/protzilla/constants/option_types.py
@@ -60,6 +60,12 @@ class PValueColumnName(StrEnum):
     ptm = "PTM"
 
 
+class NanPolicy(StrEnum):
+    propagate = "Propagate"
+    omit = "Omit"
+    raise_ = "Raise"
+
+
 FC_SIGNIFICANCE_COLUMNS = ["Protein ID", "fc_z_score", "fc_significance"]
 CORRECTED_P_VALUES_COLUMNS = [
     "Protein ID",

diff --git a/backend/protzilla/data_analysis/differential_expression_t_test.py b/backend/protzilla/data_analysis/differential_expression_t_test.py
@@ -119,6 +119,7 @@ def t_test(
     log_base: LogBaseWithNoneType = LogBaseWithNoneType.NONE,
     fc_zscore_filter: bool = False,
     fc_zscore_alpha: float = 0.05,
+    nan_policy: str = "raise",
 ) -> dict:
     """
     A function to conduct a two sample t-test between groups defined in the
@@ -135,6 +136,12 @@ def t_test(
     :param log_base: in case the data was previously log transformed this parameter contains the base as a string
     :param fc_zscore_filter: whether to apply a fold-change Z-score significance filter in addition to the p-value
     :param fc_zscore_alpha: the p-value cutoff (tail probability) for the fold-change Z-score significance
+    :param nan_policy: How to handle NaN values in intensity data.
+        - 'raise' (default): raise ValueError if any NaN is present.
+        - 'omit': silently drop NaN rows before computing statistics.
+          Both p-values and fold changes are computed from clean (NaN-free) data.
+        - 'propagate': proteins with any NaN in either group are excluded from
+          the results (their t-statistic and p-value become NaN and are filtered out).
 
     :return: a dict containing
         - a df differentially_expressed_proteins_df in typical protzilla long format containing the t-test results
@@ -143,7 +150,8 @@ def t_test(
         - a df log2_fold_change, containing the log2 fold changes per protein,
         - a df t_statistic_df, containing the t-statistic per protein,
         - a df fc_significance_df, containing the fold-change z-scores and their tail probabilities,
-        - a float corrected_alpha, containing the alpha value after application of multiple testing correction (depending on the selected multiple testing correction method corrected_alpha may be equal to alpha),
+        - a float corrected_alpha, containing the alpha value after application of multiple testing correction
+          (depending on the selected multiple testing correction method corrected_alpha may be different from alpha)
         - a list messages, containing messages for the user
     """
 
@@ -168,6 +176,9 @@ def empty_result(messages):
     assert grouping in metadata_df.columns
     messages = []
 
+    # Normalise nan_policy so both 'Omit' and 'omit' are accepted
+    nan_policy_lower = nan_policy.lower()
+
     if ttest_type not in ["Student's t-Test", "Welch's t-Test"]:
         messages.append(
             {
@@ -215,9 +226,32 @@ def empty_result(messages):
 
     protein_df["id"] = protein_df.groupby(["Protein ID", grouping]).cumcount()
 
-    protein_df_wide = protein_df.dropna(subset=[intensity_name]).pivot(
-        index=["Protein ID", "id"], columns=grouping, values=intensity_name
-    )
+    # ------------------------------------------------------------------ #
+    # NaN handling: gate the pivot on the chosen nan_policy               #
+    # ------------------------------------------------------------------ #
+    if nan_policy_lower == "raise":
+        if protein_df[intensity_name].isna().any():
+            raise ValueError(
+                f"Input data contains NaN values in column '{intensity_name}'. "
+                "Set nan_policy='omit' to drop them silently or "
+                "nan_policy='propagate' to exclude affected proteins from results."
+            )
+        protein_df_wide = protein_df.pivot(
+            index=["Protein ID", "id"], columns=grouping, values=intensity_name
+        )
+    elif nan_policy_lower == "omit":
+        # Drop NaN rows before pivoting so that all per-protein statistics
+        # (including medians used for fold-change) are computed from clean data.
+        protein_df_wide = protein_df.dropna(subset=[intensity_name]).pivot(
+            index=["Protein ID", "id"], columns=grouping, values=intensity_name
+        )
+    else:  # propagate
+        # Keep NaN values in the wide table. pandas agg skips NaN by default,
+        # so we explicitly detect affected proteins and null their statistics
+        # afterwards, causing their p-values to be NaN and be filtered out.
+        protein_df_wide = protein_df.pivot(
+            index=["Protein ID", "id"], columns=grouping, values=intensity_name
+        )
 
     if group1 not in protein_df_wide.columns or group2 not in protein_df_wide.columns:
         messages.append(
@@ -236,6 +270,16 @@ def empty_result(messages):
         n="count", mean="mean", var="var", median="median"
     )
 
+    # For 'propagate': null out statistics for proteins that have any NaN in
+    # either group so that vectorized_t_test produces NaN p-values for them.
+    if nan_policy_lower == "propagate":
+        has_nan_g1 = grouped_dfs[group1].apply(lambda s: s.isna().any())
+        has_nan_g2 = grouped_dfs[group2].apply(lambda s: s.isna().any())
+        propagate_mask = has_nan_g1 | has_nan_g2
+        if propagate_mask.any():
+            statistics_group1.loc[propagate_mask, :] = np.nan
+            statistics_group2.loc[propagate_mask, :] = np.nan
+
     valid_mask = (statistics_group1["n"] >= 2) & (statistics_group2["n"] >= 2)
     if (~valid_mask).any() and not exists_message(
         messages, INVALID_PROTEINGROUP_DATA_MSG

diff --git a/backend/protzilla/methods/data_analysis.py b/backend/protzilla/methods/data_analysis.py
@@ -11,6 +11,7 @@
 from backend.protzilla.constants.option_types import (
     MultipleTestingCorrectionMethod,
     PValueColumnName,
+    NanPolicy,
 )
 from backend.protzilla.data_analysis.classification import random_forest, svm
 from backend.protzilla.data_analysis.clustering import (
@@ -407,6 +408,12 @@ def create_form(self):
                     label="Fold-change Z-score significance",
                     value=False,
                 ),
+                DropdownField(
+                    name="nan_policy",
+                    label="NaN policy",
+                    options=NanPolicy,
+                    value=NanPolicy.raise_,
+                ),
                 FloatField(
                     name="fc_zscore_alpha",
                     label="Z-score tail cutoff",

diff --git a/backend/tests/protzilla/data_analysis/test_differential_expression.py b/backend/tests/protzilla/data_analysis/test_differential_expression.py
@@ -423,6 +423,16 @@ def test_differential_expression_t_test_with_log_data(show_figures):
 
 
 def test_differential_expression_t_test_with_silac_ratios():
+    """
+    SILAC ratio data contains NaN values in both groups.
+    nan_policy='omit' tells scipy to drop NaN before the t-test, yielding a valid p-value.
+
+    Group1 ratios: [1.2, NaN, 1.1]  →  omit NaN for t-test: [1.2, 1.1]
+    Group2 ratios: [0.8, 0.9, NaN]  →  omit NaN for t-test: [0.8, 0.9]
+
+    NOTE: Fold change uses np.median() (not np.nanmedian()), so NaN in either group
+    causes the fold change itself to be NaN. Only the p-value is reliable here.
+    """
     silac_ratio_df = pd.DataFrame(
         data=[
             ["Sample1", "Protein1", "Gene1", 1.2],
@@ -457,6 +467,7 @@ def test_differential_expression_t_test_with_silac_ratios():
         log_base="None",
         multiple_testing_correction_method="Benjamini-Hochberg",
         alpha=0.05,
+        nan_policy="omit",
     )
 
     assert not out[DataKey.CORRECTED_P_VALUES_DF].empty
@@ -465,9 +476,214 @@ def test_differential_expression_t_test_with_silac_ratios():
         round(out[DataKey.CORRECTED_P_VALUES_DF]["corrected_p_value"].iloc[0], 4)
         == 0.0513
     )
-    assert (
-        round(out[DataKey.LOG2_FOLD_CHANGE_DF]["log2_fold_change"].iloc[0], 2) == -0.44
+    # dropna() runs before pivot, so medians are from clean data — fold change is valid.
+    # Group1 clean=[1.2, 1.1]→median=1.15, Group2 clean=[0.8, 0.9]→median=0.85
+    fc = out[DataKey.LOG2_FOLD_CHANGE_DF]["log2_fold_change"].iloc[0]
+    assert not np.isnan(fc)
+    assert round(fc, 4) == round(np.log2(0.85 / 1.15), 4)
+
+
+@pytest.fixture
+def nan_intensity_data():
+    """Intensity data where one sample per group contains a NaN value."""
+    protein_df = pd.DataFrame(
+        data=[
+            ["Sample1", "Protein1", "Gene1", 18.0],
+            ["Sample2", "Protein1", "Gene1", np.nan],
+            ["Sample3", "Protein1", "Gene1", 22.0],
+            ["Sample4", "Protein1", "Gene1", 8.0],
+            ["Sample5", "Protein1", "Gene1", 10.0],
+            ["Sample6", "Protein1", "Gene1", 12.0],
+        ],
+        columns=["Sample", "Protein ID", "Gene", "Intensity"],
+    )
+    metadata_df = pd.DataFrame(
+        data=[
+            ["Sample1", "Group1"],
+            ["Sample2", "Group1"],
+            ["Sample3", "Group1"],
+            ["Sample4", "Group2"],
+            ["Sample5", "Group2"],
+            ["Sample6", "Group2"],
+        ],
+        columns=["Sample", "Group"],
+    )
+    return protein_df, metadata_df
+
+
+@pytest.fixture
+def nan_intensity_data_insufficient_after_omit():
+    """
+    Intensity data where each group has exactly 2 samples, one of which is NaN.
+
+    After nan_policy='omit' drops the NaN, each group is left with only 1 valid
+    sample — insufficient for a two-sample t-test. scipy returns NaN for both
+    t-statistic and p-value in this degenerate case.
+
+        Group1: [18.0, NaN]  →  after omit: [18.0]  (1 sample, variance undefined)
+        Group2: [8.0,  NaN]  →  after omit: [8.0]   (1 sample, variance undefined)
+    """
+    protein_df = pd.DataFrame(
+        data=[
+            ["Sample1", "Protein1", "Gene1", 18.0],
+            ["Sample2", "Protein1", "Gene1", np.nan],
+            ["Sample3", "Protein1", "Gene1", 8.0],
+            ["Sample4", "Protein1", "Gene1", np.nan],
+        ],
+        columns=["Sample", "Protein ID", "Gene", "Intensity"],
     )
+    metadata_df = pd.DataFrame(
+        data=[
+            ["Sample1", "Group1"],
+            ["Sample2", "Group1"],
+            ["Sample3", "Group2"],
+            ["Sample4", "Group2"],
+        ],
+        columns=["Sample", "Group"],
+    )
+    return protein_df, metadata_df
+
+
+def test_t_test_nan_policy_omit_raises_nan_when_too_few_samples_remain(
+    nan_intensity_data_insufficient_after_omit,
+):
+    """
+    nan_policy='omit' with only 2 samples per group where 1 is NaN:
+    after omission only 1 valid sample remains per group, which is insufficient
+    for a t-test. scipy returns NaN for the p-value, so the protein is excluded
+    and the function signals the problem via INVALID_PROTEINGROUP_DATA_MSG.
+
+        Group1: [18.0, NaN]  →  after omit: [18.0]  →  p = NaN  →  protein excluded
+        Group2: [8.0,  NaN]  →  after omit: [8.0]
+    """
+    protein_df, metadata_df = nan_intensity_data_insufficient_after_omit
+
+    out = t_test(
+        protein_df=protein_df,
+        metadata_df=metadata_df,
+        ttest_type="Welch's t-Test",
+        grouping="Group",
+        group1="Group1",
+        group2="Group2",
+        log_base="None",
+        multiple_testing_correction_method="Benjamini-Hochberg",
+        alpha=0.05,
+        nan_policy="omit",
+    )
+
+    # No valid p-value could be computed → protein is excluded from all result dataframes.
+    assert out[DataKey.CORRECTED_P_VALUES_DF].empty
+    assert out[DataKey.LOG2_FOLD_CHANGE_DF].empty
+
+    # The function should report that insufficient data was found.
+    from backend.protzilla.data_analysis.differential_expression_helper import (
+        INVALID_PROTEINGROUP_DATA_MSG,
+    )
+
+    assert any(message == INVALID_PROTEINGROUP_DATA_MSG for message in out["messages"])
+
+
+def test_t_test_nan_policy_omit_skips_nan_samples_and_computes_result(
+    nan_intensity_data,
+):
+    """
+    nan_policy='omit': NaN samples are silently dropped before the t-test runs,
+    so scipy returns a valid p-value and the protein is kept in the results.
+
+    Data layout (nan_intensity_data fixture):
+        Group1: [18.0, NaN, 22.0]  →  after omitting NaN: [18.0, 22.0]  →  valid p-value
+        Group2: [8.0, 10.0, 12.0]  →  no NaNs
+
+    NOTE: The fold change is computed with np.median() (not np.nanmedian()), so a NaN
+    in the raw group data causes the fold change to be NaN even under 'omit' policy.
+    This is a known limitation of the current implementation.
+    """
+    protein_df, metadata_df = nan_intensity_data
+    common_kwargs = dict(
+        protein_df=protein_df,
+        metadata_df=metadata_df,
+        grouping="Group",
+        group1="Group1",
+        group2="Group2",
+        log_base="None",
+        multiple_testing_correction_method="Benjamini-Hochberg",
+        alpha=0.05,
+        nan_policy="omit",
+    )
+
+    for ttest_type in ["Welch's t-Test", "Student's t-Test"]:
+        out = t_test(ttest_type=ttest_type, **common_kwargs)
+        # The protein IS included because scipy found a valid p-value after omitting NaN.
+        assert not out[
+            DataKey.CORRECTED_P_VALUES_DF
+        ].empty, f"ttest_type={ttest_type}: Protein1 should be included after NaN samples are omitted"
+        assert out[DataKey.CORRECTED_P_VALUES_DF]["Protein ID"].tolist() == ["Protein1"]
+        # dropna() runs before pivot, so medians are from clean data — fold change is valid.
+        # Group1 clean=[18.0, 22.0]→median=20.0, Group2=[8.0,10.0,12.0]→median=10.0 → log2(10/20)=-1.0
+        fc = out[DataKey.LOG2_FOLD_CHANGE_DF]["log2_fold_change"].iloc[0]
+        assert not np.isnan(
+            fc
+        ), f"ttest_type={ttest_type}: fold change should be valid after NaN rows are omitted"
+        assert (
+            round(fc, 1) == -1.0
+        ), f"ttest_type={ttest_type}: expected fold change -1.0, got {fc}"
+
+
+def test_t_test_nan_policy_propagate_excludes_protein_with_any_nan(nan_intensity_data):
+    """
+    nan_policy='propagate': a NaN anywhere in a group causes the t-test to return NaN
+    for that protein. The protein is then excluded from the output entirely.
+
+    Data layout (nan_intensity_data fixture):
+        Group1: [18.0, NaN, 22.0]  →  one NaN → t-test returns p = NaN → Protein1 excluded
+        Group2: [8.0, 10.0, 12.0]  →  (no NaNs, but NaN from Group1 already propagated)
+
+    Expected: corrected_p_values_df is empty because no protein produced a valid p-value.
+    """
+    protein_df, metadata_df = nan_intensity_data
+
+    out = t_test(
+        protein_df=protein_df,
+        metadata_df=metadata_df,
+        ttest_type="Welch's t-Test",
+        grouping="Group",
+        group1="Group1",
+        group2="Group2",
+        log_base="None",
+        multiple_testing_correction_method="Benjamini-Hochberg",
+        alpha=0.05,
+        nan_policy="propagate",
+    )
+
+    assert out[
+        DataKey.CORRECTED_P_VALUES_DF
+    ].empty, "Protein1 should be excluded because the NaN in Group1 propagated to the p-value"
+
+
+def test_t_test_nan_policy_raise_errors_when_nan_is_present(nan_intensity_data):
+    """
+    nan_policy='raise': a ValueError is raised immediately when any NaN value is
+    detected in the input data. Use this policy to treat NaN as a hard error that
+    must be resolved before running the analysis.
+
+    Data layout (nan_intensity_data fixture):
+        Group1: [18.0, NaN, 22.0]  →  NaN is present → ValueError is raised
+    """
+    protein_df, metadata_df = nan_intensity_data
+
+    with pytest.raises(ValueError):
+        t_test(
+            protein_df=protein_df,
+            metadata_df=metadata_df,
+            ttest_type="Welch's t-Test",
+            grouping="Group",
+            group1="Group1",
+            group2="Group2",
+            log_base="None",
+            multiple_testing_correction_method="Benjamini-Hochberg",
+            alpha=0.05,
+            nan_policy="raise",
+        )
 
 
 def test_differential_expression_anova(show_figures):
@@ -939,6 +1155,7 @@ def test_differential_expression_t_test_empty_p_values():
         multiple_testing_correction_method="Benjamini-Hochberg",
         alpha=0.05,
         log_base="None",
+        nan_policy="propagate",
     )
 
     # Check that all dataframes are empty but with correct columns