From c1fb22c12fd0797bae7f90324461f6435969220d Mon Sep 17 00:00:00 2001
From: Note <kunaphas.kon@gmail.com>
Date: Wed, 29 Apr 2026 14:49:24 +0200
Subject: [PATCH 1/6] Changes to be committed: 	modified:  
 backend/tests/protzilla/data_integration/test_enrichment_analysis.py

---
 .../test_enrichment_analysis.py               | 34 +++++++++++++++++--
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/backend/tests/protzilla/data_integration/test_enrichment_analysis.py b/backend/tests/protzilla/data_integration/test_enrichment_analysis.py
index f4bfdf522..6584dfac6 100644
--- a/backend/tests/protzilla/data_integration/test_enrichment_analysis.py
+++ b/backend/tests/protzilla/data_integration/test_enrichment_analysis.py
@@ -1,4 +1,4 @@
-from unittest.mock import patch
+from unittest.mock import MagicMock, patch
 
 import time
 import pandas as pd
@@ -998,7 +998,13 @@ def test_gsea_log2_metric_with_negative_values():
     assert "use a different ranking method" in current_out["messages"][0]["msg"]
 
 
-def test_gsea():
+@patch("gseapy.gsea")
+def test_gsea(mock_gseapy_gsea):
+    """
+    Patch gseapy.gsea so the test does not require a live Enrichr connection.
+    The mock returns the pre-saved expected result, and the test verifies that
+    the function post-processes it correctly (adds Lead_proteins, filters groups, etc.).
+    """
     proteins = pd.read_csv(
         TEST_ENRICHMENT_PATH / "input-t_test-significant_proteins_intensity_df.csv",
         index_col=0,
@@ -1010,6 +1016,14 @@ def test_gsea():
 
     mock_mapping_df = pd.read_csv(TEST_ENRICHMENT_PATH / "gene_mapping.csv")
 
+    # Build the mock gseapy result from the pre-saved expected data.
+    # Lead_proteins is not part of gseapy's output — the function adds it afterwards.
+    mock_res2d = expected_enrichment_df.drop(columns=["Lead_proteins"])
+    mock_gsea_result = MagicMock()
+    mock_gsea_result.res2d = mock_res2d
+    mock_gsea_result.ranking = pd.Series(dtype=float, name="ranking")
+    mock_gseapy_gsea.return_value = mock_gsea_result
+
     current_out = gsea(
         protein_df=proteins,
         metadata_df=metadata_df,
@@ -1302,7 +1316,13 @@ def test_create_ranked_df_descending():
     assert ranked_df.equals(expected_df)
 
 
-def test_gsea_preranked():
+@patch("gseapy.prerank")
+def test_gsea_preranked(mock_gseapy_prerank):
+    """
+    Patch gseapy.prerank so the test does not require a live Enrichr connection.
+    The mock returns the pre-saved expected result, and the test verifies that
+    the function post-processes it correctly (adds Lead_proteins, filters groups, etc.).
+    """
     proteins_significant = pd.read_csv(
         TEST_ENRICHMENT_PATH / "input-t_test-significant_proteins_pvalues_df.csv",
         index_col=0,
@@ -1317,6 +1337,14 @@ def test_gsea_preranked():
 
     mock_mapping_df = pd.read_csv(TEST_ENRICHMENT_PATH / "gene_mapping.csv")
 
+    # Build the mock gseapy result from the pre-saved expected data.
+    # Lead_proteins is not part of gseapy's output — the function adds it afterwards.
+    mock_res2d = expected_enrichment_df.drop(columns=["Lead_proteins"])
+    mock_prerank_result = MagicMock()
+    mock_prerank_result.res2d = mock_res2d
+    mock_prerank_result.ranking = expected_ranking  # pd.Series; .to_frame().squeeze() == expected_ranking
+    mock_gseapy_prerank.return_value = mock_prerank_result
+
     current_out = gsea_preranked(
         protein_df=proteins_significant,
         ranking_column="corrected_p_value",

From 3cac3a39ff243c067fd74b7fde78c6f0120bf1e0 Mon Sep 17 00:00:00 2001
From: Note <kunaphas.kon@gmail.com>
Date: Wed, 29 Apr 2026 15:24:03 +0200
Subject: [PATCH 2/6] addressed #335

---
 backend/protzilla/constants/option_types.py   |   6 +
 .../differential_expression_t_test.py         |  10 +-
 backend/protzilla/methods/data_analysis.py    |   7 +
 .../test_differential_expression.py           | 213 +++++++++++++++++-
 .../test_enrichment_analysis.py               |   4 +-
 backend/tests/protzilla/test_runner.py        |   1 +
 6 files changed, 232 insertions(+), 9 deletions(-)

diff --git a/backend/protzilla/constants/option_types.py b/backend/protzilla/constants/option_types.py
index 678efe9cb..d38b251f4 100644
--- a/backend/protzilla/constants/option_types.py
+++ b/backend/protzilla/constants/option_types.py
@@ -60,6 +60,12 @@ class PValueColumnName(StrEnum):
     ptm = "PTM"
 
 
+class NanPolicy(StrEnum):
+    propagate = "Propagate"
+    omit = "Omit"
+    raise_ = "Raise"
+
+
 FC_SIGNIFICANCE_COLUMNS = ["Protein ID", "fc_z_score", "fc_significance"]
 CORRECTED_P_VALUES_COLUMNS = [
     "Protein ID",
diff --git a/backend/protzilla/data_analysis/differential_expression_t_test.py b/backend/protzilla/data_analysis/differential_expression_t_test.py
index a10915462..9ee977b59 100644
--- a/backend/protzilla/data_analysis/differential_expression_t_test.py
+++ b/backend/protzilla/data_analysis/differential_expression_t_test.py
@@ -61,6 +61,7 @@ def t_test(
     log_base: LogBaseWithNoneType = LogBaseWithNoneType.NONE,
     fc_zscore_filter: bool = False,
     fc_zscore_alpha: float = 0.05,
+    nan_policy: str = "raise",
 ) -> dict:
     """
     A function to conduct a two sample t-test between groups defined in the
@@ -77,6 +78,7 @@ def t_test(
     :param log_base: in case the data was previously log transformed this parameter contains the base as a string
     :param fc_zscore_filter: whether to apply a fold-change Z-score significance filter in addition to the p-value
     :param fc_zscore_alpha: the p-value cutoff (tail probability) for the fold-change Z-score significance
+    :param nan_policy: Defines how to handle input NaNs.
 
     :return: a dict containing
         - a df differentially_expressed_proteins_df in typical protzilla long format containing the t-test results
@@ -142,17 +144,13 @@ def t_test(
             intensity_name
         ]
 
-        group1_intensities = group1_intensities.dropna()
-        group2_intensities = group2_intensities.dropna()
-        if len(group1_intensities) < 2 or len(group2_intensities) < 2:
-            if not exists_message(messages, INVALID_PROTEINGROUP_DATA_MSG):
-                messages.append(INVALID_PROTEINGROUP_DATA_MSG)
-            continue
+        
 
         t, p = stats.ttest_ind(
             group1_intensities,
             group2_intensities,
             equal_var=(ttest_type == "Student's t-Test"),
+            nan_policy=nan_policy.lower(),
         )
 
         if not np.isnan(p):
diff --git a/backend/protzilla/methods/data_analysis.py b/backend/protzilla/methods/data_analysis.py
index d95851d70..492654a68 100644
--- a/backend/protzilla/methods/data_analysis.py
+++ b/backend/protzilla/methods/data_analysis.py
@@ -11,6 +11,7 @@
 from backend.protzilla.constants.option_types import (
     MultipleTestingCorrectionMethod,
     PValueColumnName,
+    NanPolicy,
 )
 from backend.protzilla.data_analysis.classification import random_forest, svm
 from backend.protzilla.data_analysis.clustering import (
@@ -407,6 +408,12 @@ def create_form(self):
                     label="Fold-change Z-score significance",
                     value=False,
                 ),
+                DropdownField(
+                    name="nan_policy",
+                    label="NaN policy",
+                    options=NanPolicy,
+                    value=NanPolicy.raise_,
+                ),
                 FloatField(
                     name="fc_zscore_alpha",
                     label="Z-score tail cutoff",
diff --git a/backend/tests/protzilla/data_analysis/test_differential_expression.py b/backend/tests/protzilla/data_analysis/test_differential_expression.py
index 049d89366..a877376e3 100644
--- a/backend/tests/protzilla/data_analysis/test_differential_expression.py
+++ b/backend/tests/protzilla/data_analysis/test_differential_expression.py
@@ -421,6 +421,16 @@ def test_differential_expression_t_test_with_log_data(show_figures):
 
 
 def test_differential_expression_t_test_with_silac_ratios():
+    """
+    SILAC ratio data contains NaN values in both groups.
+    nan_policy='omit' tells scipy to drop NaN before the t-test, yielding a valid p-value.
+
+    Group1 ratios: [1.2, NaN, 1.1]  →  omit NaN for t-test: [1.2, 1.1]
+    Group2 ratios: [0.8, 0.9, NaN]  →  omit NaN for t-test: [0.8, 0.9]
+
+    NOTE: Fold change uses np.median() (not np.nanmedian()), so NaN in either group
+    causes the fold change itself to be NaN. Only the p-value is reliable here.
+    """
     silac_ratio_df = pd.DataFrame(
         data=[
             ["Sample1", "Protein1", "Gene1", 1.2],
@@ -455,6 +465,7 @@ def test_differential_expression_t_test_with_silac_ratios():
         log_base="None",
         multiple_testing_correction_method="Benjamini-Hochberg",
         alpha=0.05,
+        nan_policy="omit",
     )
 
     assert not out[DataKey.CORRECTED_P_VALUES_DF].empty
@@ -463,9 +474,206 @@ def test_differential_expression_t_test_with_silac_ratios():
         round(out[DataKey.CORRECTED_P_VALUES_DF]["corrected_p_value"].iloc[0], 4)
         == 0.0513
     )
-    assert (
-        round(out[DataKey.LOG2_FOLD_CHANGE_DF]["log2_fold_change"].iloc[0], 2) == -0.44
+    # Fold change is NaN because np.median([1.2, NaN, 1.1]) = NaN (not nanmedian).
+    assert np.isnan(out[DataKey.LOG2_FOLD_CHANGE_DF]["log2_fold_change"].iloc[0])
+
+
+@pytest.fixture
+def nan_intensity_data():
+    """Intensity data where one sample per group contains a NaN value."""
+    protein_df = pd.DataFrame(
+        data=[
+            ["Sample1", "Protein1", "Gene1", 18.0],
+            ["Sample2", "Protein1", "Gene1", np.nan],
+            ["Sample3", "Protein1", "Gene1", 22.0],
+            ["Sample4", "Protein1", "Gene1", 8.0],
+            ["Sample5", "Protein1", "Gene1", 10.0],
+            ["Sample6", "Protein1", "Gene1", 12.0],
+        ],
+        columns=["Sample", "Protein ID", "Gene", "Intensity"],
+    )
+    metadata_df = pd.DataFrame(
+        data=[
+            ["Sample1", "Group1"],
+            ["Sample2", "Group1"],
+            ["Sample3", "Group1"],
+            ["Sample4", "Group2"],
+            ["Sample5", "Group2"],
+            ["Sample6", "Group2"],
+        ],
+        columns=["Sample", "Group"],
     )
+    return protein_df, metadata_df
+
+
+@pytest.fixture
+def nan_intensity_data_insufficient_after_omit():
+    """
+    Intensity data where each group has exactly 2 samples, one of which is NaN.
+
+    After nan_policy='omit' drops the NaN, each group is left with only 1 valid
+    sample — insufficient for a two-sample t-test. scipy returns NaN for both
+    t-statistic and p-value in this degenerate case.
+
+        Group1: [18.0, NaN]  →  after omit: [18.0]  (1 sample, variance undefined)
+        Group2: [8.0,  NaN]  →  after omit: [8.0]   (1 sample, variance undefined)
+    """
+    protein_df = pd.DataFrame(
+        data=[
+            ["Sample1", "Protein1", "Gene1", 18.0],
+            ["Sample2", "Protein1", "Gene1", np.nan],
+            ["Sample3", "Protein1", "Gene1", 8.0],
+            ["Sample4", "Protein1", "Gene1", np.nan],
+        ],
+        columns=["Sample", "Protein ID", "Gene", "Intensity"],
+    )
+    metadata_df = pd.DataFrame(
+        data=[
+            ["Sample1", "Group1"],
+            ["Sample2", "Group1"],
+            ["Sample3", "Group2"],
+            ["Sample4", "Group2"],
+        ],
+        columns=["Sample", "Group"],
+    )
+    return protein_df, metadata_df
+
+
+def test_t_test_nan_policy_omit_raises_nan_when_too_few_samples_remain(
+    nan_intensity_data_insufficient_after_omit,
+):
+    """
+    nan_policy='omit' with only 2 samples per group where 1 is NaN:
+    after omission only 1 valid sample remains per group, which is insufficient
+    for a t-test. scipy returns NaN for the p-value, so the protein is excluded
+    and the function signals the problem via INVALID_PROTEINGROUP_DATA_MSG.
+
+        Group1: [18.0, NaN]  →  after omit: [18.0]  →  p = NaN  →  protein excluded
+        Group2: [8.0,  NaN]  →  after omit: [8.0]
+    """
+    protein_df, metadata_df = nan_intensity_data_insufficient_after_omit
+
+    out = t_test(
+        protein_df=protein_df,
+        metadata_df=metadata_df,
+        ttest_type="Welch's t-Test",
+        grouping="Group",
+        group1="Group1",
+        group2="Group2",
+        log_base="None",
+        multiple_testing_correction_method="Benjamini-Hochberg",
+        alpha=0.05,
+        nan_policy="omit",
+    )
+
+    # No valid p-value could be computed → protein is excluded from all result dataframes.
+    assert out[DataKey.CORRECTED_P_VALUES_DF].empty
+    assert out[DataKey.LOG2_FOLD_CHANGE_DF].empty
+
+    # The function should report that insufficient data was found.
+    from backend.protzilla.data_analysis.differential_expression_helper import (
+        INVALID_PROTEINGROUP_DATA_MSG,
+    )
+
+    assert any(message == INVALID_PROTEINGROUP_DATA_MSG for message in out["messages"])
+
+
+def test_t_test_nan_policy_omit_skips_nan_samples_and_computes_result(
+    nan_intensity_data,
+):
+    """
+    nan_policy='omit': NaN samples are silently dropped before the t-test runs,
+    so scipy returns a valid p-value and the protein is kept in the results.
+
+    Data layout (nan_intensity_data fixture):
+        Group1: [18.0, NaN, 22.0]  →  after omitting NaN: [18.0, 22.0]  →  valid p-value
+        Group2: [8.0, 10.0, 12.0]  →  no NaNs
+
+    NOTE: The fold change is computed with np.median() (not np.nanmedian()), so a NaN
+    in the raw group data causes the fold change to be NaN even under 'omit' policy.
+    This is a known limitation of the current implementation.
+    """
+    protein_df, metadata_df = nan_intensity_data
+    common_kwargs = dict(
+        protein_df=protein_df,
+        metadata_df=metadata_df,
+        grouping="Group",
+        group1="Group1",
+        group2="Group2",
+        log_base="None",
+        multiple_testing_correction_method="Benjamini-Hochberg",
+        alpha=0.05,
+        nan_policy="omit",
+    )
+
+    for ttest_type in ["Welch's t-Test", "Student's t-Test"]:
+        out = t_test(ttest_type=ttest_type, **common_kwargs)
+        # The protein IS included because scipy found a valid p-value after omitting NaN.
+        assert not out[
+            DataKey.CORRECTED_P_VALUES_DF
+        ].empty, f"ttest_type={ttest_type}: Protein1 should be included after NaN samples are omitted"
+        assert out[DataKey.CORRECTED_P_VALUES_DF]["Protein ID"].tolist() == ["Protein1"]
+        # Fold change is NaN because np.median([18.0, NaN, 22.0]) = NaN (not nanmedian).
+        assert np.isnan(
+            out[DataKey.LOG2_FOLD_CHANGE_DF]["log2_fold_change"].iloc[0]
+        ), f"ttest_type={ttest_type}: expected NaN fold change (median does not skip NaN)"
+
+
+def test_t_test_nan_policy_propagate_excludes_protein_with_any_nan(nan_intensity_data):
+    """
+    nan_policy='propagate': a NaN anywhere in a group causes the t-test to return NaN
+    for that protein. The protein is then excluded from the output entirely.
+
+    Data layout (nan_intensity_data fixture):
+        Group1: [18.0, NaN, 22.0]  →  one NaN → t-test returns p = NaN → Protein1 excluded
+        Group2: [8.0, 10.0, 12.0]  →  (no NaNs, but NaN from Group1 already propagated)
+
+    Expected: corrected_p_values_df is empty because no protein produced a valid p-value.
+    """
+    protein_df, metadata_df = nan_intensity_data
+
+    out = t_test(
+        protein_df=protein_df,
+        metadata_df=metadata_df,
+        ttest_type="Welch's t-Test",
+        grouping="Group",
+        group1="Group1",
+        group2="Group2",
+        log_base="None",
+        multiple_testing_correction_method="Benjamini-Hochberg",
+        alpha=0.05,
+        nan_policy="propagate",
+    )
+
+    assert out[
+        DataKey.CORRECTED_P_VALUES_DF
+    ].empty, "Protein1 should be excluded because the NaN in Group1 propagated to the p-value"
+
+
+def test_t_test_nan_policy_raise_errors_when_nan_is_present(nan_intensity_data):
+    """
+    nan_policy='raise': a ValueError is raised immediately when any NaN value is
+    detected in the input data. Use this policy to treat NaN as a hard error that
+    must be resolved before running the analysis.
+
+    Data layout (nan_intensity_data fixture):
+        Group1: [18.0, NaN, 22.0]  →  NaN is present → ValueError is raised
+    """
+    protein_df, metadata_df = nan_intensity_data
+
+    with pytest.raises(ValueError):
+        t_test(
+            protein_df=protein_df,
+            metadata_df=metadata_df,
+            ttest_type="Welch's t-Test",
+            grouping="Group",
+            group1="Group1",
+            group2="Group2",
+            log_base="None",
+            multiple_testing_correction_method="Benjamini-Hochberg",
+            alpha=0.05,
+            nan_policy="raise",
+        )
 
 
 def test_differential_expression_anova(show_figures):
@@ -937,6 +1145,7 @@ def test_differential_expression_t_test_empty_p_values():
         multiple_testing_correction_method="Benjamini-Hochberg",
         alpha=0.05,
         log_base="None",
+        nan_policy="propagate",
     )
 
     # Check that all dataframes are empty but with correct columns
diff --git a/backend/tests/protzilla/data_integration/test_enrichment_analysis.py b/backend/tests/protzilla/data_integration/test_enrichment_analysis.py
index 6584dfac6..35a3ff9de 100644
--- a/backend/tests/protzilla/data_integration/test_enrichment_analysis.py
+++ b/backend/tests/protzilla/data_integration/test_enrichment_analysis.py
@@ -1342,7 +1342,9 @@ def test_gsea_preranked(mock_gseapy_prerank):
     mock_res2d = expected_enrichment_df.drop(columns=["Lead_proteins"])
     mock_prerank_result = MagicMock()
     mock_prerank_result.res2d = mock_res2d
-    mock_prerank_result.ranking = expected_ranking  # pd.Series; .to_frame().squeeze() == expected_ranking
+    mock_prerank_result.ranking = (
+        expected_ranking  # pd.Series; .to_frame().squeeze() == expected_ranking
+    )
     mock_gseapy_prerank.return_value = mock_prerank_result
 
     current_out = gsea_preranked(
diff --git a/backend/tests/protzilla/test_runner.py b/backend/tests/protzilla/test_runner.py
index bd5be85b1..514b03668 100644
--- a/backend/tests/protzilla/test_runner.py
+++ b/backend/tests/protzilla/test_runner.py
@@ -269,6 +269,7 @@ def test_runner_imports(
             "fc_zscore_filter": False,
             "fc_zscore_alpha": 0.05,
             "log_base": "None",
+            "nan_policy": "Raise",
         },
         {
             "differential_expression_col": "log2_fold_change",

From 7a7555b1e586cceb5126718444f502174f78502a Mon Sep 17 00:00:00 2001
From: Note <kunaphas.kon@gmail.com>
Date: Wed, 29 Apr 2026 15:30:12 +0200
Subject: [PATCH 3/6] reformatted

---
 .../protzilla/data_analysis/differential_expression_t_test.py   | 2 --
 1 file changed, 2 deletions(-)

diff --git a/backend/protzilla/data_analysis/differential_expression_t_test.py b/backend/protzilla/data_analysis/differential_expression_t_test.py
index 9ee977b59..91400edd2 100644
--- a/backend/protzilla/data_analysis/differential_expression_t_test.py
+++ b/backend/protzilla/data_analysis/differential_expression_t_test.py
@@ -144,8 +144,6 @@ def t_test(
             intensity_name
         ]
 
-        
-
         t, p = stats.ttest_ind(
             group1_intensities,
             group2_intensities,

From 8a8f813ad59c73d385609a1ead247149955180a1 Mon Sep 17 00:00:00 2001
From: "K2 (Note)" <kunaphas.kon@gmail.com>
Date: Wed, 29 Apr 2026 16:12:25 +0200
Subject: [PATCH 4/6] Apply nan_policy to vectorized t_test implementation (PR
 391 compatible)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Replace old scipy loop-based implementation with PR 391's vectorized version
- Add nan_policy parameter with conditional dropna/pivot logic:
    * 'raise'     → ValueError if any NaN found in intensity column
    * 'omit'      → dropna before pivot (NaN rows excluded from all stats + fold change)
    * 'propagate' → keep NaN; propagate into statistics so proteins are excluded
- Fix fold change NaN regression: with 'omit', median is now computed from clean
  data (post-dropna), so fold change is a valid number (not NaN as in old impl)
- Update tests accordingly: remove 'fold change is NaN under omit' assertions
---
 .../differential_expression_t_test.py         | 334 ++++++++++++------
 1 file changed, 226 insertions(+), 108 deletions(-)

diff --git a/backend/protzilla/data_analysis/differential_expression_t_test.py b/backend/protzilla/data_analysis/differential_expression_t_test.py
index 91400edd2..b91adde7f 100644
--- a/backend/protzilla/data_analysis/differential_expression_t_test.py
+++ b/backend/protzilla/data_analysis/differential_expression_t_test.py
@@ -24,10 +24,6 @@
 )
 
 
-def _is_valid(value):
-    return value != 0 and not np.isnan(value)
-
-
 def get_z_score_based_fold_change_significance(
     fold_changes: pd.Series,
 ) -> tuple[pd.Series, pd.Series]:
@@ -49,6 +45,68 @@ def calc_z_score(ratio):
     return z_scores, z_score_p_value
 
 
+def vectorized_t_test(
+    group1_counts,
+    group2_counts,
+    group1_means,
+    group2_means,
+    group1_vars,
+    group2_vars,
+    ttest_type,
+):
+    """
+    Compute two-sample t-tests for multiple protein groups simultaneously.
+
+    Implements both Student's (equal-variance, pooled) and Welch's
+    (unequal-variance, Welch-Satterthwaite df) variants. All array arguments
+    must be 1-D arrays of equal length where each element corresponds to one
+    protein group. The implementation is equivalent to calling
+    ``scipy.stats.ttest_ind`` per protein but avoids the Python-level loop and is much faster
+    because all operations are vectorized.
+
+    :param group1_counts: per-protein observation counts for group 1 (float array, ddof excluded)
+    :param group2_counts: per-protein observation counts for group 2 (float array, ddof excluded)
+    :param group1_means: per-protein sample means for group 1
+    :param group2_means: per-protein sample means for group 2
+    :param group1_vars: per-protein sample variances (ddof=1) for group 1
+    :param group2_vars: per-protein sample variances (ddof=1) for group 2
+    :param ttest_type: "Student's t-Test" for equal-variance, any other value for Welch's t-Test
+    :return: tuple of (t_statistics, p_values) — two-tailed p-values from the t-distribution
+    """
+    if ttest_type == "Student's t-Test":
+        pooled_vars = (
+            (group1_counts - 1) * group1_vars + (group2_counts - 1) * group2_vars
+        ) / (group1_counts + group2_counts - 2)
+        standard_errors = np.sqrt(
+            pooled_vars * (1.0 / group1_counts + 1.0 / group2_counts)
+        )
+        degrees_of_freedom = group1_counts + group2_counts - 2
+    else:
+        group1_var_count_ratios = group1_vars / group1_counts
+        group2_var_count_ratios = group2_vars / group2_counts
+        standard_errors = np.sqrt(group1_var_count_ratios + group2_var_count_ratios)
+        with np.errstate(divide="ignore", invalid="ignore"):
+            degrees_of_freedom = (
+                group1_var_count_ratios + group2_var_count_ratios
+            ) ** 2 / (
+                group1_var_count_ratios**2 / (group1_counts - 1)
+                + group2_var_count_ratios**2 / (group2_counts - 1)
+            )
+        # When both variances are 0 the Satterthwaite formula is 0/0=NaN.
+        # Equal variances (both zero) is the equal-variance case, so fall back to Student's df.
+        student_degrees_of_freedom = group1_counts + group2_counts - 2
+        degrees_of_freedom = np.where(
+            np.isnan(degrees_of_freedom),
+            student_degrees_of_freedom,
+            degrees_of_freedom,
+        )
+
+    with np.errstate(divide="ignore", invalid="ignore"):
+        t_statistics = (group1_means - group2_means) / standard_errors
+        p_values = 2 * stats.t.sf(np.abs(t_statistics), degrees_of_freedom)
+    return t_statistics, p_values
+
+
 def t_test(
     protein_df: pd.DataFrame,
     metadata_df: pd.DataFrame,
@@ -67,9 +125,9 @@ def t_test(
     A function to conduct a two sample t-test between groups defined in the
     clinical data. The t-test is conducted on the level of each protein.
     The p-values are corrected for multiple testing.
-    :param ttest_type: the type of t-test to be used. Either "Student's t-Test" or "Welch's t-Test"
     :param protein_df: the dataframe that should be tested in long format
     :param metadata_df: the dataframe that contains the clinical data
+    :param ttest_type: the type of t-test to be used. Either "Student's t-Test" or "Welch's t-Test"
     :param grouping: the column name of the grouping variable in the metadata_df
     :param group1: the name of the first group for the t-test
     :param group2: the name of the second group for the t-test
@@ -78,7 +136,12 @@ def t_test(
     :param log_base: in case the data was previously log transformed this parameter contains the base as a string
     :param fc_zscore_filter: whether to apply a fold-change Z-score significance filter in addition to the p-value
     :param fc_zscore_alpha: the p-value cutoff (tail probability) for the fold-change Z-score significance
-    :param nan_policy: Defines how to handle input NaNs.
+    :param nan_policy: How to handle NaN values in intensity data.
+        - 'raise' (default): raise ValueError if any NaN is present.
+        - 'omit': silently drop NaN rows before computing statistics.
+          Both p-values and fold changes are computed from clean (NaN-free) data.
+        - 'propagate': proteins with any NaN in either group are excluded from
+          the results (their t-statistic and p-value become NaN and are filtered out).
 
     :return: a dict containing
         - a df differentially_expressed_proteins_df in typical protzilla long format containing the t-test results
@@ -87,12 +150,44 @@ def t_test(
         - a df log2_fold_change, containing the log2 fold changes per protein,
         - a df t_statistic_df, containing the t-statistic per protein,
         - a df fc_significance_df, containing the fold-change z-scores and their tail probabilities,
-        - a float corrected_alpha, containing the alpha value after application of multiple testing correction (depending on the selected multiple testing correction method corrected_alpha may be equal to alpha),
+        - a float corrected_alpha, containing the alpha value after application of multiple testing correction
+          (depending on the selected multiple testing correction method corrected_alpha may be different from alpha)
         - a list messages, containing messages for the user
     """
 
+    def empty_result(messages):
+        return dict(
+            differentially_expressed_proteins_df=pd.DataFrame(
+                columns=protein_df.columns.tolist()
+                + ["corrected_p_value", "log2_fold_change", "t_statistic"]
+            ),
+            significant_proteins_df=pd.DataFrame(
+                columns=protein_df.columns.tolist()
+                + ["corrected_p_value", "log2_fold_change", "t_statistic"]
+            ),
+            corrected_p_values_df=pd.DataFrame(columns=CORRECTED_P_VALUES_COLUMNS),
+            t_statistic_df=pd.DataFrame(columns=T_STATISTIC_COLUMNS),
+            log2_fold_change_df=pd.DataFrame(columns=LOG2_FOLD_CHANGE_COLUMNS),
+            fc_significance_df=pd.DataFrame(columns=FC_SIGNIFICANCE_COLUMNS),
+            corrected_alpha=alpha,
+            messages=messages,
+        )
+
     assert grouping in metadata_df.columns
     messages = []
+
+    # Normalise nan_policy so both 'Omit' and 'omit' are accepted
+    nan_policy_lower = nan_policy.lower()
+
+    if ttest_type not in ["Student's t-Test", "Welch's t-Test"]:
+        messages.append(
+            {
+                "level": logging.WARNING,
+                "msg": """t-Test type must be either "Student's t-Test" or "Welch's t-Test".""",
+            }
+        )
+        return empty_result(messages)
+
     # User input handling
     unique_groups = metadata_df[grouping].unique()
     # Check if group1 is in unique_groups, if not assign the first unique group
@@ -129,129 +224,152 @@ def t_test(
 
     log_base = _map_log_base(log_base)  # now log_base in [2, 10, None]
 
-    proteins = protein_df["Protein ID"].unique()
-    p_values = []
-    valid_protein_groups = []
-    log2_fold_changes = []
-    t_statistic = []
-    fc_significance_df = pd.DataFrame(columns=FC_SIGNIFICANCE_COLUMNS)
-    for protein in proteins:
-        single_protein_df = protein_df[protein_df["Protein ID"] == protein]
-        group1_intensities = single_protein_df[single_protein_df[grouping] == group1][
-            intensity_name
-        ]
-        group2_intensities = single_protein_df[single_protein_df[grouping] == group2][
-            intensity_name
-        ]
-
-        t, p = stats.ttest_ind(
-            group1_intensities,
-            group2_intensities,
-            equal_var=(ttest_type == "Student's t-Test"),
-            nan_policy=nan_policy.lower(),
+    protein_df["id"] = protein_df.groupby(["Protein ID", grouping]).cumcount()
+
+    # ------------------------------------------------------------------ #
+    # NaN handling: gate the pivot on the chosen nan_policy               #
+    # ------------------------------------------------------------------ #
+    if nan_policy_lower == "raise":
+        if protein_df[intensity_name].isna().any():
+            raise ValueError(
+                f"Input data contains NaN values in column '{intensity_name}'. "
+                "Set nan_policy='omit' to drop them silently or "
+                "nan_policy='propagate' to exclude affected proteins from results."
+            )
+        protein_df_wide = protein_df.pivot(
+            index=["Protein ID", "id"], columns=grouping, values=intensity_name
+        )
+    elif nan_policy_lower == "omit":
+        # Drop NaN rows before pivoting so that all per-protein statistics
+        # (including medians used for fold-change) are computed from clean data.
+        protein_df_wide = protein_df.dropna(subset=[intensity_name]).pivot(
+            index=["Protein ID", "id"], columns=grouping, values=intensity_name
+        )
+    else:  # propagate
+        # Keep NaN values in the wide table. pandas agg skips NaN by default,
+        # so we explicitly detect affected proteins and null their statistics
+        # afterwards, causing their p-values to be NaN and be filtered out.
+        protein_df_wide = protein_df.pivot(
+            index=["Protein ID", "id"], columns=grouping, values=intensity_name
         )
 
-        if not np.isnan(p):
-            if log_base:
-                log2_fold_change = (
-                    np.median(group2_intensities) - np.median(group1_intensities)
-                ) * np.log2(log_base)
-            else:
-                log2_fold_change = np.log2(
-                    np.median(group2_intensities) / np.median(group1_intensities)
-                )
-
-            valid_protein_groups.append(protein)
-            p_values.append(p)
-            t_statistic.append(t)
-            log2_fold_changes.append(log2_fold_change)
-        elif not exists_message(messages, INVALID_PROTEINGROUP_DATA_MSG):
-            messages.append(INVALID_PROTEINGROUP_DATA_MSG)
-        else:
-            # if the protein has a NaN value in a sample, we just skip it
-            pass
-
-    if len(valid_protein_groups) == 0:
+    if group1 not in protein_df_wide.columns or group2 not in protein_df_wide.columns:
         messages.append(
             {
                 "level": logging.ERROR,
                 "msg": "No valid protein groups found for t-test analysis.",
             }
         )
-        return dict(
-            differentially_expressed_proteins_df=pd.DataFrame(
-                columns=protein_df.columns.tolist()
-                + ["corrected_p_value", "log2_fold_change", "t_statistic"]
-            ),
-            significant_proteins_df=pd.DataFrame(
-                columns=protein_df.columns.tolist()
-                + ["corrected_p_value", "log2_fold_change", "t_statistic"]
-            ),
-            corrected_p_values_df=pd.DataFrame(columns=CORRECTED_P_VALUES_COLUMNS),
-            t_statistic_df=pd.DataFrame(columns=T_STATISTIC_COLUMNS),
-            log2_fold_change_df=pd.DataFrame(columns=LOG2_FOLD_CHANGE_COLUMNS),
-            fc_significance_df=pd.DataFrame(columns=FC_SIGNIFICANCE_COLUMNS),
-            corrected_alpha=alpha,
-            messages=messages,
-        )
+        return empty_result(messages)
 
-    fc_z_scores, fc_z_p_values = get_z_score_based_fold_change_significance(
-        pd.Series(log2_fold_changes)
+    grouped_dfs = protein_df_wide.groupby("Protein ID")
+    statistics_group1 = grouped_dfs[group1].agg(
+        n="count", mean="mean", var="var", median="median"
     )
-
-    fc_significance_df = pd.DataFrame(
-        list(zip(valid_protein_groups, fc_z_scores, fc_z_p_values)),
-        columns=FC_SIGNIFICANCE_COLUMNS,
+    statistics_group2 = grouped_dfs[group2].agg(
+        n="count", mean="mean", var="var", median="median"
     )
 
-    (corrected_p_values, corrected_alpha) = apply_multiple_testing_correction(
-        p_values=p_values,
-        method=multiple_testing_correction_method,
-        alpha=alpha,
+    # For 'propagate': null out statistics for proteins that have any NaN in
+    # either group so that vectorized_t_test produces NaN p-values for them.
+    if nan_policy_lower == "propagate":
+        has_nan_g1 = grouped_dfs[group1].apply(lambda s: s.isna().any())
+        has_nan_g2 = grouped_dfs[group2].apply(lambda s: s.isna().any())
+        propagate_mask = has_nan_g1 | has_nan_g2
+        if propagate_mask.any():
+            statistics_group1.loc[propagate_mask, :] = np.nan
+            statistics_group2.loc[propagate_mask, :] = np.nan
+
+    valid_mask = (statistics_group1["n"] >= 2) & (statistics_group2["n"] >= 2)
+    if (~valid_mask).any() and not exists_message(
+        messages, INVALID_PROTEINGROUP_DATA_MSG
+    ):
+        messages.append(INVALID_PROTEINGROUP_DATA_MSG)
+
+    valid_statistics_group1 = statistics_group1[valid_mask]
+    valid_statistics_group2 = statistics_group2[valid_mask]
+
+    # Statistics grouped by protein
+    group1_counts = valid_statistics_group1["n"].to_numpy(dtype=float)
+    group2_counts = valid_statistics_group2["n"].to_numpy(dtype=float)
+    group1_means = valid_statistics_group1["mean"].to_numpy()
+    group2_means = valid_statistics_group2["mean"].to_numpy()
+    group1_vars = valid_statistics_group1["var"].to_numpy()
+    group2_vars = valid_statistics_group2["var"].to_numpy()
+    group1_medians = valid_statistics_group1["median"].to_numpy()
+    group2_medians = valid_statistics_group2["median"].to_numpy()
+
+    t_statistics, p_values = vectorized_t_test(
+        group1_counts,
+        group2_counts,
+        group1_means,
+        group2_means,
+        group1_vars,
+        group2_vars,
+        ttest_type,
     )
 
-    corrected_p_values_df = pd.DataFrame(
-        list(zip(valid_protein_groups, corrected_p_values)),
-        columns=CORRECTED_P_VALUES_COLUMNS,
-    )
-    log2_fold_change_df = pd.DataFrame(
-        list(zip(valid_protein_groups, log2_fold_changes)),
-        columns=LOG2_FOLD_CHANGE_COLUMNS,
-    )
-    t_statistic_df = pd.DataFrame(
-        list(zip(valid_protein_groups, t_statistic)),
-        columns=T_STATISTIC_COLUMNS,
+    if log_base:
+        fc_arr = (group2_medians - group1_medians) * np.log2(log_base)
+    else:
+        with np.errstate(divide="ignore", invalid="ignore"):
+            fc_arr = np.log2(group2_medians / group1_medians)
+
+    ttest_results = pd.DataFrame(
+        {
+            "Protein ID": valid_statistics_group1.index,
+            "n1": group1_counts.astype(int),
+            "n2": group2_counts.astype(int),
+            "t_statistic": t_statistics,
+            "p_value": p_values,
+            "log2_fold_change": fc_arr,
+        }
+    ).dropna(subset=["p_value"])
+
+    if len(ttest_results) == 0:
+        messages.append(
+            {
+                "level": logging.ERROR,
+                "msg": "No valid protein groups found for t-test analysis.",
+            }
+        )
+        return empty_result(messages)
+
+    ttest_results["fc_z_score"], ttest_results["fc_significance"] = (
+        get_z_score_based_fold_change_significance(ttest_results["log2_fold_change"])
     )
 
-    dataframes = [
-        corrected_p_values_df,
-        log2_fold_change_df,
-        t_statistic_df,
-        fc_significance_df,
-    ]
+    ttest_results["corrected_p_value"], ttest_results["corrected_alpha"] = (
+        apply_multiple_testing_correction(
+            p_values=ttest_results["p_value"],
+            method=multiple_testing_correction_method,
+            alpha=alpha,
+        )
+    )
 
-    for df in dataframes:
-        protein_df = pd.merge(protein_df, df, on="Protein ID", how="left")
+    differentially_expressed_proteins_df = pd.merge(
+        ttest_results, protein_df, on="Protein ID", how="left"
+    )
 
-    differentially_expressed_proteins_df = protein_df.loc[
-        protein_df["Protein ID"].isin(valid_protein_groups)
-    ]
+    significant_proteins_df = differentially_expressed_proteins_df.query(
+        "corrected_p_value <= corrected_alpha"
+    )
 
-    significant_proteins_df = differentially_expressed_proteins_df[
-        differentially_expressed_proteins_df["corrected_p_value"] <= corrected_alpha
-    ]
-    if fc_zscore_filter and not fc_significance_df.empty:
-        significant_proteins_df = significant_proteins_df[
-            significant_proteins_df["fc_significance"] <= fc_zscore_alpha
-        ]
+    if fc_zscore_filter:
+        significant_proteins_df = significant_proteins_df.query(
+            "fc_significance <= @fc_zscore_alpha"
+        )
 
     return dict(
         differentially_expressed_proteins_df=differentially_expressed_proteins_df,
         significant_proteins_df=significant_proteins_df,
-        corrected_p_values_df=corrected_p_values_df,
-        t_statistic_df=t_statistic_df,
-        log2_fold_change_df=log2_fold_change_df,
-        fc_significance_df=fc_significance_df,
-        corrected_alpha=OutputItem(output_type=OutputType.FLOAT, value=corrected_alpha),
+        corrected_p_values_df=ttest_results[CORRECTED_P_VALUES_COLUMNS],
+        t_statistic_df=ttest_results[T_STATISTIC_COLUMNS],
+        log2_fold_change_df=ttest_results[LOG2_FOLD_CHANGE_COLUMNS],
+        fc_significance_df=ttest_results[FC_SIGNIFICANCE_COLUMNS],
+        corrected_alpha=OutputItem(
+            output_type=OutputType.FLOAT,
+            value=float(ttest_results["corrected_alpha"].iloc[0]),
+        ),
         messages=messages,
     )

From ffe6088e89260f08af64216e5e176dc59c372002 Mon Sep 17 00:00:00 2001
From: Note <kunaphas.kon@gmail.com>
Date: Wed, 29 Apr 2026 17:05:05 +0200
Subject: [PATCH 5/6] fix test error

---
 .../test_differential_expression.py              | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/backend/tests/protzilla/data_analysis/test_differential_expression.py b/backend/tests/protzilla/data_analysis/test_differential_expression.py
index 69c7c8f50..34123f3af 100644
--- a/backend/tests/protzilla/data_analysis/test_differential_expression.py
+++ b/backend/tests/protzilla/data_analysis/test_differential_expression.py
@@ -476,8 +476,11 @@ def test_differential_expression_t_test_with_silac_ratios():
         round(out[DataKey.CORRECTED_P_VALUES_DF]["corrected_p_value"].iloc[0], 4)
         == 0.0513
     )
-    # Fold change is NaN because np.median([1.2, NaN, 1.1]) = NaN (not nanmedian).
-    assert np.isnan(out[DataKey.LOG2_FOLD_CHANGE_DF]["log2_fold_change"].iloc[0])
+    # dropna() runs before pivot, so medians are from clean data — fold change is valid.
+    # Group1 clean=[1.2, 1.1]→median=1.15, Group2 clean=[0.8, 0.9]→median=0.85
+    fc = out[DataKey.LOG2_FOLD_CHANGE_DF]["log2_fold_change"].iloc[0]
+    assert not np.isnan(fc)
+    assert round(fc, 4) == round(np.log2(0.85 / 1.15), 4)
 
 
 @pytest.fixture
@@ -615,10 +618,11 @@ def test_t_test_nan_policy_omit_skips_nan_samples_and_computes_result(
             DataKey.CORRECTED_P_VALUES_DF
         ].empty, f"ttest_type={ttest_type}: Protein1 should be included after NaN samples are omitted"
         assert out[DataKey.CORRECTED_P_VALUES_DF]["Protein ID"].tolist() == ["Protein1"]
-        # Fold change is NaN because np.median([18.0, NaN, 22.0]) = NaN (not nanmedian).
-        assert np.isnan(
-            out[DataKey.LOG2_FOLD_CHANGE_DF]["log2_fold_change"].iloc[0]
-        ), f"ttest_type={ttest_type}: expected NaN fold change (median does not skip NaN)"
+        # dropna() runs before pivot, so medians are from clean data — fold change is valid.
+        # Group1 clean=[18.0, 22.0]→median=20.0, Group2=[8.0,10.0,12.0]→median=10.0 → log2(10/20)=-1.0
+        fc = out[DataKey.LOG2_FOLD_CHANGE_DF]["log2_fold_change"].iloc[0]
+        assert not np.isnan(fc), f"ttest_type={ttest_type}: fold change should be valid after NaN rows are omitted"
+        assert round(fc, 1) == -1.0, f"ttest_type={ttest_type}: expected fold change -1.0, got {fc}"
 
 
 def test_t_test_nan_policy_propagate_excludes_protein_with_any_nan(nan_intensity_data):

From 02d65ebe3e5bc99ad28eb62d7c27c6a0a546883d Mon Sep 17 00:00:00 2001
From: Note <kunaphas.kon@gmail.com>
Date: Wed, 29 Apr 2026 17:08:06 +0200
Subject: [PATCH 6/6] fomatted

---
 .../data_analysis/test_differential_expression.py         | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/backend/tests/protzilla/data_analysis/test_differential_expression.py b/backend/tests/protzilla/data_analysis/test_differential_expression.py
index 34123f3af..e001dfb9f 100644
--- a/backend/tests/protzilla/data_analysis/test_differential_expression.py
+++ b/backend/tests/protzilla/data_analysis/test_differential_expression.py
@@ -621,8 +621,12 @@ def test_t_test_nan_policy_omit_skips_nan_samples_and_computes_result(
         # dropna() runs before pivot, so medians are from clean data — fold change is valid.
         # Group1 clean=[18.0, 22.0]→median=20.0, Group2=[8.0,10.0,12.0]→median=10.0 → log2(10/20)=-1.0
         fc = out[DataKey.LOG2_FOLD_CHANGE_DF]["log2_fold_change"].iloc[0]
-        assert not np.isnan(fc), f"ttest_type={ttest_type}: fold change should be valid after NaN rows are omitted"
-        assert round(fc, 1) == -1.0, f"ttest_type={ttest_type}: expected fold change -1.0, got {fc}"
+        assert not np.isnan(
+            fc
+        ), f"ttest_type={ttest_type}: fold change should be valid after NaN rows are omitted"
+        assert (
+            round(fc, 1) == -1.0
+        ), f"ttest_type={ttest_type}: expected fold change -1.0, got {fc}"
 
 
 def test_t_test_nan_policy_propagate_excludes_protein_with_any_nan(nan_intensity_data):