From 5598f5853a03bf01972cc9ce9d7b618f3a46d9b3 Mon Sep 17 00:00:00 2001 From: Alejandro Fontal Date: Mon, 9 Feb 2026 23:05:14 +0100 Subject: [PATCH 1/3] fix: correct sdc edge cases and plotting warnings --- README.md | 4 +- docs/_static/.gitkeep | 1 + docs/index.rst | 1 - docs/installation.rst | 2 +- sdcpy/core.py | 182 ++++++++++++++++----------- sdcpy/io.py | 10 +- sdcpy/plotting.py | 24 +++- sdcpy/scale_dependent_correlation.py | 35 ++++-- tests/test_core.py | 40 +++++- tests/test_io.py | 18 ++- tests/test_plotting.py | 28 +++++ tests/test_ranges_df.py | 27 ++++ tests/test_sdc_analysis.py | 8 +- 13 files changed, 278 insertions(+), 102 deletions(-) create mode 100644 docs/_static/.gitkeep diff --git a/README.md b/README.md index fd87053..f1c8b80 100644 --- a/README.md +++ b/README.md @@ -132,7 +132,7 @@ sdc.get_ranges_df( (1.0, 1.5] Positive 0 0 0.0000 0.0 % ``` -See [examples/basic_usage.py](examples/basic_usage.py) for a complete example with synthetic data showing transient correlations. +See [examples/basic_usage.py](https://github.com/AlFontal/sdcpy/blob/master/examples/basic_usage.py) for a complete example with synthetic data showing transient correlations. ## Development @@ -151,7 +151,7 @@ uv sync --all-groups uv run pytest ``` -See [CONTRIBUTING.md](CONTRIBUTING.md) for more details. +See [CONTRIBUTING.md](https://github.com/AlFontal/sdcpy/blob/master/CONTRIBUTING.md) for more details. ## References diff --git a/docs/_static/.gitkeep b/docs/_static/.gitkeep new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/docs/_static/.gitkeep @@ -0,0 +1 @@ + diff --git a/docs/index.rst b/docs/index.rst index d11b0b7..4cbb1f8 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -8,7 +8,6 @@ Welcome to sdcpy's documentation! readme installation usage - modules contributing Indices and tables diff --git a/docs/installation.rst b/docs/installation.rst index 7edc19e..a926115 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -44,7 +44,7 @@ Once you have a copy of the source, you can install it with: .. code-block:: console - $ python setup.py install + $ pip install . .. _Github repo: https://github.com/AlFontal/sdcpy diff --git a/sdcpy/core.py b/sdcpy/core.py index b04c809..b7ffe85 100644 --- a/sdcpy/core.py +++ b/sdcpy/core.py @@ -12,8 +12,11 @@ RECOGNIZED_METHODS = { "pearson": lambda x, y: stats.pearsonr(x, y), "spearman": lambda x, y: stats.spearmanr(x, y), + "kendall": lambda x, y: stats.kendalltau(x, y), } +VECTORIZED_METHODS = {"pearson", "spearman"} + # Default maximum memory threshold (in GB) for full vectorized computation. # Above this, chunked processing is used automatically. @@ -27,7 +30,7 @@ def _estimate_vectorized_memory( Estimate peak memory usage (in GB) for the fully vectorized SDC computation. The dominant memory consumers are: - - Permuted correlation matrices: (n_root^2, n1, n2) where n_root = sqrt(n_permutations) + - Permuted correlation matrices: (n_permutations, n1, n2) - Fragment matrices: (n1, fragment_size) + (n2, fragment_size) - Correlation matrix: (n1, n2) - Grid/lag matrices: 3 * (n1, n2) @@ -49,11 +52,8 @@ def _estimate_vectorized_memory( Estimated peak memory usage in gigabytes """ bytes_per_element = np.dtype(dtype).itemsize - n_root = int(np.sqrt(n_permutations).round()) - n_actual_perms = n_root * n_root - # Main memory consumers - perm_matrices = n_actual_perms * n1 * n2 * bytes_per_element + perm_matrices = n_permutations * n1 * n2 * bytes_per_element corr_matrix = n1 * n2 * bytes_per_element grid_matrices = 3 * n1 * n2 * bytes_per_element # start_1_grid, start_2_grid, lag_matrix @@ -97,7 +97,8 @@ def generate_correlation_map(x: np.ndarray, y: np.ndarray, method: str = "pearso s_x = x.std(axis=1, ddof=n - 1) s_y = y.std(axis=1, ddof=n - 1) cov = np.dot(x, y.T) - n * np.dot(mu_x[:, np.newaxis], mu_y[np.newaxis, :]) - return cov / np.dot(s_x[:, np.newaxis], s_y[np.newaxis, :]) + with np.errstate(divide="ignore", invalid="ignore"): + return cov / np.dot(s_x[:, np.newaxis], s_y[np.newaxis, :]) def shuffle_along_axis(a: np.ndarray, axis: int) -> np.ndarray: @@ -121,13 +122,26 @@ def shuffle_along_axis(a: np.ndarray, axis: int) -> np.ndarray: def _build_fragment_matrix(ts: np.ndarray, fragment_size: int) -> np.ndarray: """Build a matrix where each row is a sliding window fragment of the time series.""" - n_fragments = len(ts) - fragment_size + n_fragments = len(ts) - fragment_size + 1 # Use stride tricks for efficient view-based slicing from numpy.lib.stride_tricks import sliding_window_view return sliding_window_view(ts, fragment_size)[:n_fragments] +def _extract_statistic_and_pvalue(result: object) -> tuple[float, float]: + """Extract (statistic, p-value) from method outputs.""" + if hasattr(result, "statistic") and hasattr(result, "pvalue"): + return float(result.statistic), float(result.pvalue) + if isinstance(result, tuple): + if not result: + raise ValueError("Correlation method returned an empty tuple.") + statistic = float(result[0]) + p_value = np.nan if len(result) < 2 else float(result[1]) + return statistic, p_value + return float(result), np.nan + + def compute_sdc( ts1: np.ndarray, ts2: np.ndarray, @@ -184,33 +198,52 @@ def compute_sdc( ts1_arr = np.asarray(ts1) ts2_arr = np.asarray(ts2) - # Use vectorized path for built-in methods - if method in RECOGNIZED_METHODS: - return _compute_sdc_vectorized( - ts1_arr, - ts2_arr, - fragment_size, - n_permutations, - method, - two_tailed, - permutations, - min_lag, - max_lag, - max_memory_gb, - ) + if ts1_arr.ndim != 1 or ts2_arr.ndim != 1: + raise ValueError("ts1 and ts2 must be one-dimensional array-like objects.") + if fragment_size < 1: + raise ValueError("fragment_size must be >= 1.") + if fragment_size > len(ts1_arr) or fragment_size > len(ts2_arr): + raise ValueError("fragment_size cannot be larger than the length of either time series.") + if permutations and n_permutations < 1: + raise ValueError("n_permutations must be >= 1 when permutations=True.") + + if isinstance(method, str): + method_key = method.lower() + if method_key in VECTORIZED_METHODS: + return _compute_sdc_vectorized( + ts1_arr, + ts2_arr, + fragment_size, + n_permutations, + method_key, + two_tailed, + permutations, + min_lag, + max_lag, + max_memory_gb, + ) + if method_key in RECOGNIZED_METHODS: + method_fun = RECOGNIZED_METHODS[method_key] + else: + recognized = ", ".join(sorted(RECOGNIZED_METHODS)) + raise ValueError(f"Unknown method '{method}'. Supported methods: {recognized}, or a callable.") + elif callable(method): + method_fun = method else: - # Fall back to original loop-based implementation for custom callables - return _compute_sdc_loop( - ts1_arr, - ts2_arr, - fragment_size, - n_permutations, - method, - two_tailed, - permutations, - min_lag, - max_lag, - ) + raise TypeError("method must be a string identifier or a callable.") + + # Fall back to loop-based implementation for methods that are not vectorized + return _compute_sdc_loop( + ts1_arr, + ts2_arr, + fragment_size, + n_permutations, + method_fun, + two_tailed, + permutations, + min_lag, + max_lag, + ) def _compute_sdc_vectorized( @@ -233,8 +266,8 @@ def _compute_sdc_vectorized( Maximum memory (in GB) to use for full vectorization. If estimated memory exceeds this, chunked processing is used automatically. """ - n1 = len(ts1) - fragment_size - n2 = len(ts2) - fragment_size + n1 = len(ts1) - fragment_size + 1 + n2 = len(ts2) - fragment_size + 1 # Build fragment matrices using sliding window frags1 = _build_fragment_matrix(ts1, fragment_size) # (n1, fragment_size) @@ -265,8 +298,7 @@ def _compute_sdc_vectorized( # Compute p-values if permutations: - n_root = int(np.sqrt(n_permutations).round()) - n_actual_perms = n_root * n_root + n_root = int(np.ceil(np.sqrt(n_permutations))) # Estimate memory and decide strategy estimated_memory = _estimate_vectorized_memory(n1, n2, n_permutations) @@ -281,22 +313,23 @@ def _compute_sdc_vectorized( ) # Chunked approach: accumulate counts without storing all permutation matrices counts = np.zeros((n1, n2), dtype=np.int32) - - # Pre-compute shuffled versions of all fragments + abs_observed = np.abs(corr_matrix) if two_tailed else None shuffled_frags1 = np.array( [shuffle_along_axis(frags1.copy(), axis=1) for _ in range(n_root)] ) shuffled_frags2 = np.array( [shuffle_along_axis(frags2.copy(), axis=1) for _ in range(n_root)] ) - - # Process one permutation pair at a time, accumulating counts - abs_observed = np.abs(corr_matrix) if two_tailed else None with tqdm( - total=n_actual_perms, desc="Computing permutations (chunked)", leave=False + total=n_permutations, desc="Computing permutations (chunked)", leave=False ) as pbar: + n_done = 0 for i in range(n_root): + if n_done >= n_permutations: + break for j in range(n_root): + if n_done >= n_permutations: + break with warnings.catch_warnings(): warnings.simplefilter("ignore") perm_corr = generate_correlation_map( @@ -306,48 +339,48 @@ def _compute_sdc_vectorized( counts += (np.abs(perm_corr) >= abs_observed).astype(np.int32) else: counts += (perm_corr >= corr_matrix).astype(np.int32) - pbar.update(n_root) + pbar.update(1) + n_done += 1 # P-value: (count + 1) / (n_perms + 1) for proper permutation test - p_value_matrix = (counts + 1) / (n_actual_perms + 1) + p_value_matrix = (counts + 1) / (n_permutations + 1) else: # Full vectorized approach: store all permutation matrices - # Pre-compute shuffled versions of all fragments - # Shape: (n_root, n_fragments, fragment_size) + # Shape: (n_permutations, n1, n2) + perm_corr_matrices = np.zeros((n_permutations, n1, n2)) shuffled_frags1 = np.array( [shuffle_along_axis(frags1.copy(), axis=1) for _ in range(n_root)] ) shuffled_frags2 = np.array( [shuffle_along_axis(frags2.copy(), axis=1) for _ in range(n_root)] ) - - # Compute permuted correlation matrices for all combinations of shuffled fragments - # Shape: (n_root, n_root, n1, n2) - perm_corr_matrices = np.zeros((n_root, n_root, n1, n2)) - with tqdm(total=n_actual_perms, desc="Computing permutations", leave=False) as pbar: + with tqdm(total=n_permutations, desc="Computing permutations", leave=False) as pbar: + k = 0 for i in range(n_root): + if k >= n_permutations: + break for j in range(n_root): + if k >= n_permutations: + break with warnings.catch_warnings(): warnings.simplefilter("ignore") - perm_corr_matrices[i, j] = generate_correlation_map( + perm_corr_matrices[k] = generate_correlation_map( shuffled_frags1[i], shuffled_frags2[j], method=method ) - pbar.update(n_root) - - # Reshape to (n_actual_perms, n1, n2) - perm_corrs_flat = perm_corr_matrices.reshape(n_actual_perms, n1, n2) + pbar.update(1) + k += 1 # Compute p-values vectorized if two_tailed: # Count how many abs(perm) >= abs(observed) for each position abs_observed = np.abs(corr_matrix) - abs_perms = np.abs(perm_corrs_flat) + abs_perms = np.abs(perm_corr_matrices) counts = (abs_perms >= abs_observed[np.newaxis, :, :]).sum(axis=0) else: - counts = (perm_corrs_flat >= corr_matrix[np.newaxis, :, :]).sum(axis=0) + counts = (perm_corr_matrices >= corr_matrix[np.newaxis, :, :]).sum(axis=0) # P-value: (count + 1) / (n_perms + 1) for proper permutation test - p_value_matrix = (counts + 1) / (n_actual_perms + 1) + p_value_matrix = (counts + 1) / (n_permutations + 1) # Extract p-values for valid entries p_values = p_value_matrix[valid_mask] @@ -390,16 +423,16 @@ def _compute_sdc_loop( ) -> pd.DataFrame: """Original loop-based implementation for custom callable methods.""" method_fun = method - n_iterations = (len(ts1) - fragment_size) * (len(ts2) - fragment_size) + n_iterations = (len(ts1) - fragment_size + 1) * (len(ts2) - fragment_size + 1) sdc_array = np.empty(shape=(n_iterations, 7)) sdc_array[:] = np.nan i = 0 progress_bar = tqdm(total=n_iterations, desc="Computing SDC", leave=False) - for start_1 in range(len(ts1) - fragment_size): + for start_1 in range(len(ts1) - fragment_size + 1): stop_1 = start_1 + fragment_size - for start_2 in range(len(ts2) - fragment_size): + for start_2 in range(len(ts2) - fragment_size + 1): lag = start_1 - start_2 if min_lag <= lag <= max_lag: stop_2 = start_2 + fragment_size @@ -408,27 +441,24 @@ def _compute_sdc_loop( with warnings.catch_warnings(): warnings.simplefilter("ignore") - statistic, p_value = method_fun(fragment_1, fragment_2) + statistic, p_value = _extract_statistic_and_pvalue(method_fun(fragment_1, fragment_2)) if permutations: - permuted_scores = [ - method_fun( + permuted_scores = np.empty(n_permutations, dtype=float) + for k in range(n_permutations): + perm_result = method_fun( np.random.permutation(fragment_1), np.random.permutation(fragment_2) - )[0] - for _ in range(n_permutations) - ] - if two_tailed: - p_value = ( - 1 - - stats.percentileofscore(np.abs(permuted_scores), np.abs(statistic)) - / 100 ) + permuted_scores[k], _ = _extract_statistic_and_pvalue(perm_result) + if two_tailed: + count = np.sum(np.abs(permuted_scores) >= np.abs(statistic)) else: - p_value = 1 - stats.percentileofscore(permuted_scores, statistic) / 100 + count = np.sum(permuted_scores >= statistic) + p_value = (count + 1) / (n_permutations + 1) sdc_array[i] = [start_1, stop_1, start_2, stop_2, lag, statistic, p_value] i += 1 - progress_bar.update(1) + progress_bar.update(1) progress_bar.close() sdc_df = pd.DataFrame( diff --git a/sdcpy/io.py b/sdcpy/io.py index aadb93c..cf35102 100644 --- a/sdcpy/io.py +++ b/sdcpy/io.py @@ -18,6 +18,7 @@ def save_to_excel( n_permutations: int, method: str, filename: str, + way: str = "two-way", ) -> None: """Save SDC analysis results to Excel file.""" with pd.ExcelWriter(filename) as writer: @@ -45,6 +46,7 @@ def save_to_excel( "fragment_size": fragment_size, "n_permutations": n_permutations, "method": method, + "way": way, }, index=[1], ).to_excel(writer, sheet_name="config", index=False) @@ -56,7 +58,11 @@ def load_from_excel(filename: str) -> dict: Returns a dict with keys: ts1, ts2, fragment_size, n_permutations, method, sdc_df """ - fragment_size, n_permutations, method = pd.read_excel(filename, "config").loc[0] + config = pd.read_excel(filename, "config").iloc[0].to_dict() + fragment_size = int(config["fragment_size"]) + n_permutations = int(config["n_permutations"]) + method = config["method"] + way = config.get("way", "two-way") ts1 = pd.read_excel(filename, "time_series").set_index("date_1")[["start_1", "ts1"]] ts2 = pd.read_excel(filename, "time_series").set_index("date_2")[["start_2", "ts2"]] sdc_df = ( @@ -67,6 +73,7 @@ def load_from_excel(filename: str) -> dict: ), on=["start_1", "start_2"], ) + .dropna(subset=["r", "p_value"]) .assign( stop_1=lambda dd: dd.start_1 + fragment_size, stop_2=lambda dd: dd.start_2 + fragment_size, @@ -82,5 +89,6 @@ def load_from_excel(filename: str) -> dict: "fragment_size": fragment_size, "n_permutations": n_permutations, "method": method, + "way": way, "sdc_df": sdc_df, } diff --git a/sdcpy/plotting.py b/sdcpy/plotting.py index 21ae0ee..015f112 100644 --- a/sdcpy/plotting.py +++ b/sdcpy/plotting.py @@ -252,8 +252,12 @@ def combi_plot( metric_labels = { "pearson": "Pearson's $r$", "spearman": "Spearman's $\\rho$", + "kendall": "Kendall's $\\tau$", } - metric_label = metric_labels.get(method, method.capitalize()) + if isinstance(method, str): + metric_label = metric_labels.get(method.lower(), method.capitalize()) + else: + metric_label = getattr(method, "__name__", method.__class__.__name__) # Validate alignment align = align.lower() @@ -334,12 +338,24 @@ def combi_plot( filtered_df = sdc_df.loc[lambda dd: (dd.lag <= max_lag) & (dd.lag >= min_lag)] pivot_r = filtered_df.pivot(index="date_2", columns="date_1", values="r") pivot_p = filtered_df.pivot(index="date_2", columns="date_1", values="p_value") - mask = pivot_p >= alpha + if pivot_r.empty: + pivot_r = pd.DataFrame([[0.0]]) + mask = np.zeros((1, 1), dtype=bool) + else: + mask = (pivot_p >= alpha) | pivot_r.isna() | pivot_p.isna() + # seaborn emits runtime warnings when the full heatmap is masked (all non-significant). + # In that case, render a neutral heatmap without masking so plotting remains warning-free. + if np.asarray(mask).all(): + heatmap_data = pivot_r.fillna(0.0) + heatmap_mask = np.zeros_like(np.asarray(mask), dtype=bool) + else: + heatmap_data = pivot_r + heatmap_mask = mask sns.heatmap( - pivot_r, + heatmap_data, cbar=False, - mask=mask, + mask=heatmap_mask, cmap="RdBu_r", ax=hm, ) diff --git a/sdcpy/scale_dependent_correlation.py b/sdcpy/scale_dependent_correlation.py index 4ce46e3..6594f7a 100755 --- a/sdcpy/scale_dependent_correlation.py +++ b/sdcpy/scale_dependent_correlation.py @@ -46,8 +46,10 @@ def __init__( ts2 = pd.Series(ts2) min_date = max(ts1.index.min(), ts2.index.min()) max_date = min(ts1.index.max(), ts2.index.max()) - self.ts1 = ts1[min_date:max_date] - self.ts2 = ts2[min_date:max_date] + self.ts1 = ts1.loc[min_date:max_date] + self.ts2 = ts2.loc[min_date:max_date] + if self.ts1.empty or self.ts2.empty: + raise ValueError("ts1 and ts2 must have overlapping index values.") self.fragment_size = fragment_size self.n_permutations = n_permutations self.ts1.index.name = "date_1" @@ -86,6 +88,7 @@ def to_excel(self, filename: str): self.n_permutations, self.method, filename, + self.way, ) @classmethod @@ -93,7 +96,7 @@ def from_excel(cls, filename: str): data = load_from_excel(filename) return cls( ts1=data["ts1"], - ts2=data["ts2"], + ts2=None if data.get("way", "two-way") == "one-way" else data["ts2"], fragment_size=data["fragment_size"], n_permutations=data["n_permutations"], method=data["method"], @@ -151,24 +154,34 @@ def get_ranges_df( """ ts_series = self.ts1 if ts == 1 else self.ts2 - # Compute rolling aggregate for fragments - # This gives the aggregate value for each fragment starting at each index + # Compute per-fragment aggregates by fragment start position. + from numpy.lib.stride_tricks import sliding_window_view + + n_fragments = len(ts_series) - self.fragment_size + 1 + if n_fragments < 1: + raise ValueError("fragment_size cannot be larger than the selected time series length.") + fragment_windows = sliding_window_view(ts_series.to_numpy(), self.fragment_size)[:n_fragments] + if agg_func == "mean": - fragment_values = ts_series.rolling(window=self.fragment_size, min_periods=1).mean() + agg_values = fragment_windows.mean(axis=1) elif agg_func == "median": - fragment_values = ts_series.rolling(window=self.fragment_size, min_periods=1).median() + agg_values = np.median(fragment_windows, axis=1) elif agg_func == "min": - fragment_values = ts_series.rolling(window=self.fragment_size, min_periods=1).min() + agg_values = fragment_windows.min(axis=1) elif agg_func == "max": - fragment_values = ts_series.rolling(window=self.fragment_size, min_periods=1).max() + agg_values = fragment_windows.max(axis=1) else: raise ValueError( f"Unknown agg_func: {agg_func}. Use 'mean', 'median', 'min', or 'max'." ) # Create lookup from date to fragment aggregate value - fragment_values_df = fragment_values.reset_index() - fragment_values_df.columns = [f"date_{ts}", "fragment_value"] + fragment_values_df = pd.DataFrame( + { + f"date_{ts}": ts_series.index[:n_fragments], + "fragment_value": agg_values, + } + ) # Join sdc_df with fragment values first df = ( diff --git a/tests/test_core.py b/tests/test_core.py index 7fb46fd..b3780d2 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -97,7 +97,7 @@ def test_output_shape(self, numpy_ts_pair): result = compute_sdc( ts1, ts2, fragment_size=fragment_size, n_permutations=9, permutations=True ) - expected_rows = (len(ts1) - fragment_size) * (len(ts2) - fragment_size) + expected_rows = (len(ts1) - fragment_size + 1) * (len(ts2) - fragment_size + 1) assert len(result) == expected_rows def test_output_columns(self, numpy_ts_pair): @@ -159,9 +159,45 @@ def test_large_fragment_size(self, numpy_ts_pair): ts1, ts2 = numpy_ts_pair fragment_size = len(ts1) - 5 result = compute_sdc(ts1, ts2, fragment_size=fragment_size, n_permutations=9) - expected_rows = (len(ts1) - fragment_size) * (len(ts2) - fragment_size) + expected_rows = (len(ts1) - fragment_size + 1) * (len(ts2) - fragment_size + 1) assert len(result) == expected_rows + def test_fragment_size_equal_series_length(self): + """fragment_size equal to series length should produce one comparison.""" + ts = np.arange(10) + result = compute_sdc(ts, ts, fragment_size=10, n_permutations=9, permutations=False) + assert len(result) == 1 + assert result.iloc[0]["start_1"] == 0 + assert result.iloc[0]["stop_1"] == 10 + + def test_fragment_size_larger_than_series_raises(self): + """fragment_size larger than series length should raise ValueError.""" + ts = np.arange(10) + with pytest.raises(ValueError, match="fragment_size cannot be larger"): + compute_sdc(ts, ts, fragment_size=11, n_permutations=9) + + def test_kendall_method_string(self, numpy_ts_pair): + """kendall should work when requested by name.""" + ts1, ts2 = numpy_ts_pair + result = compute_sdc(ts1, ts2, fragment_size=10, n_permutations=9, method="kendall") + assert len(result) > 0 + assert result["p_value"].between(0, 1).all() + + def test_unknown_method_raises(self, numpy_ts_pair): + """Unknown method names should fail clearly.""" + ts1, ts2 = numpy_ts_pair + with pytest.raises(ValueError, match="Unknown method"): + compute_sdc(ts1, ts2, fragment_size=10, n_permutations=9, method="not-a-method") + + def test_non_square_permutations_use_requested_count(self): + """Permutation p-value resolution should match requested count.""" + np.random.seed(42) + ts = np.random.randn(30) + n_permutations = 10 + result = compute_sdc(ts, ts, fragment_size=6, n_permutations=n_permutations) + scaled = result["p_value"] * (n_permutations + 1) + assert np.allclose(scaled, np.round(scaled), atol=1e-10) + class TestComputeSDCStatisticalProperties: """Tests for statistical correctness of compute_sdc.""" diff --git a/tests/test_io.py b/tests/test_io.py index 1961193..7c48209 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -59,6 +59,22 @@ def test_time_series_preserved(self, random_ts_pair): finally: os.unlink(filepath) + def test_one_way_round_trip_preserves_mode(self, random_ts_pair): + """One-way analyses should remain one-way after Excel round-trip.""" + ts1, _ = random_ts_pair + sdc_original = SDCAnalysis(ts1, fragment_size=10, n_permutations=9) + + with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as f: + filepath = f.name + + try: + sdc_original.to_excel(filepath) + sdc_loaded = SDCAnalysis.from_excel(filepath) + assert sdc_loaded.way == "one-way" + assert (sdc_loaded.sdc_df["start_1"] == sdc_loaded.sdc_df["start_2"]).sum() == 0 + finally: + os.unlink(filepath) + def test_different_methods(self, random_ts_pair): """Should preserve method information.""" ts1, ts2 = random_ts_pair @@ -130,7 +146,7 @@ def test_returns_dict(self, random_ts_pair): sdc.to_excel(filepath) data = load_from_excel(filepath) - expected_keys = {"ts1", "ts2", "fragment_size", "n_permutations", "method", "sdc_df"} + expected_keys = {"ts1", "ts2", "fragment_size", "n_permutations", "method", "way", "sdc_df"} assert set(data.keys()) == expected_keys finally: os.unlink(filepath) diff --git a/tests/test_plotting.py b/tests/test_plotting.py index 52bca8b..aaa1160 100644 --- a/tests/test_plotting.py +++ b/tests/test_plotting.py @@ -1,7 +1,10 @@ """Smoke tests for plotting functions.""" +import warnings + import matplotlib import matplotlib.pyplot as plt +from scipy import stats from sdcpy import SDCAnalysis @@ -134,6 +137,20 @@ def test_combi_plot_spearman_method(self, random_ts_pair): assert isinstance(result, plt.Figure) plt.close(result) + def test_combi_plot_custom_callable_method(self, random_ts_pair): + """combi_plot should work when method is a callable.""" + ts1, ts2 = random_ts_pair + sdc = SDCAnalysis( + ts1, + ts2, + fragment_size=10, + n_permutations=9, + method=lambda x, y: stats.kendalltau(x, y), + ) + result = sdc.combi_plot() + assert isinstance(result, plt.Figure) + plt.close(result) + def test_combi_plot_figsize(self, random_ts_pair): """combi_plot should accept figsize parameter.""" ts1, ts2 = random_ts_pair @@ -173,3 +190,14 @@ def test_combi_plot_integer_index(self, numpy_ts_pair): result = sdc.combi_plot() assert isinstance(result, plt.Figure) plt.close(result) + + def test_combi_plot_all_masked_heatmap_no_runtime_warning(self, random_ts_pair): + """All-masked heatmaps should not emit seaborn all-NaN runtime warnings.""" + ts1, ts2 = random_ts_pair + sdc = SDCAnalysis(ts1, ts2, fragment_size=10, n_permutations=9) + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter("always", RuntimeWarning) + result = sdc.combi_plot(alpha=0.0) + assert isinstance(result, plt.Figure) + plt.close(result) + assert not any("All-NaN slice encountered" in str(w.message) for w in caught) diff --git a/tests/test_ranges_df.py b/tests/test_ranges_df.py index 25f64ac..c851978 100644 --- a/tests/test_ranges_df.py +++ b/tests/test_ranges_df.py @@ -27,3 +27,30 @@ def test_get_ranges_df_lag_filtering(): # Test extreme filtering ranges_df_strict = sdc.get_ranges_df(min_lag=0, max_lag=0) assert ranges_df_strict["counts"].sum() == sdc.sdc_df.query("lag == 0").shape[0] + + +def test_get_ranges_df_uses_fragment_start_window(): + """Fragment values should be aggregated from [start:start+fragment_size].""" + ts = pd.Series(np.arange(6, dtype=float)) + sdc_df = pd.DataFrame( + { + "start_1": [0.0, 1.0], + "stop_1": [3.0, 4.0], + "start_2": [0.0, 1.0], + "stop_2": [3.0, 4.0], + "lag": [0.0, 0.0], + "r": [0.9, 0.9], + "p_value": [0.01, 0.01], + "date_1": [0, 1], + "date_2": [0, 1], + } + ) + sdc = SDCAnalysis(ts1=ts, ts2=ts, fragment_size=3, n_permutations=9, sdc_df=sdc_df) + + ranges_df = sdc.get_ranges_df(ts=1, bin_size=1, alpha=0.05, min_bin=0, max_bin=4) + positive_bins = ranges_df.loc[ + (ranges_df["direction"] == "Positive") & (ranges_df["counts"] > 0), "cat_value" + ].tolist() + + # Means for starts 0 and 1 are 1.0 and 2.0, so populated bins must end at 1 and 2. + assert {interval.right for interval in positive_bins} == {1.0, 2.0} diff --git a/tests/test_sdc_analysis.py b/tests/test_sdc_analysis.py index 2165083..24ead69 100644 --- a/tests/test_sdc_analysis.py +++ b/tests/test_sdc_analysis.py @@ -25,6 +25,8 @@ def test_numpy_array_input(self, numpy_ts_pair): # Should have created integer indices assert 0 in sdc.ts1.index assert pd.api.types.is_integer_dtype(sdc.ts1.index) + assert len(sdc.ts1) == len(ts1) + assert len(sdc.ts2) == len(ts2) assert len(sdc.sdc_df) > 0 def test_one_way_sdc(self, random_ts_pair): @@ -43,7 +45,7 @@ def test_short_time_series(self, short_ts_pair): """Should work with very short time series.""" ts1, ts2 = short_ts_pair sdc = SDCAnalysis(ts1, ts2, fragment_size=5, n_permutations=9) - expected = (len(ts1) - 5) * (len(ts2) - 5) + expected = (len(ts1) - 5 + 1) * (len(ts2) - 5 + 1) assert len(sdc.sdc_df) == expected def test_weekly_frequency(self, weekly_ts_pair): @@ -154,7 +156,7 @@ def test_various_fragment_sizes(self, random_ts_pair, fragment_size): """Should work with various fragment sizes.""" ts1, ts2 = random_ts_pair sdc = SDCAnalysis(ts1, ts2, fragment_size=fragment_size, n_permutations=9) - expected = (len(ts1) - fragment_size) * (len(ts2) - fragment_size) + expected = (len(ts1) - fragment_size + 1) * (len(ts2) - fragment_size + 1) assert len(sdc.sdc_df) == expected def test_fragment_size_equals_length_minus_one(self, random_ts_pair): @@ -166,4 +168,4 @@ def test_fragment_size_equals_length_minus_one(self, random_ts_pair): fragment_size = len(ts1_short) - 2 sdc = SDCAnalysis(ts1_short, ts2_short, fragment_size=fragment_size, n_permutations=9) # Should have very few comparisons - assert len(sdc.sdc_df) == 4 # (20-18)^2 = 2^2 = 4 + assert len(sdc.sdc_df) == 9 # (20-18+1)^2 = 3^2 = 9 From 07dbebba164fb489523693f085c83c5023c6a577 Mon Sep 17 00:00:00 2001 From: Alejandro Fontal Date: Tue, 10 Feb 2026 10:52:46 +0100 Subject: [PATCH 2/3] chore: sync uv lockfile version --- uv.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uv.lock b/uv.lock index 86e6cdb..767c87f 100644 --- a/uv.lock +++ b/uv.lock @@ -2264,7 +2264,7 @@ wheels = [ [[package]] name = "sdcpy" -version = "0.6.0" +version = "0.7.0" source = { editable = "." } dependencies = [ { name = "matplotlib", version = "3.9.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, From a6f2c90a86a38d582f5cb3747b2b3b4ea10cf62e Mon Sep 17 00:00:00 2001 From: Alejandro Fontal Date: Tue, 10 Feb 2026 11:01:49 +0100 Subject: [PATCH 3/3] chore: add docs/_build to .gitignore --- .gitignore | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/.gitignore b/.gitignore index b57bb18..7a5afa2 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,38 @@ build/* dist/* */*/AGENTS.md **/.DS_Store +docs/_build/doctrees/contributing.doctree +docs/_build/doctrees/environment.pickle +docs/_build/doctrees/index.doctree +docs/_build/doctrees/installation.doctree +docs/_build/doctrees/readme.doctree +docs/_build/doctrees/usage.doctree +docs/_build/html/.buildinfo +docs/_build/html/.buildinfo.bak +docs/_build/html/contributing.html +docs/_build/html/genindex.html +docs/_build/html/index.html +docs/_build/html/installation.html +docs/_build/html/objects.inv +docs/_build/html/readme.html +docs/_build/html/search.html +docs/_build/html/searchindex.js +docs/_build/html/usage.html +docs/_build/html/_sources/contributing.md.txt +docs/_build/html/_sources/index.rst.txt +docs/_build/html/_sources/installation.rst.txt +docs/_build/html/_sources/readme.md.txt +docs/_build/html/_sources/usage.rst.txt +docs/_build/html/_static/alabaster.css +docs/_build/html/_static/basic.css +docs/_build/html/_static/custom.css +docs/_build/html/_static/doctools.js +docs/_build/html/_static/documentation_options.js +docs/_build/html/_static/file.png +docs/_build/html/_static/github-banner.svg +docs/_build/html/_static/language_data.js +docs/_build/html/_static/minus.png +docs/_build/html/_static/plus.png +docs/_build/html/_static/pygments.css +docs/_build/html/_static/searchtools.js +docs/_build/html/_static/sphinx_highlight.js