diff --git a/CHANGELOG.md b/CHANGELOG.md
index c7cb8c7..14de0a7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,10 @@
 # Change Log
 All notable changes to this project will be documented in this file.
 
+## 2.1.4 - 2026-02
+ ### Common
+ - table reader optimization
+
 ## 2.1.3 - 2026-02
  ### Runner
  - Separate writer from runner, sorting schema to align to written table
diff --git a/pyproject.toml b/pyproject.toml
index 4a850c3..b6c15cb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "rialto"
-version = "2.1.3"
+version = "2.1.4"
 description = "Rialto is a framework for building and deploying machine learning features in a scalable and reusable way. It provides a set of tools that make it easy to define and deploy features and models, and it provides a way to orchestrate the execution of these features and models."
 authors = [
     { name = "Marek Dobransky", email = "marekdobr@gmail.com" },
diff --git a/rialto/common/table_reader.py b/rialto/common/table_reader.py
index d3926f2..16a6b38 100644
--- a/rialto/common/table_reader.py
+++ b/rialto/common/table_reader.py
@@ -88,9 +88,7 @@ def _uppercase_column_names(self, df: DataFrame) -> DataFrame:
         :param df: Dataframe
         :return: renamed Dataframe
         """
-        for col in df.columns:
-            df = df.withColumnRenamed(col, col.upper())
-        return df
+        return df.select(*[F.col(c).alias(c.upper()) for c in df.columns])
 
     def _get_latest_available_date(self, df: DataFrame, date_col: str, until: Optional[datetime.date]) -> datetime.date:
         if until:
diff --git a/tests/common/test_reader.py b/tests/common/test_reader.py
new file mode 100644
index 0000000..c42b20b
--- /dev/null
+++ b/tests/common/test_reader.py
@@ -0,0 +1,33 @@
+# Copyright 2022 ABSA Group Limited
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+from rialto.common.table_reader import TableReader
+
+
+@pytest.fixture
+def sample_df(spark):
+    df = spark.createDataFrame(
+        [(1, 2.33, "str", 4.55, 5.66), (1, 2.33, "str", 4.55, 5.66), (1, 2.33, "str", 4.55, 5.66)],
+        schema="a long, b float, c string, d float, e float",
+    )
+
+    return df
+
+
+def test_uppercase_columns(spark, sample_df):
+    tr = TableReader(spark)
+    df = tr._uppercase_column_names(sample_df)
+    assert df.columns == ["A", "B", "C", "D", "E"]
diff --git a/uv.lock b/uv.lock
index eae6d1f..5a6f718 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1078,7 +1078,7 @@ wheels = [
 
 [[package]]
 name = "rialto"
-version = "2.1.3"
+version = "2.1.4"
 source = { editable = "." }
 dependencies = [
     { name = "delta-spark" },