Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
189 changes: 104 additions & 85 deletions .github/workflows/bot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -311,14 +311,8 @@ jobs:
strategy:
matrix:
include:
- scalaProfile: "scala-2.12"
sparkProfile: "spark3.3"
sparkModules: "hudi-spark-datasource/hudi-spark3.3.x"

- scalaProfile: "scala-2.12"
sparkProfile: "spark3.4"
sparkModules: "hudi-spark-datasource/hudi-spark3.4.x"

# Full suite runs only on the latest 3.x (Spark 3.5 / Scala 2.12 / Java 11).
# Older 3.x versions run the curated core set in test-spark-core-tests.
- scalaProfile: "scala-2.12"
sparkProfile: "spark3.5"
sparkModules: "hudi-spark-datasource/hudi-spark3.5.x"
Expand Down Expand Up @@ -368,14 +362,6 @@ jobs:
strategy:
matrix:
include:
- scalaProfile: "scala-2.12"
sparkProfile: "spark3.3"
sparkModules: "hudi-spark-datasource/hudi-spark3.3.x"

- scalaProfile: "scala-2.12"
sparkProfile: "spark3.4"
sparkModules: "hudi-spark-datasource/hudi-spark3.4.x"

- scalaProfile: "scala-2.12"
sparkProfile: "spark3.5"
sparkModules: "hudi-spark-datasource/hudi-spark3.5.x"
Expand Down Expand Up @@ -440,14 +426,6 @@ jobs:
strategy:
matrix:
include:
- scalaProfile: "scala-2.12"
sparkProfile: "spark3.3"
sparkModules: "hudi-spark-datasource/hudi-spark3.3.x"

- scalaProfile: "scala-2.12"
sparkProfile: "spark3.4"
sparkModules: "hudi-spark-datasource/hudi-spark3.4.x"

- scalaProfile: "scala-2.12"
sparkProfile: "spark3.5"
sparkModules: "hudi-spark-datasource/hudi-spark3.5.x"
Expand Down Expand Up @@ -505,14 +483,6 @@ jobs:
strategy:
matrix:
include:
- scalaProfile: "scala-2.12"
sparkProfile: "spark3.3"
sparkModules: "hudi-spark-datasource/hudi-spark3.3.x"

- scalaProfile: "scala-2.12"
sparkProfile: "spark3.4"
sparkModules: "hudi-spark-datasource/hudi-spark3.4.x"

- scalaProfile: "scala-2.12"
sparkProfile: "spark3.5"
sparkModules: "hudi-spark-datasource/hudi-spark3.5.x"
Expand Down Expand Up @@ -570,14 +540,6 @@ jobs:
strategy:
matrix:
include:
- scalaProfile: "scala-2.12"
sparkProfile: "spark3.3"
sparkModules: "hudi-spark-datasource/hudi-spark3.3.x"

- scalaProfile: "scala-2.12"
sparkProfile: "spark3.4"
sparkModules: "hudi-spark-datasource/hudi-spark3.4.x"

- scalaProfile: "scala-2.12"
sparkProfile: "spark3.5"
sparkModules: "hudi-spark-datasource/hudi-spark3.5.x"
Expand Down Expand Up @@ -685,15 +647,9 @@ jobs:
strategy:
matrix:
include:
- scalaProfile: "scala-2.13"
sparkProfile: "spark3.5"
sparkModules: "hudi-spark-datasource/hudi-spark3.5.x"
- scalaProfile: "scala-2.13"
sparkProfile: "spark4.0"
sparkModules: "hudi-spark-datasource/hudi-spark4.0.x"
- scalaProfile: "scala-2.13"
sparkProfile: "spark4.1"
sparkModules: "hudi-spark-datasource/hudi-spark4.1.x"
# Full suite runs only on the latest 4.x (Spark 4.2 / Scala 2.13 / Java 17).
# Spark 3.5/4.0/4.1 on Java 17 run the curated core set in
# test-spark-core-tests.
- scalaProfile: "scala-2.13"
sparkProfile: "spark4.2"
sparkModules: "hudi-spark-datasource/hudi-spark4.2.x"
Expand Down Expand Up @@ -743,15 +699,6 @@ jobs:
strategy:
matrix:
include:
- scalaProfile: "scala-2.13"
sparkProfile: "spark3.5"
sparkModules: "hudi-spark-datasource/hudi-spark3.5.x"
- scalaProfile: "scala-2.13"
sparkProfile: "spark4.0"
sparkModules: "hudi-spark-datasource/hudi-spark4.0.x"
- scalaProfile: "scala-2.13"
sparkProfile: "spark4.1"
sparkModules: "hudi-spark-datasource/hudi-spark4.1.x"
- scalaProfile: "scala-2.13"
sparkProfile: "spark4.2"
sparkModules: "hudi-spark-datasource/hudi-spark4.2.x"
Expand Down Expand Up @@ -816,15 +763,6 @@ jobs:
strategy:
matrix:
include:
- scalaProfile: "scala-2.13"
sparkProfile: "spark3.5"
sparkModules: "hudi-spark-datasource/hudi-spark3.5.x"
- scalaProfile: "scala-2.13"
sparkProfile: "spark4.0"
sparkModules: "hudi-spark-datasource/hudi-spark4.0.x"
- scalaProfile: "scala-2.13"
sparkProfile: "spark4.1"
sparkModules: "hudi-spark-datasource/hudi-spark4.1.x"
- scalaProfile: "scala-2.13"
sparkProfile: "spark4.2"
sparkModules: "hudi-spark-datasource/hudi-spark4.2.x"
Expand Down Expand Up @@ -882,15 +820,6 @@ jobs:
strategy:
matrix:
include:
- scalaProfile: "scala-2.13"
sparkProfile: "spark3.5"
sparkModules: "hudi-spark-datasource/hudi-spark3.5.x"
- scalaProfile: "scala-2.13"
sparkProfile: "spark4.0"
sparkModules: "hudi-spark-datasource/hudi-spark4.0.x"
- scalaProfile: "scala-2.13"
sparkProfile: "spark4.1"
sparkModules: "hudi-spark-datasource/hudi-spark4.1.x"
- scalaProfile: "scala-2.13"
sparkProfile: "spark4.2"
sparkModules: "hudi-spark-datasource/hudi-spark4.2.x"
Expand Down Expand Up @@ -948,15 +877,6 @@ jobs:
strategy:
matrix:
include:
- scalaProfile: "scala-2.13"
sparkProfile: "spark3.5"
sparkModules: "hudi-spark-datasource/hudi-spark3.5.x"
- scalaProfile: "scala-2.13"
sparkProfile: "spark4.0"
sparkModules: "hudi-spark-datasource/hudi-spark4.0.x"
- scalaProfile: "scala-2.13"
sparkProfile: "spark4.1"
sparkModules: "hudi-spark-datasource/hudi-spark4.1.x"
- scalaProfile: "scala-2.13"
sparkProfile: "spark4.2"
sparkModules: "hudi-spark-datasource/hudi-spark4.2.x"
Expand Down Expand Up @@ -1008,6 +928,105 @@ jobs:
flags: spark-scala-tests
token: ${{ secrets.CODECOV_TOKEN }}

# Curated core flow subset (-Pcore-tests) on every Spark version, for both the
# Java 11 / Scala 2.12 and Java 17 / Scala 2.13 tracks in one matrix-driven job.
# The full unit/functional/scala suites above run only on the latest 3.x (spark3.5)
# and latest 4.x (spark4.2); these entries give the other versions a fast
# critical-path signal. The core set is excluded from the full suites
# (excludedGroups=core, tagsToExclude=SparkSQLCoreFlow), so no test runs twice.
# The per-entry mvnProfiles carries -Pjava17 for the Java 17 rows (empty for Java 11).
test-spark-core-tests:
name: test-spark-core-tests (${{ matrix.scalaProfile }}, ${{ matrix.sparkProfile }}, java${{ matrix.javaVersion }})
runs-on: ubuntu-latest
needs: changes
strategy:
matrix:
include:
- scalaProfile: "scala-2.12"
sparkProfile: "spark3.3"
sparkModules: "hudi-spark-datasource/hudi-spark3.3.x"
javaVersion: "11"
mvnProfiles: ""
- scalaProfile: "scala-2.12"
sparkProfile: "spark3.4"
sparkModules: "hudi-spark-datasource/hudi-spark3.4.x"
javaVersion: "11"
mvnProfiles: ""
- scalaProfile: "scala-2.12"
sparkProfile: "spark3.5"
sparkModules: "hudi-spark-datasource/hudi-spark3.5.x"
javaVersion: "11"
mvnProfiles: ""
- scalaProfile: "scala-2.13"
sparkProfile: "spark3.5"
sparkModules: "hudi-spark-datasource/hudi-spark3.5.x"
javaVersion: "17"
mvnProfiles: "-Pjava17"
- scalaProfile: "scala-2.13"
sparkProfile: "spark4.0"
sparkModules: "hudi-spark-datasource/hudi-spark4.0.x"
javaVersion: "17"
mvnProfiles: "-Pjava17"
- scalaProfile: "scala-2.13"
sparkProfile: "spark4.1"
sparkModules: "hudi-spark-datasource/hudi-spark4.1.x"
javaVersion: "17"
mvnProfiles: "-Pjava17"
- scalaProfile: "scala-2.13"
sparkProfile: "spark4.2"
sparkModules: "hudi-spark-datasource/hudi-spark4.2.x"
javaVersion: "17"
mvnProfiles: "-Pjava17"

steps:
- if: needs.changes.outputs.relevant == 'true'
uses: actions/checkout@v5
- name: Set up JDK ${{ matrix.javaVersion }}
if: needs.changes.outputs.relevant == 'true'
uses: actions/setup-java@v5
with:
java-version: "${{ matrix.javaVersion }}"
distribution: 'temurin'
architecture: x64
cache: maven
- name: Build Project
if: needs.changes.outputs.relevant == 'true'
env:
SCALA_PROFILE: ${{ matrix.scalaProfile }}
SPARK_PROFILE: ${{ matrix.sparkProfile }}
SPARK_MODULES: ${{ matrix.sparkModules }}
MVN_PROFILES: ${{ matrix.mvnProfiles }}
run:
mvn clean install -T 2 $MVN_PROFILES -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -DskipTests=true $MVN_ARGS -am -pl "hudi-examples/hudi-examples-spark,hudi-common,$SPARK_COMMON_MODULES,$SPARK_MODULES"
- name: Core Tests - Common & Spark
if: needs.changes.outputs.relevant == 'true'
env:
SCALA_PROFILE: ${{ matrix.scalaProfile }}
SPARK_PROFILE: ${{ matrix.sparkProfile }}
SPARK_MODULES: ${{ matrix.sparkModules }}
MVN_PROFILES: ${{ matrix.mvnProfiles }}
run:
mvn test -Pcore-tests $MVN_PROFILES -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -fae -pl "hudi-common,$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS -Djacoco.skip=false
- name: Quickstart Test
if: needs.changes.outputs.relevant == 'true'
env:
SCALA_PROFILE: ${{ matrix.scalaProfile }}
SPARK_PROFILE: ${{ matrix.sparkProfile }}
MVN_PROFILES: ${{ matrix.mvnProfiles }}
run:
mvn test -Punit-tests $MVN_PROFILES -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl hudi-examples/hudi-examples-spark $MVN_ARGS -Djacoco.skip=false
- name: Generate merged coverage report
if: always() && needs.changes.outputs.relevant == 'true'
run: ./scripts/jacoco/generate_merged_coverage_report.sh $GITHUB_WORKSPACE
- name: Upload coverage to Codecov
if: always() && needs.changes.outputs.relevant == 'true'
uses: codecov/codecov-action@75cd11691c0faa626561e295848008c8a7dddffe # v5
with:
files: ./jacoco-report.xml
disable_search: true
flags: spark-core-tests
token: ${{ secrets.CODECOV_TOKEN }}

test-flink-1:
runs-on: ubuntu-latest
needs: changes
Expand Down
2 changes: 1 addition & 1 deletion hudi-spark-datasource/hudi-spark/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
<configuration>
<tagsToExclude>org.apache.hudi.functional.SparkSQLCoreFlow</tagsToExclude>
<tagsToExclude>${hoodie.scalatest.tagsToExclude}</tagsToExclude>
</configuration>
</plugin>
<plugin>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import static org.junit.jupiter.api.Assertions.assertEquals;

@Tag("functional")
@Tag("core")
public class TestSparkOrcReader extends TestBootstrapReadBase {

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import static org.junit.jupiter.api.Assertions.assertEquals;

@Tag("functional")
@Tag("core")
public class TestSparkParquetReader extends TestBootstrapReadBase {

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import org.apache.spark.sql.types.StructType;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;

Expand All @@ -56,6 +57,7 @@
/**
* Unit tests {@link HoodieInternalRowParquetWriter}.
*/
@Tag("core")
public class TestHoodieInternalRowParquetWriter extends HoodieSparkClientTestHarness {

@BeforeEach
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.hudi.functional

import org.scalatest.Tag

/**
* ScalaTest tag for the curated "core flow" subset that runs on every Spark
* version in CI (via the {@code core-tests} Maven profile), as opposed to the
* full suite that runs only on the latest Spark major versions.
*
* The tag name matches the {@link SparkSQLCoreFlow} Java {@code @TagAnnotation}
* so that class-level {@code @SparkSQLCoreFlow} and per-test
* {@code taggedAs(CoreFlow)} are selected by the same scalatest
* {@code tagsToInclude}/{@code tagsToExclude} value.
*/
object CoreFlow extends Tag("org.apache.hudi.functional.SparkSQLCoreFlow")
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ import org.apache.spark.sql.hudi.HoodieSparkSessionExtension
import org.apache.spark.sql.types.{ArrayType, DataTypes, DateType, IntegerType, LongType, MapType, StringType, StructField, StructType, TimestampType}
import org.joda.time.DateTime
import org.joda.time.format.DateTimeFormat
import org.junit.jupiter.api.{AfterEach, BeforeEach, Disabled, Test}
import org.junit.jupiter.api.{AfterEach, BeforeEach, Disabled, Tag, Test}
import org.junit.jupiter.api.Assertions.{assertDoesNotThrow, assertEquals, assertFalse, assertTrue, fail}
import org.junit.jupiter.api.function.Executable
import org.junit.jupiter.params.ParameterizedTest
Expand Down Expand Up @@ -800,6 +800,7 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup
assertEquals(schema, actualSchema)
}

@Tag("core")
@ParameterizedTest
@EnumSource(value = classOf[HoodieRecordType], names = Array("AVRO", "SPARK"))
def testCopyOnWriteDeletes(recordType: HoodieRecordType): Unit = {
Expand Down Expand Up @@ -831,6 +832,7 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup
assertEquals(snapshotDF2.count(), 80)
}

@Tag("core")
@Test
def testCopyOnWriteUpserts(): Unit = {
val recordType = HoodieRecordType.AVRO
Expand Down Expand Up @@ -1217,6 +1219,7 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup
assertEquals(hoodieIncViewDF2.count(), insert2NewKeyCnt)
}

@Tag("core")
@ParameterizedTest
@EnumSource(value = classOf[HoodieRecordType], names = Array("AVRO", "SPARK"))
def testComplexDataTypeWriteAndReadConsistency(recordType: HoodieRecordType): Unit = {
Expand Down Expand Up @@ -1549,6 +1552,7 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup
assertEquals(false, Metrics.isInitialized(basePath))
}

@Tag("core")
@ParameterizedTest
@CsvSource(Array(
"true,false,AVRO", "true,true,AVRO", "false,true,AVRO", "false,false,AVRO"
Expand Down Expand Up @@ -1967,6 +1971,7 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup
}

/** Test case to verify MAKE_NEW_COLUMNS_NULLABLE config parameter. */
@Tag("core")
@Test
def testSchemaEvolutionWithNewColumn(): Unit = {
val df1 = spark.sql("select '1' as event_id, '2' as ts, '3' as version, 'foo' as event_date")
Expand Down Expand Up @@ -2263,6 +2268,7 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup
* - v6: Uses requestedTime for commits, open_close incremental ranges
* - v9: Uses completionTime for commits, close_close incremental ranges
*/
@Tag("core")
@ParameterizedTest
@CsvSource(Array("6", "9"))
def testIncrementalAndTimeTravelWithEventTimeOrdering(tableVersion: String): Unit = {
Expand Down
Loading
Loading