From 0420b8f1d2f18e7155733176bc4a630936e358cd Mon Sep 17 00:00:00 2001
From: Y Ethan Guo <ethan.guoyihua@gmail.com>
Date: Fri, 26 Jun 2026 18:17:33 -0700
Subject: [PATCH 1/4] Add core-tests profile and tag a cross-version core flow
 subset

Introduce a curated 'core' test set that can run on every Spark version,
while the full unit/functional suites run only on the latest Spark majors.

- core-tests Maven profile: surefire groups=core plus scalatest
  tagsToInclude=SparkSQLCoreFlow (injected only here; an empty
  tagsToInclude would emit -n "" and run zero scala tests).
- Exclude the core set from the normal tasks: add 'core' to excludedGroups
  in unit-tests/functional-tests/-b/-c, and drive the scalatest
  tagsToExclude via the hoodie.scalatest.tagsToExclude property (default
  SparkSQLCoreFlow) so core-flow blocks stay out of normal scala runs.
- CoreFlow scalatest Tag object (name matches the @SparkSQLCoreFlow
  annotation) for per-test tagging via test("...", CoreFlow).
- Tag the core set: @Tag("core") on representative TestCOWDataSource /
  TestMORDataSource methods and on the Spark parquet/orc reader and
  InternalRow parquet writer test classes; test("...", CoreFlow) on basic
  INSERT/UPDATE/DELETE/MERGE/CREATE blocks.
- Trim the dead-wired TestSparkSqlCoreFlow anchor to a representative
  table-type/metadata/keygen/index spread so it fits the per-version budget.
---
 hudi-spark-datasource/hudi-spark/pom.xml      |  2 +-
 .../hudi/functional/TestSparkOrcReader.java   |  1 +
 .../functional/TestSparkParquetReader.java    |  1 +
 .../TestHoodieInternalRowParquetWriter.java   |  2 +
 .../org/apache/hudi/functional/CoreFlow.scala | 34 +++++++++
 .../hudi/functional/TestCOWDataSource.scala   |  8 +-
 .../hudi/functional/TestMORDataSource.scala   |  9 ++-
 .../functional/TestSparkSqlCoreFlow.scala     | 48 +++---------
 .../spark/sql/hudi/ddl/TestCreateTable.scala  |  9 ++-
 .../sql/hudi/dml/insert/TestInsertTable.scala |  9 ++-
 .../sql/hudi/dml/others/TestDeleteTable.scala |  7 +-
 .../hudi/dml/others/TestMergeIntoTable.scala  |  9 ++-
 .../sql/hudi/dml/others/TestUpdateTable.scala |  7 +-
 pom.xml                                       | 73 ++++++++++++++++++-
 14 files changed, 158 insertions(+), 61 deletions(-)
 create mode 100644 hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/CoreFlow.scala
diff --git a/hudi-spark-datasource/hudi-spark/pom.xml b/hudi-spark-datasource/hudi-spark/pom.xml
index efc0f33915236..ff045d782eb3e 100644
--- a/hudi-spark-datasource/hudi-spark/pom.xml
+++ b/hudi-spark-datasource/hudi-spark/pom.xml
@@ -116,7 +116,7 @@
         <groupId>org.scalatest</groupId>
         <artifactId>scalatest-maven-plugin</artifactId>
         <configuration>
-          <tagsToExclude>org.apache.hudi.functional.SparkSQLCoreFlow</tagsToExclude>
+          <tagsToExclude>${hoodie.scalatest.tagsToExclude}</tagsToExclude>
         </configuration>
       </plugin>
       <plugin>
diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestSparkOrcReader.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestSparkOrcReader.java
index 596ac5bfca240..63c991d53fb07 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestSparkOrcReader.java
+++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestSparkOrcReader.java
@@ -31,6 +31,7 @@
 import static org.junit.jupiter.api.Assertions.assertEquals;
 
 @Tag("functional")
+@Tag("core")
 public class TestSparkOrcReader extends TestBootstrapReadBase {
 
   @Test
diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestSparkParquetReader.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestSparkParquetReader.java
index 4b6ce42866fd3..8f2aa006cd12f 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestSparkParquetReader.java
+++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestSparkParquetReader.java
@@ -31,6 +31,7 @@
 import static org.junit.jupiter.api.Assertions.assertEquals;
 
 @Tag("functional")
+@Tag("core")
 public class TestSparkParquetReader extends TestBootstrapReadBase {
 
   @Test
diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/io/storage/row/TestHoodieInternalRowParquetWriter.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/io/storage/row/TestHoodieInternalRowParquetWriter.java
index b2c9d24b1b961..3151fe3918a4b 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/io/storage/row/TestHoodieInternalRowParquetWriter.java
+++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/io/storage/row/TestHoodieInternalRowParquetWriter.java
@@ -42,6 +42,7 @@
 import org.apache.spark.sql.types.StructType;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Tag;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.ValueSource;
 
@@ -56,6 +57,7 @@
 /**
  * Unit tests {@link HoodieInternalRowParquetWriter}.
  */
+@Tag("core")
 public class TestHoodieInternalRowParquetWriter extends HoodieSparkClientTestHarness {
 
   @BeforeEach
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/CoreFlow.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/CoreFlow.scala
new file mode 100644
index 0000000000000..2f0280d07ff76
--- /dev/null
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/CoreFlow.scala
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.functional
+
+import org.scalatest.Tag
+
+/**
+ * ScalaTest tag for the curated "core flow" subset that runs on every Spark
+ * version in CI (via the {@code core-tests} Maven profile), as opposed to the
+ * full suite that runs only on the latest Spark major versions.
+ *
+ * The tag name matches the {@link SparkSQLCoreFlow} Java {@code @TagAnnotation}
+ * so that class-level {@code @SparkSQLCoreFlow} and per-test tagging via
+ * {@code test("...", CoreFlow)} are selected by the same scalatest
+ * {@code tagsToInclude}/{@code tagsToExclude} value.
+ */
+object CoreFlow extends Tag("org.apache.hudi.functional.SparkSQLCoreFlow")
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala
index 599a105e6d001..c5b07d060b38b 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala
@@ -55,7 +55,7 @@ import org.apache.spark.sql.hudi.HoodieSparkSessionExtension
 import org.apache.spark.sql.types.{ArrayType, DataTypes, DateType, IntegerType, LongType, MapType, StringType, StructField, StructType, TimestampType}
 import org.joda.time.DateTime
 import org.joda.time.format.DateTimeFormat
-import org.junit.jupiter.api.{AfterEach, BeforeEach, Disabled, Test}
+import org.junit.jupiter.api.{AfterEach, BeforeEach, Disabled, Tag, Test}
 import org.junit.jupiter.api.Assertions.{assertDoesNotThrow, assertEquals, assertFalse, assertTrue, fail}
 import org.junit.jupiter.api.function.Executable
 import org.junit.jupiter.params.ParameterizedTest
@@ -800,6 +800,7 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup
     assertEquals(schema, actualSchema)
   }
 
+  @Tag("core")
   @ParameterizedTest
   @EnumSource(value = classOf[HoodieRecordType], names = Array("AVRO", "SPARK"))
   def testCopyOnWriteDeletes(recordType: HoodieRecordType): Unit = {
@@ -831,6 +832,7 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup
     assertEquals(snapshotDF2.count(), 80)
   }
 
+  @Tag("core")
   @Test
   def testCopyOnWriteUpserts(): Unit = {
     val recordType = HoodieRecordType.AVRO
@@ -1217,6 +1219,7 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup
     assertEquals(hoodieIncViewDF2.count(), insert2NewKeyCnt)
   }
 
+  @Tag("core")
   @ParameterizedTest
   @EnumSource(value = classOf[HoodieRecordType], names = Array("AVRO", "SPARK"))
   def testComplexDataTypeWriteAndReadConsistency(recordType: HoodieRecordType): Unit = {
@@ -1549,6 +1552,7 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup
     assertEquals(false, Metrics.isInitialized(basePath))
   }
 
+  @Tag("core")
   @ParameterizedTest
   @CsvSource(Array(
     "true,false,AVRO", "true,true,AVRO", "false,true,AVRO", "false,false,AVRO"
@@ -1967,6 +1971,7 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup
   }
 
   /** Test case to verify MAKE_NEW_COLUMNS_NULLABLE config parameter. */
+  @Tag("core")
   @Test
   def testSchemaEvolutionWithNewColumn(): Unit = {
     val df1 = spark.sql("select '1' as event_id, '2' as ts, '3' as version, 'foo' as event_date")
@@ -2263,6 +2268,7 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup
    * - v6: Uses requestedTime for commits, open_close incremental ranges
    * - v9: Uses completionTime for commits, close_close incremental ranges
    */
+  @Tag("core")
   @ParameterizedTest
   @CsvSource(Array("6", "9"))
   def testIncrementalAndTimeTravelWithEventTimeOrdering(tableVersion: String): Unit = {
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSource.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSource.scala
index 81d049d432439..b88a33f7d2d35 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSource.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSource.scala
@@ -47,7 +47,7 @@ import org.apache.spark.sql._
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.hudi.HoodieSparkSessionExtension
 import org.apache.spark.sql.types.{LongType, StringType, StructField, StructType}
-import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
+import org.junit.jupiter.api.{AfterEach, BeforeEach, Tag, Test}
 import org.junit.jupiter.api.Assertions.{assertEquals, assertFalse, assertTrue}
 import org.junit.jupiter.params.ParameterizedTest
 import org.junit.jupiter.params.provider.{Arguments, CsvSource, EnumSource, MethodSource, ValueSource}
@@ -104,6 +104,7 @@ class TestMORDataSource extends HoodieSparkClientTestBase with SparkDatasetMixin
         JFunction.toJavaConsumer((receiver: SparkSessionExtensions) => new HoodieSparkSessionExtension().apply(receiver)))
     )
 
+  @Tag("core")
   @ParameterizedTest
   @CsvSource(Array(
     // Inferred as COMMIT_TIME_ORDERING
@@ -494,6 +495,7 @@ class TestMORDataSource extends HoodieSparkClientTestBase with SparkDatasetMixin
       HoodieMemoryConfig.DEFAULT_MR_COMPACTION_MEMORY_FRACTION)
   }
 
+  @Tag("core")
   @ParameterizedTest
   @CsvSource(value = Array("AVRO,6", "AVRO,8", "SPARK,6", "SPARK,8"))
   def testPayloadDelete(recordType: HoodieRecordType, tableVersion: Int) {
@@ -573,6 +575,7 @@ class TestMORDataSource extends HoodieSparkClientTestBase with SparkDatasetMixin
     assertEquals(0, hudiSnapshotDF3.count()) // 100 records were deleted, 0 record to load
   }
 
+  @Tag("core")
   @ParameterizedTest
   @CsvSource(value = Array("AVRO,6", "AVRO,8", "SPARK,6", "SPARK,8"))
   def testPrunedFiltered(recordType: HoodieRecordType, tableVersion: Int) {
@@ -683,6 +686,7 @@ class TestMORDataSource extends HoodieSparkClientTestBase with SparkDatasetMixin
     assertEquals(0, hudiSnapshotDF3.filter("rider = 'rider-003'").count())
   }
 
+  @Tag("core")
   @ParameterizedTest
   @EnumSource(value = classOf[HoodieRecordType], names = Array("AVRO", "SPARK"))
   def testVectorizedReader(recordType: HoodieRecordType) {
@@ -992,6 +996,7 @@ class TestMORDataSource extends HoodieSparkClientTestBase with SparkDatasetMixin
     assertEquals(partitionCounts("2021/03/03"), count7)
   }
 
+  @Tag("core")
   @ParameterizedTest
   @EnumSource(value = classOf[HoodieRecordType], names = Array("AVRO", "SPARK"))
   def testReadLogOnlyMergeOnReadTable(recordType: HoodieRecordType): Unit = {
@@ -1311,6 +1316,7 @@ class TestMORDataSource extends HoodieSparkClientTestBase with SparkDatasetMixin
    *
    * The read-optimized query should read `fg1_dc1.parquet` only in this case.
    */
+  @Tag("core")
   @Test
   def testReadOptimizedQueryAfterInflightCompactionAndCompletedDeltaCommit(): Unit = {
     val (tableName, tablePath) = ("hoodie_mor_ro_read_test_table", s"${basePath}_mor_test_table")
@@ -1406,6 +1412,7 @@ class TestMORDataSource extends HoodieSparkClientTestBase with SparkDatasetMixin
       roDf.where(col(recordKeyField) === 0).select(dataField).collect()(0).getLong(0))
   }
 
+  @Tag("core")
   @ParameterizedTest
   @ValueSource(ints = Array(6, 8))
   def testSnapshotQueryAfterInflightDeltaCommit(tableVersion: Int): Unit = {
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSparkSqlCoreFlow.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSparkSqlCoreFlow.scala
index 2f37470ce208d..4b560897563f4 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSparkSqlCoreFlow.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSparkSqlCoreFlow.scala
@@ -46,23 +46,19 @@ import scala.collection.JavaConverters._
 class TestSparkSqlCoreFlow extends HoodieSparkSqlTestBase {
   val colsToCompare = "timestamp, _row_key, partition_path, rider, driver, begin_lat, begin_lon, end_lat, end_lon, fare.amount, fare.currency, _hoodie_is_deleted"
 
-  //params for core flow tests
+  // Params for core flow tests. This suite is the cross-Spark-version "core flow" anchor
+  // (runs on every Spark version via the core-tests profile), so the matrix is trimmed to a
+  // representative spread of table type, metadata on/off, key generator, and index type.
+  // The dropped keygen/index permutations are Spark-version-independent and remain covered
+  // by the full suite that runs on the latest Spark versions.
   val params: List[String] = List(
     "COPY_ON_WRITE|false|org.apache.hudi.keygen.SimpleKeyGenerator|GLOBAL_BLOOM",
-    "COPY_ON_WRITE|true|org.apache.hudi.keygen.SimpleKeyGenerator|GLOBAL_BLOOM",
-    "COPY_ON_WRITE|false|org.apache.hudi.keygen.SimpleKeyGenerator|GLOBAL_SIMPLE",
     "COPY_ON_WRITE|true|org.apache.hudi.keygen.SimpleKeyGenerator|GLOBAL_SIMPLE",
     "COPY_ON_WRITE|false|org.apache.hudi.keygen.NonpartitionedKeyGenerator|BLOOM",
-    "COPY_ON_WRITE|true|org.apache.hudi.keygen.NonpartitionedKeyGenerator|BLOOM",
-    "COPY_ON_WRITE|false|org.apache.hudi.keygen.NonpartitionedKeyGenerator|SIMPLE",
     "COPY_ON_WRITE|true|org.apache.hudi.keygen.NonpartitionedKeyGenerator|SIMPLE",
     "MERGE_ON_READ|false|org.apache.hudi.keygen.SimpleKeyGenerator|GLOBAL_BLOOM",
-    "MERGE_ON_READ|true|org.apache.hudi.keygen.SimpleKeyGenerator|GLOBAL_BLOOM",
-    "MERGE_ON_READ|false|org.apache.hudi.keygen.SimpleKeyGenerator|GLOBAL_SIMPLE",
     "MERGE_ON_READ|true|org.apache.hudi.keygen.SimpleKeyGenerator|GLOBAL_SIMPLE",
     "MERGE_ON_READ|false|org.apache.hudi.keygen.NonpartitionedKeyGenerator|BLOOM",
-    "MERGE_ON_READ|true|org.apache.hudi.keygen.NonpartitionedKeyGenerator|BLOOM",
-    "MERGE_ON_READ|false|org.apache.hudi.keygen.NonpartitionedKeyGenerator|SIMPLE",
     "MERGE_ON_READ|true|org.apache.hudi.keygen.NonpartitionedKeyGenerator|SIMPLE"
   )
 
@@ -409,39 +405,13 @@ class TestSparkSqlCoreFlow extends HoodieSparkSqlTestBase {
       HoodieRecord.FILENAME_METADATA_FIELD)
   }
 
-  //params for immutable user flow
+  // Params for immutable user flow. Trimmed to cover both table types and both immutable
+  // write ops (insert, bulk_insert) with a representative metadata/keygen/index spread; see
+  // the note on `params` above for why the full permutation matrix is not needed here.
   val paramsForImmutable: List[String] = List(
     "COPY_ON_WRITE|insert|false|org.apache.hudi.keygen.SimpleKeyGenerator|GLOBAL_BLOOM",
-    "COPY_ON_WRITE|insert|true|org.apache.hudi.keygen.SimpleKeyGenerator|GLOBAL_BLOOM",
-    "COPY_ON_WRITE|insert|false|org.apache.hudi.keygen.SimpleKeyGenerator|GLOBAL_SIMPLE",
-    "COPY_ON_WRITE|insert|true|org.apache.hudi.keygen.SimpleKeyGenerator|GLOBAL_SIMPLE",
-    "COPY_ON_WRITE|insert|false|org.apache.hudi.keygen.NonpartitionedKeyGenerator|BLOOM",
-    "COPY_ON_WRITE|insert|true|org.apache.hudi.keygen.NonpartitionedKeyGenerator|BLOOM",
-    "COPY_ON_WRITE|insert|false|org.apache.hudi.keygen.NonpartitionedKeyGenerator|SIMPLE",
-    "COPY_ON_WRITE|insert|true|org.apache.hudi.keygen.NonpartitionedKeyGenerator|SIMPLE",
-    "MERGE_ON_READ|insert|false|org.apache.hudi.keygen.SimpleKeyGenerator|GLOBAL_BLOOM",
-    "MERGE_ON_READ|insert|true|org.apache.hudi.keygen.SimpleKeyGenerator|GLOBAL_BLOOM",
-    "MERGE_ON_READ|insert|false|org.apache.hudi.keygen.SimpleKeyGenerator|GLOBAL_SIMPLE",
-    "MERGE_ON_READ|insert|true|org.apache.hudi.keygen.SimpleKeyGenerator|GLOBAL_SIMPLE",
-    "MERGE_ON_READ|insert|false|org.apache.hudi.keygen.NonpartitionedKeyGenerator|BLOOM",
-    "MERGE_ON_READ|insert|true|org.apache.hudi.keygen.NonpartitionedKeyGenerator|BLOOM",
-    "MERGE_ON_READ|insert|false|org.apache.hudi.keygen.NonpartitionedKeyGenerator|SIMPLE",
-    "MERGE_ON_READ|insert|true|org.apache.hudi.keygen.NonpartitionedKeyGenerator|SIMPLE",
-    "COPY_ON_WRITE|bulk_insert|false|org.apache.hudi.keygen.SimpleKeyGenerator|GLOBAL_BLOOM",
-    "COPY_ON_WRITE|bulk_insert|true|org.apache.hudi.keygen.SimpleKeyGenerator|GLOBAL_BLOOM",
-    "COPY_ON_WRITE|bulk_insert|false|org.apache.hudi.keygen.SimpleKeyGenerator|GLOBAL_SIMPLE",
-    "COPY_ON_WRITE|bulk_insert|true|org.apache.hudi.keygen.SimpleKeyGenerator|GLOBAL_SIMPLE",
-    "COPY_ON_WRITE|bulk_insert|false|org.apache.hudi.keygen.NonpartitionedKeyGenerator|BLOOM",
-    "COPY_ON_WRITE|bulk_insert|true|org.apache.hudi.keygen.NonpartitionedKeyGenerator|BLOOM",
-    "COPY_ON_WRITE|bulk_insert|false|org.apache.hudi.keygen.NonpartitionedKeyGenerator|SIMPLE",
     "COPY_ON_WRITE|bulk_insert|true|org.apache.hudi.keygen.NonpartitionedKeyGenerator|SIMPLE",
-    "MERGE_ON_READ|bulk_insert|false|org.apache.hudi.keygen.SimpleKeyGenerator|GLOBAL_BLOOM",
-    "MERGE_ON_READ|bulk_insert|true|org.apache.hudi.keygen.SimpleKeyGenerator|GLOBAL_BLOOM",
-    "MERGE_ON_READ|bulk_insert|false|org.apache.hudi.keygen.SimpleKeyGenerator|GLOBAL_SIMPLE",
-    "MERGE_ON_READ|bulk_insert|true|org.apache.hudi.keygen.SimpleKeyGenerator|GLOBAL_SIMPLE",
-    "MERGE_ON_READ|bulk_insert|false|org.apache.hudi.keygen.NonpartitionedKeyGenerator|BLOOM",
-    "MERGE_ON_READ|bulk_insert|true|org.apache.hudi.keygen.NonpartitionedKeyGenerator|BLOOM",
-    "MERGE_ON_READ|bulk_insert|false|org.apache.hudi.keygen.NonpartitionedKeyGenerator|SIMPLE",
+    "MERGE_ON_READ|insert|false|org.apache.hudi.keygen.SimpleKeyGenerator|GLOBAL_BLOOM",
     "MERGE_ON_READ|bulk_insert|true|org.apache.hudi.keygen.NonpartitionedKeyGenerator|SIMPLE"
   )
 
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/ddl/TestCreateTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/ddl/TestCreateTable.scala
index 88ec4b7fb4c36..bf5e06dd1352a 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/ddl/TestCreateTable.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/ddl/TestCreateTable.scala
@@ -24,6 +24,7 @@ import org.apache.hudi.common.schema.{HoodieSchema, HoodieSchemaType}
 import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient}
 import org.apache.hudi.common.util.PartitionPathEncodeUtils.escapePathName
 import org.apache.hudi.config.HoodieWriteConfig
+import org.apache.hudi.functional.CoreFlow
 import org.apache.hudi.hadoop.fs.HadoopFSUtils
 import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat
 import org.apache.hudi.keygen.constant.KeyGeneratorType
@@ -47,7 +48,7 @@ import scala.collection.JavaConverters._
 
 class TestCreateTable extends HoodieSparkSqlTestBase {
 
-  test("Test Create Managed Hoodie Table") {
+  test("Test Create Managed Hoodie Table", CoreFlow) {
     val databaseName = "hudi_database"
     spark.sql(s"create database if not exists $databaseName")
     spark.sql(s"use $databaseName")
@@ -101,7 +102,7 @@ class TestCreateTable extends HoodieSparkSqlTestBase {
     spark.sql("use default")
   }
 
-  test("Test Create Hoodie Table With Options") {
+  test("Test Create Hoodie Table With Options", CoreFlow) {
     val tableName = generateTableName
     spark.sql(
       s"""
@@ -154,7 +155,7 @@ class TestCreateTable extends HoodieSparkSqlTestBase {
     assertFalse(tableConfig.contains(OPERATION.key()))
   }
 
-  test("Test Create External Hoodie Table") {
+  test("Test Create External Hoodie Table", CoreFlow) {
     withTempDir { tmp =>
       // Test create cow table.
       val tableName = generateTableName
@@ -236,7 +237,7 @@ class TestCreateTable extends HoodieSparkSqlTestBase {
     }
   }
 
-  test("Test Create External Hoodie Table with data") {
+  test("Test Create External Hoodie Table with data", CoreFlow) {
     withTempDir { tmp =>
       val options = Map(DataSourceWriteOptions.TABLE_TYPE.key -> HoodieTableType.MERGE_ON_READ.name(),
         HoodieTableConfig.ORDERING_FIELDS.key -> "ordering",
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/insert/TestInsertTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/insert/TestInsertTable.scala
index ebaa13770ed44..155ab028171c3 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/insert/TestInsertTable.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/insert/TestInsertTable.scala
@@ -23,6 +23,7 @@ import org.apache.hudi.HoodieSparkUtils
 import org.apache.hudi.common.config.HoodieStorageConfig
 import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType
 import org.apache.hudi.exception.{HoodieDuplicateKeyException, HoodieException}
+import org.apache.hudi.functional.CoreFlow
 import org.apache.hudi.testutils.HoodieClientTestUtils.createMetaClient
 
 import org.apache.spark.sql.hudi.HoodieSqlCommonUtils
@@ -218,7 +219,7 @@ class TestInsertTable extends HoodieSparkSqlTestBase {
     }
   }
 
-  test("Test Insert Into with values") {
+  test("Test Insert Into with values", CoreFlow) {
     withRecordType()(withTempDir { tmp =>
       val tableName = generateTableName
       // Create a partitioned table
@@ -305,7 +306,7 @@ class TestInsertTable extends HoodieSparkSqlTestBase {
     }
   }
 
-  test("Test Insert Into with dynamic partition") {
+  test("Test Insert Into with dynamic partition", CoreFlow) {
     Seq("cow", "mor").foreach { tableType =>
       withTempDir { tmp =>
         val tableName = generateTableName
@@ -413,7 +414,7 @@ class TestInsertTable extends HoodieSparkSqlTestBase {
     }
   }
 
-  test("Test Insert Into Non Partitioned Table") {
+  test("Test Insert Into Non Partitioned Table", CoreFlow) {
     withRecordType(Seq(HoodieRecordType.AVRO, HoodieRecordType.SPARK))(withTempDir { tmp =>
       val tableName = generateTableName
       withSQLConf("hoodie.datasource.insert.dup.policy" -> "fail") {
@@ -632,7 +633,7 @@ class TestInsertTable extends HoodieSparkSqlTestBase {
       })
   }
 
-  test("Test Insert Overwrite") {
+  test("Test Insert Overwrite", CoreFlow) {
     withTempDir { tmp =>
       Seq("cow", "mor").foreach { tableType =>
         withTable(generateTableName) { tableName =>
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/others/TestDeleteTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/others/TestDeleteTable.scala
index 1bbf05557201f..99008ea9752fc 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/others/TestDeleteTable.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/others/TestDeleteTable.scala
@@ -22,13 +22,14 @@ package org.apache.spark.sql.hudi.dml.others
 import org.apache.hudi.DataSourceWriteOptions._
 import org.apache.hudi.common.table.HoodieTableConfig
 import org.apache.hudi.config.HoodieWriteConfig
+import org.apache.hudi.functional.CoreFlow
 
 import org.apache.spark.sql.SaveMode
 import org.apache.spark.sql.hudi.common.HoodieSparkSqlTestBase
 
 class TestDeleteTable extends HoodieSparkSqlTestBase {
 
-  test("Test Delete Table") {
+  test("Test Delete Table", CoreFlow) {
     withTempDir { tmp =>
       Seq(true, false).foreach { sparkSqlOptimizedWrites =>
         Seq("cow", "mor").foreach { tableType =>
@@ -187,7 +188,7 @@ class TestDeleteTable extends HoodieSparkSqlTestBase {
     }
   }
 
-  test("Test Delete Table On Non-PK Condition") {
+  test("Test Delete Table On Non-PK Condition", CoreFlow) {
     withTempDir { tmp =>
       Seq("cow", "mor").foreach { tableType =>
 
@@ -277,7 +278,7 @@ class TestDeleteTable extends HoodieSparkSqlTestBase {
     }
   }
 
-  test("Test Delete Table with op upsert") {
+  test("Test Delete Table with op upsert", CoreFlow) {
     withTempDir { tmp =>
       Seq("cow", "mor").foreach { tableType =>
         val tableName = generateTableName
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/others/TestMergeIntoTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/others/TestMergeIntoTable.scala
index 6f480eae6134f..a600863eb0598 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/others/TestMergeIntoTable.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/others/TestMergeIntoTable.scala
@@ -26,6 +26,7 @@ import org.apache.hudi.common.table.timeline.HoodieTimeline
 import org.apache.hudi.common.testutils.HoodieTestUtils
 import org.apache.hudi.config.{HoodieClusteringConfig, HoodieWriteConfig}
 import org.apache.hudi.config.HoodieWriteConfig.MERGE_SMALL_FILE_GROUP_CANDIDATES_LIMIT
+import org.apache.hudi.functional.CoreFlow
 import org.apache.hudi.storage.StoragePath
 import org.apache.hudi.testutils.DataSourceTestUtils
 import org.apache.hudi.testutils.HoodieClientTestUtils.createMetaClient
@@ -40,7 +41,7 @@ import org.slf4j.LoggerFactory
 class TestMergeIntoTable extends HoodieSparkSqlTestBase with ScalaAssertionSupport {
   private val log = LoggerFactory.getLogger(getClass)
 
-  test("Test MergeInto Basic") {
+  test("Test MergeInto Basic", CoreFlow) {
     Seq(true, false).foreach { sparkSqlOptimizedWrites =>
       withRecordType()(withTempDir { tmp =>
         withSparkSqlSessionConfig("hoodie.payload.combined.schema.validate" -> "false",
@@ -474,7 +475,7 @@ class TestMergeIntoTable extends HoodieSparkSqlTestBase with ScalaAssertionSuppo
     }
   }
 
-  test("Test MergeInto for MOR table") {
+  test("Test MergeInto for MOR table", CoreFlow) {
     withTempDir { tmp =>
       withSQLConf("hoodie.payload.combined.schema.validate" -> "true", MERGE_SMALL_FILE_GROUP_CANDIDATES_LIMIT.key() -> "0") {
         val tableName = generateTableName
@@ -614,7 +615,7 @@ class TestMergeIntoTable extends HoodieSparkSqlTestBase with ScalaAssertionSuppo
     }
   }
 
-  test("Test MergeInto with insert only") {
+  test("Test MergeInto with insert only", CoreFlow) {
     withTempDir { tmp =>
       withSQLConf("hoodie.payload.combined.schema.validate" -> "true", MERGE_SMALL_FILE_GROUP_CANDIDATES_LIMIT.key() -> "0") {
         // Create a partitioned mor table
@@ -915,7 +916,7 @@ class TestMergeIntoTable extends HoodieSparkSqlTestBase with ScalaAssertionSuppo
     }
   }
 
-  test("Test MergeInto with combination of delete update insert") {
+  test("Test MergeInto with combination of delete update insert", CoreFlow) {
     withRecordType()(withTempDir { tmp =>
       withSQLConf("hoodie.payload.combined.schema.validate" -> "true") {
         val sourceTable = generateTableName
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/others/TestUpdateTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/others/TestUpdateTable.scala
index 9d67da0db5cee..cd487fb4db4f9 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/others/TestUpdateTable.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/others/TestUpdateTable.scala
@@ -25,6 +25,7 @@ import org.apache.hudi.HoodieSparkUtils.gteqSpark3_4
 import org.apache.hudi.common.model.HoodieTableType
 import org.apache.hudi.common.table.timeline.HoodieInstant
 import org.apache.hudi.common.util.{Option => HOption}
+import org.apache.hudi.functional.CoreFlow
 import org.apache.hudi.testutils.HoodieClientTestUtils.createMetaClient
 
 import org.apache.spark.sql.{AnalysisException, Row}
@@ -33,7 +34,7 @@ import org.junit.jupiter.api.Assertions.assertEquals
 
 class TestUpdateTable extends HoodieSparkSqlTestBase {
 
-  test("Test Update Table") {
+  test("Test Update Table", CoreFlow) {
     withRecordType()(withTempDir { tmp =>
       Seq(true, false).foreach { sparkSqlOptimizedWrites =>
         Seq("cow", "mor").foreach { tableType =>
@@ -144,7 +145,7 @@ class TestUpdateTable extends HoodieSparkSqlTestBase {
     })
   }
 
-  test("Test Update Table On Non-PK Condition") {
+  test("Test Update Table On Non-PK Condition", CoreFlow) {
     withRecordType()(withTempDir { tmp =>
       Seq("cow", "mor").foreach {tableType =>
         /** non-partitioned table */
@@ -233,7 +234,7 @@ class TestUpdateTable extends HoodieSparkSqlTestBase {
     })
   }
 
-  test("Test ignoring case for Update Table") {
+  test("Test ignoring case for Update Table", CoreFlow) {
     withTempDir { tmp =>
       Seq("cow", "mor").foreach { tableType =>
         val tableName = generateTableName
diff --git a/pom.xml b/pom.xml
index f142ebf009b85..38a2a2965bd93 100644
--- a/pom.xml
+++ b/pom.xml
@@ -210,6 +210,10 @@
     <skip.hudi-spark3.unit.tests>${skipTests}</skip.hudi-spark3.unit.tests>
     <skip.hudi-spark4.unit.tests>${skipTests}</skip.hudi-spark4.unit.tests>
     <skipDocker>${skipTests}</skipDocker>
+    <!-- Default scalatest tag exclusion: keep the curated core-flow tag out of the
+         normal unit/functional scala runs. The core-tests profile clears this and adds
+         tagsToInclude so the core-flow blocks run only in the dedicated per-version core job. -->
+    <hoodie.scalatest.tagsToExclude>org.apache.hudi.functional.SparkSQLCoreFlow</hoodie.scalatest.tagsToExclude>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     <main.basedir>${project.basedir}</main.basedir>
     <spark.bundle.hive.scope>provided</spark.bundle.hive.scope>
@@ -2088,7 +2092,7 @@
             <configuration combine.self="append">
               <skip>${skipUTs}</skip>
               <forkedProcessExitTimeoutInSeconds>120</forkedProcessExitTimeoutInSeconds>
-              <excludedGroups>functional,functional-b,functional-c</excludedGroups>
+              <excludedGroups>functional,functional-b,functional-c,core</excludedGroups>
               <excludes>
                 <exclude>**/IT*.java</exclude>
                 <exclude>**/testsuite/**/Test*.java</exclude>
@@ -2141,6 +2145,7 @@
               <forkCount>1</forkCount>
               <reuseForks>true</reuseForks>
               <groups>functional</groups>
+              <excludedGroups>core</excludedGroups>
             </configuration>
           </plugin>
           <plugin>
@@ -2189,6 +2194,7 @@
               <forkCount>1</forkCount>
               <reuseForks>true</reuseForks>
               <groups>functional-b</groups>
+              <excludedGroups>core</excludedGroups>
             </configuration>
           </plugin>
           <plugin>
@@ -2237,6 +2243,7 @@
               <forkCount>1</forkCount>
               <reuseForks>true</reuseForks>
               <groups>functional-c</groups>
+              <excludedGroups>core</excludedGroups>
             </configuration>
           </plugin>
           <plugin>
@@ -2267,6 +2274,70 @@
         </plugins>
       </build>
     </profile>
+    <profile>
+      <!-- Curated "core flow" subset that runs on every Spark version in CI, while the
+           full unit/functional suites run only on the latest Spark major versions.
+           Runs surefire @Tag("core") JUnit5 tests and scalatest @SparkSQLCoreFlow /
+           taggedAs(CoreFlow) blocks in one pass. tagsToInclude is set ONLY here (an
+           empty tagsToInclude would emit -n "" and run zero scala tests). -->
+      <id>core-tests</id>
+      <properties>
+        <skipUTs>false</skipUTs>
+        <skipFTs>true</skipFTs>
+        <skipITs>true</skipITs>
+        <hoodie.scalatest.tagsToExclude></hoodie.scalatest.tagsToExclude>
+      </properties>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-surefire-plugin</artifactId>
+            <version>${maven-surefire-plugin.version}</version>
+            <configuration combine.self="append">
+              <skip>${skipUTs}</skip>
+              <forkedProcessExitTimeoutInSeconds>120</forkedProcessExitTimeoutInSeconds>
+              <groups>core</groups>
+              <excludes>
+                <exclude>**/IT*.java</exclude>
+                <exclude>**/testsuite/**/Test*.java</exclude>
+              </excludes>
+            </configuration>
+          </plugin>
+          <plugin>
+            <groupId>org.scalatest</groupId>
+            <artifactId>scalatest-maven-plugin</artifactId>
+            <configuration>
+              <tagsToInclude>org.apache.hudi.functional.SparkSQLCoreFlow</tagsToInclude>
+            </configuration>
+          </plugin>
+          <plugin>
+            <groupId>org.jacoco</groupId>
+            <artifactId>jacoco-maven-plugin</artifactId>
+            <executions>
+              <execution>
+                <goals>
+                  <goal>prepare-agent</goal>
+                </goals>
+                <configuration>
+                  <destFile>${project.build.directory}/jacoco-agent/${jacoco.agent.dest.filename}</destFile>
+                </configuration>
+              </execution>
+              <execution>
+                <id>post-unit-tests</id>
+                <phase>test</phase>
+                <goals>
+                  <goal>report</goal>
+                </goals>
+                <configuration>
+                  <dataFile>${project.build.directory}/jacoco-agent/${jacoco.agent.dest.filename}</dataFile>
+                  <outputDirectory>${project.reporting.outputDirectory}/jacoco-ut</outputDirectory>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
     <profile>
       <id>hudi-platform-service</id>
       <activation>

From 34126b6c53205771d2cf34809f7ed2c2c66f9a54 Mon Sep 17 00:00:00 2001
From: Y Ethan Guo <ethan.guoyihua@gmail.com>
Date: Fri, 26 Jun 2026 18:21:30 -0700
Subject: [PATCH 2/4] Tier the Spark datasource CI matrix: full on latest, core
 on the rest

Run the full unit/functional/scala suites only on the latest 3.x
(spark3.5/Java11) and latest 4.x (spark4.2/Java17). Add test-spark-core-tests
(spark3.3/3.4/3.5) and test-spark-java17-core-tests (spark3.5/4.0/4.1/4.2)
that build once and run -Pcore-tests plus the quickstart on every version.
Leaves Flink, bundle-validation, docker, and build-only jobs unchanged.
---
 .github/workflows/bot.yml | 216 +++++++++++++++++++++++---------------
 1 file changed, 134 insertions(+), 82 deletions(-)

diff --git a/.github/workflows/bot.yml b/.github/workflows/bot.yml
index 36742a585f81f..1dd83a3ed4ecb 100644
--- a/.github/workflows/bot.yml
+++ b/.github/workflows/bot.yml
@@ -311,14 +311,8 @@ jobs:
     strategy:
       matrix:
         include:
-          - scalaProfile: "scala-2.12"
-            sparkProfile: "spark3.3"
-            sparkModules: "hudi-spark-datasource/hudi-spark3.3.x"
-
-          - scalaProfile: "scala-2.12"
-            sparkProfile: "spark3.4"
-            sparkModules: "hudi-spark-datasource/hudi-spark3.4.x"
-
+          # Full suite runs only on the latest 3.x (Spark 3.5 / Scala 2.12 / Java 11).
+          # All 3.x versions, 3.5 included, run the curated core set in test-spark-core-tests.
           - scalaProfile: "scala-2.12"
             sparkProfile: "spark3.5"
             sparkModules: "hudi-spark-datasource/hudi-spark3.5.x"
@@ -368,14 +362,6 @@ jobs:
     strategy:
       matrix:
         include:
-          - scalaProfile: "scala-2.12"
-            sparkProfile: "spark3.3"
-            sparkModules: "hudi-spark-datasource/hudi-spark3.3.x"
-
-          - scalaProfile: "scala-2.12"
-            sparkProfile: "spark3.4"
-            sparkModules: "hudi-spark-datasource/hudi-spark3.4.x"
-
           - scalaProfile: "scala-2.12"
             sparkProfile: "spark3.5"
             sparkModules: "hudi-spark-datasource/hudi-spark3.5.x"
@@ -440,14 +426,6 @@ jobs:
     strategy:
       matrix:
         include:
-          - scalaProfile: "scala-2.12"
-            sparkProfile: "spark3.3"
-            sparkModules: "hudi-spark-datasource/hudi-spark3.3.x"
-
-          - scalaProfile: "scala-2.12"
-            sparkProfile: "spark3.4"
-            sparkModules: "hudi-spark-datasource/hudi-spark3.4.x"
-
           - scalaProfile: "scala-2.12"
             sparkProfile: "spark3.5"
             sparkModules: "hudi-spark-datasource/hudi-spark3.5.x"
@@ -505,14 +483,6 @@ jobs:
     strategy:
       matrix:
         include:
-          - scalaProfile: "scala-2.12"
-            sparkProfile: "spark3.3"
-            sparkModules: "hudi-spark-datasource/hudi-spark3.3.x"
-
-          - scalaProfile: "scala-2.12"
-            sparkProfile: "spark3.4"
-            sparkModules: "hudi-spark-datasource/hudi-spark3.4.x"
-
           - scalaProfile: "scala-2.12"
             sparkProfile: "spark3.5"
             sparkModules: "hudi-spark-datasource/hudi-spark3.5.x"
@@ -570,14 +540,6 @@ jobs:
     strategy:
       matrix:
         include:
-          - scalaProfile: "scala-2.12"
-            sparkProfile: "spark3.3"
-            sparkModules: "hudi-spark-datasource/hudi-spark3.3.x"
-
-          - scalaProfile: "scala-2.12"
-            sparkProfile: "spark3.4"
-            sparkModules: "hudi-spark-datasource/hudi-spark3.4.x"
-
           - scalaProfile: "scala-2.12"
             sparkProfile: "spark3.5"
             sparkModules: "hudi-spark-datasource/hudi-spark3.5.x"
@@ -685,15 +647,9 @@ jobs:
     strategy:
       matrix:
         include:
-          - scalaProfile: "scala-2.13"
-            sparkProfile: "spark3.5"
-            sparkModules: "hudi-spark-datasource/hudi-spark3.5.x"
-          - scalaProfile: "scala-2.13"
-            sparkProfile: "spark4.0"
-            sparkModules: "hudi-spark-datasource/hudi-spark4.0.x"
-          - scalaProfile: "scala-2.13"
-            sparkProfile: "spark4.1"
-            sparkModules: "hudi-spark-datasource/hudi-spark4.1.x"
+          # Full suite runs only on the latest 4.x (Spark 4.2 / Scala 2.13 / Java 17).
+          # Spark 3.5/4.0/4.1 on Java 17 run the curated core set in
+          # test-spark-java17-core-tests.
           - scalaProfile: "scala-2.13"
             sparkProfile: "spark4.2"
             sparkModules: "hudi-spark-datasource/hudi-spark4.2.x"
@@ -743,15 +699,6 @@ jobs:
     strategy:
       matrix:
         include:
-          - scalaProfile: "scala-2.13"
-            sparkProfile: "spark3.5"
-            sparkModules: "hudi-spark-datasource/hudi-spark3.5.x"
-          - scalaProfile: "scala-2.13"
-            sparkProfile: "spark4.0"
-            sparkModules: "hudi-spark-datasource/hudi-spark4.0.x"
-          - scalaProfile: "scala-2.13"
-            sparkProfile: "spark4.1"
-            sparkModules: "hudi-spark-datasource/hudi-spark4.1.x"
           - scalaProfile: "scala-2.13"
             sparkProfile: "spark4.2"
             sparkModules: "hudi-spark-datasource/hudi-spark4.2.x"
@@ -816,15 +763,6 @@ jobs:
     strategy:
       matrix:
         include:
-          - scalaProfile: "scala-2.13"
-            sparkProfile: "spark3.5"
-            sparkModules: "hudi-spark-datasource/hudi-spark3.5.x"
-          - scalaProfile: "scala-2.13"
-            sparkProfile: "spark4.0"
-            sparkModules: "hudi-spark-datasource/hudi-spark4.0.x"
-          - scalaProfile: "scala-2.13"
-            sparkProfile: "spark4.1"
-            sparkModules: "hudi-spark-datasource/hudi-spark4.1.x"
           - scalaProfile: "scala-2.13"
             sparkProfile: "spark4.2"
             sparkModules: "hudi-spark-datasource/hudi-spark4.2.x"
@@ -882,15 +820,6 @@ jobs:
     strategy:
       matrix:
         include:
-          - scalaProfile: "scala-2.13"
-            sparkProfile: "spark3.5"
-            sparkModules: "hudi-spark-datasource/hudi-spark3.5.x"
-          - scalaProfile: "scala-2.13"
-            sparkProfile: "spark4.0"
-            sparkModules: "hudi-spark-datasource/hudi-spark4.0.x"
-          - scalaProfile: "scala-2.13"
-            sparkProfile: "spark4.1"
-            sparkModules: "hudi-spark-datasource/hudi-spark4.1.x"
           - scalaProfile: "scala-2.13"
             sparkProfile: "spark4.2"
             sparkModules: "hudi-spark-datasource/hudi-spark4.2.x"
@@ -943,6 +872,130 @@ jobs:
           token: ${{ secrets.CODECOV_TOKEN }}
 
   test-spark-java17-scala-other-tests:
+    runs-on: ubuntu-latest
+    needs: changes
+    strategy:
+      matrix:
+        include:
+          - scalaProfile: "scala-2.13"
+            sparkProfile: "spark4.2"
+            sparkModules: "hudi-spark-datasource/hudi-spark4.2.x"
+
+    steps:
+      - if: needs.changes.outputs.relevant == 'true'
+        uses: actions/checkout@v5
+      - name: Set up JDK 17
+        if: needs.changes.outputs.relevant == 'true'
+        uses: actions/setup-java@v5
+        with:
+          java-version: '17'
+          distribution: 'temurin'
+          architecture: x64
+          cache: maven
+      - name: Build Project
+        if: needs.changes.outputs.relevant == 'true'
+        env:
+          SCALA_PROFILE: ${{ matrix.scalaProfile }}
+          SPARK_PROFILE: ${{ matrix.sparkProfile }}
+          SPARK_MODULES: ${{ matrix.sparkModules }}
+        run:
+          mvn clean install -T 2 -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -DskipTests=true $MVN_ARGS -am -pl "hudi-examples/hudi-examples-spark,hudi-common,$SPARK_COMMON_MODULES,$SPARK_MODULES"
+      - name: Scala UT - Common & Spark
+        if: needs.changes.outputs.relevant == 'true'
+        env:
+          SCALA_PROFILE: ${{ matrix.scalaProfile }}
+          SPARK_PROFILE: ${{ matrix.sparkProfile }}
+          SPARK_MODULES: ${{ matrix.sparkModules }}
+        run:
+          mvn test -Punit-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -Dtest=skipJavaTests $SCALA_TEST_OTHERS_FILTER -Dsurefire.failIfNoSpecifiedTests=false -pl "hudi-common,$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS -Djacoco.skip=false
+      - name: Scala FT - Spark
+        if: needs.changes.outputs.relevant == 'true'
+        env:
+          SCALA_PROFILE: ${{ matrix.scalaProfile }}
+          SPARK_PROFILE: ${{ matrix.sparkProfile }}
+          SPARK_MODULES: ${{ matrix.sparkModules }}
+        run:
+          mvn test -Pfunctional-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -Dtest=skipJavaTests $SCALA_TEST_OTHERS_FILTER -Dsurefire.failIfNoSpecifiedTests=false -pl "$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS -Djacoco.skip=false
+      - name: Generate merged coverage report
+        if: always() && needs.changes.outputs.relevant == 'true'
+        run: ./scripts/jacoco/generate_merged_coverage_report.sh $GITHUB_WORKSPACE
+      - name: Upload coverage to Codecov
+        if: always() && needs.changes.outputs.relevant == 'true'
+        uses: codecov/codecov-action@75cd11691c0faa626561e295848008c8a7dddffe # v5
+        with:
+          files: ./jacoco-report.xml
+          disable_search: true
+          flags: spark-scala-tests
+          token: ${{ secrets.CODECOV_TOKEN }}
+
+  # Curated core flow subset (-Pcore-tests) on every Spark version. The full
+  # unit/functional/scala suites above run only on the latest 3.x (spark3.5) and
+  # latest 4.x (spark4.2); these jobs give the other versions a fast critical-path
+  # signal. The core set is excluded from the full suites (excludedGroups=core,
+  # tagsToExclude=SparkSQLCoreFlow), so no test runs twice on a given version.
+  test-spark-core-tests:
+    runs-on: ubuntu-latest
+    needs: changes
+    strategy:
+      matrix:
+        include:
+          - scalaProfile: "scala-2.12"
+            sparkProfile: "spark3.3"
+            sparkModules: "hudi-spark-datasource/hudi-spark3.3.x"
+          - scalaProfile: "scala-2.12"
+            sparkProfile: "spark3.4"
+            sparkModules: "hudi-spark-datasource/hudi-spark3.4.x"
+          - scalaProfile: "scala-2.12"
+            sparkProfile: "spark3.5"
+            sparkModules: "hudi-spark-datasource/hudi-spark3.5.x"
+
+    steps:
+      - if: needs.changes.outputs.relevant == 'true'
+        uses: actions/checkout@v5
+      - name: Set up JDK 11
+        if: needs.changes.outputs.relevant == 'true'
+        uses: actions/setup-java@v5
+        with:
+          java-version: '11'
+          distribution: 'temurin'
+          architecture: x64
+          cache: maven
+      - name: Build Project
+        if: needs.changes.outputs.relevant == 'true'
+        env:
+          SCALA_PROFILE: ${{ matrix.scalaProfile }}
+          SPARK_PROFILE: ${{ matrix.sparkProfile }}
+          SPARK_MODULES: ${{ matrix.sparkModules }}
+        run:
+          mvn clean install -T 2 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -DskipTests=true $MVN_ARGS -am -pl "hudi-examples/hudi-examples-spark,$SPARK_COMMON_MODULES,$SPARK_MODULES"
+      - name: Core Tests - Common & Spark
+        if: needs.changes.outputs.relevant == 'true'
+        env:
+          SCALA_PROFILE: ${{ matrix.scalaProfile }}
+          SPARK_PROFILE: ${{ matrix.sparkProfile }}
+          SPARK_MODULES: ${{ matrix.sparkModules }}
+        run:
+          mvn test -Pcore-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -fae -pl "$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS -Djacoco.skip=false
+      - name: Quickstart Test
+        if: needs.changes.outputs.relevant == 'true'
+        env:
+          SCALA_PROFILE: ${{ matrix.scalaProfile }}
+          SPARK_PROFILE: ${{ matrix.sparkProfile }}
+        run:
+          mvn test -Punit-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl hudi-examples/hudi-examples-spark $MVN_ARGS -Djacoco.skip=false
+      - name: Generate merged coverage report
+        if: always() && needs.changes.outputs.relevant == 'true'
+        run: ./scripts/jacoco/generate_merged_coverage_report.sh $GITHUB_WORKSPACE
+      - name: Upload coverage to Codecov
+        if: always() && needs.changes.outputs.relevant == 'true'
+        uses: codecov/codecov-action@75cd11691c0faa626561e295848008c8a7dddffe # v5
+        with:
+          files: ./jacoco-report.xml
+          disable_search: true
+          flags: spark-core-tests
+          token: ${{ secrets.CODECOV_TOKEN }}
+
+  test-spark-java17-core-tests:
     runs-on: ubuntu-latest
     needs: changes
     strategy:
@@ -980,22 +1033,21 @@ jobs:
           SPARK_MODULES: ${{ matrix.sparkModules }}
         run:
           mvn clean install -T 2 -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -DskipTests=true $MVN_ARGS -am -pl "hudi-examples/hudi-examples-spark,hudi-common,$SPARK_COMMON_MODULES,$SPARK_MODULES"
-      - name: Scala UT - Common & Spark
+      - name: Core Tests - Common & Spark
         if: needs.changes.outputs.relevant == 'true'
         env:
           SCALA_PROFILE: ${{ matrix.scalaProfile }}
           SPARK_PROFILE: ${{ matrix.sparkProfile }}
           SPARK_MODULES: ${{ matrix.sparkModules }}
         run:
-          mvn test -Punit-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -Dtest=skipJavaTests $SCALA_TEST_OTHERS_FILTER -Dsurefire.failIfNoSpecifiedTests=false -pl "hudi-common,$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS -Djacoco.skip=false
-      - name: Scala FT - Spark
+          mvn test -Pcore-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -fae -pl "hudi-common,$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS -Djacoco.skip=false
+      - name: Quickstart Test
         if: needs.changes.outputs.relevant == 'true'
         env:
           SCALA_PROFILE: ${{ matrix.scalaProfile }}
           SPARK_PROFILE: ${{ matrix.sparkProfile }}
-          SPARK_MODULES: ${{ matrix.sparkModules }}
         run:
-          mvn test -Pfunctional-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -Dtest=skipJavaTests $SCALA_TEST_OTHERS_FILTER -Dsurefire.failIfNoSpecifiedTests=false -pl "$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS -Djacoco.skip=false
+          mvn test -Punit-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl hudi-examples/hudi-examples-spark $MVN_ARGS -Djacoco.skip=false
       - name: Generate merged coverage report
         if: always() && needs.changes.outputs.relevant == 'true'
         run: ./scripts/jacoco/generate_merged_coverage_report.sh $GITHUB_WORKSPACE
@@ -1005,7 +1057,7 @@ jobs:
         with:
           files: ./jacoco-report.xml
           disable_search: true
-          flags: spark-scala-tests
+          flags: spark-core-tests
           token: ${{ secrets.CODECOV_TOKEN }}
 
   test-flink-1:

From 4ddbfa20e9fbe220db3b669b58d8565ecb92ca2b Mon Sep 17 00:00:00 2001
From: Y Ethan Guo <ethan.guoyihua@gmail.com>
Date: Fri, 26 Jun 2026 22:12:23 -0700
Subject: [PATCH 3/4] Merge the Java 11 and Java 17 core-test jobs into one
 matrix job

Collapse test-spark-core-tests and test-spark-java17-core-tests into a
single matrix-driven job: each matrix entry carries javaVersion and
mvnProfiles (-Pjava17 for the Java 17 rows, empty for Java 11), so the
JDK setup and mvn invocations are shared. Standardize the module list to
include hudi-common on both tracks (harmless on Java 11). Net: 28 -> 27
jobs; apart from hudi-common joining the Java 11 module list, the tests
run per version are unchanged.
---
 .github/workflows/bot.yml | 94 +++++++++++++--------------------------
 1 file changed, 30 insertions(+), 64 deletions(-)

diff --git a/.github/workflows/bot.yml b/.github/workflows/bot.yml
index 1dd83a3ed4ecb..ef2c6d6801f47 100644
--- a/.github/workflows/bot.yml
+++ b/.github/workflows/bot.yml
@@ -649,7 +649,7 @@ jobs:
         include:
           # Full suite runs only on the latest 4.x (Spark 4.2 / Scala 2.13 / Java 17).
           # Spark 3.5/4.0/4.1 on Java 17 run the curated core set in
-          # test-spark-java17-core-tests.
+          # test-spark-core-tests.
           - scalaProfile: "scala-2.13"
             sparkProfile: "spark4.2"
             sparkModules: "hudi-spark-datasource/hudi-spark4.2.x"
@@ -928,11 +928,13 @@ jobs:
           flags: spark-scala-tests
           token: ${{ secrets.CODECOV_TOKEN }}
 
-  # Curated core flow subset (-Pcore-tests) on every Spark version. The full
-  # unit/functional/scala suites above run only on the latest 3.x (spark3.5) and
-  # latest 4.x (spark4.2); these jobs give the other versions a fast critical-path
-  # signal. The core set is excluded from the full suites (excludedGroups=core,
-  # tagsToExclude=SparkSQLCoreFlow), so no test runs twice on a given version.
+  # Curated core flow subset (-Pcore-tests) on every Spark version, for both the
+  # Java 11 / Scala 2.12 and Java 17 / Scala 2.13 tracks in one matrix-driven job.
+  # The full unit/functional/scala suites above run only on the latest 3.x (spark3.5)
+  # and latest 4.x (spark4.2); these entries give the other versions a fast
+  # critical-path signal. The core set is excluded from the full suites
+  # (excludedGroups=core, tagsToExclude=SparkSQLCoreFlow), so no test runs twice.
+  # The per-entry mvnProfiles carries -Pjava17 for the Java 17 rows (empty for Java 11).
   test-spark-core-tests:
     runs-on: ubuntu-latest
     needs: changes
@@ -942,86 +944,47 @@ jobs:
           - scalaProfile: "scala-2.12"
             sparkProfile: "spark3.3"
             sparkModules: "hudi-spark-datasource/hudi-spark3.3.x"
+            javaVersion: "11"
+            mvnProfiles: ""
           - scalaProfile: "scala-2.12"
             sparkProfile: "spark3.4"
             sparkModules: "hudi-spark-datasource/hudi-spark3.4.x"
+            javaVersion: "11"
+            mvnProfiles: ""
           - scalaProfile: "scala-2.12"
             sparkProfile: "spark3.5"
             sparkModules: "hudi-spark-datasource/hudi-spark3.5.x"
-
-    steps:
-      - if: needs.changes.outputs.relevant == 'true'
-        uses: actions/checkout@v5
-      - name: Set up JDK 11
-        if: needs.changes.outputs.relevant == 'true'
-        uses: actions/setup-java@v5
-        with:
-          java-version: '11'
-          distribution: 'temurin'
-          architecture: x64
-          cache: maven
-      - name: Build Project
-        if: needs.changes.outputs.relevant == 'true'
-        env:
-          SCALA_PROFILE: ${{ matrix.scalaProfile }}
-          SPARK_PROFILE: ${{ matrix.sparkProfile }}
-          SPARK_MODULES: ${{ matrix.sparkModules }}
-        run:
-          mvn clean install -T 2 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -DskipTests=true $MVN_ARGS -am -pl "hudi-examples/hudi-examples-spark,$SPARK_COMMON_MODULES,$SPARK_MODULES"
-      - name: Core Tests - Common & Spark
-        if: needs.changes.outputs.relevant == 'true'
-        env:
-          SCALA_PROFILE: ${{ matrix.scalaProfile }}
-          SPARK_PROFILE: ${{ matrix.sparkProfile }}
-          SPARK_MODULES: ${{ matrix.sparkModules }}
-        run:
-          mvn test -Pcore-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -fae -pl "$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS -Djacoco.skip=false
-      - name: Quickstart Test
-        if: needs.changes.outputs.relevant == 'true'
-        env:
-          SCALA_PROFILE: ${{ matrix.scalaProfile }}
-          SPARK_PROFILE: ${{ matrix.sparkProfile }}
-        run:
-          mvn test -Punit-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl hudi-examples/hudi-examples-spark $MVN_ARGS -Djacoco.skip=false
-      - name: Generate merged coverage report
-        if: always() && needs.changes.outputs.relevant == 'true'
-        run: ./scripts/jacoco/generate_merged_coverage_report.sh $GITHUB_WORKSPACE
-      - name: Upload coverage to Codecov
-        if: always() && needs.changes.outputs.relevant == 'true'
-        uses: codecov/codecov-action@75cd11691c0faa626561e295848008c8a7dddffe # v5
-        with:
-          files: ./jacoco-report.xml
-          disable_search: true
-          flags: spark-core-tests
-          token: ${{ secrets.CODECOV_TOKEN }}
-
-  test-spark-java17-core-tests:
-    runs-on: ubuntu-latest
-    needs: changes
-    strategy:
-      matrix:
-        include:
+            javaVersion: "11"
+            mvnProfiles: ""
           - scalaProfile: "scala-2.13"
             sparkProfile: "spark3.5"
             sparkModules: "hudi-spark-datasource/hudi-spark3.5.x"
+            javaVersion: "17"
+            mvnProfiles: "-Pjava17"
           - scalaProfile: "scala-2.13"
             sparkProfile: "spark4.0"
             sparkModules: "hudi-spark-datasource/hudi-spark4.0.x"
+            javaVersion: "17"
+            mvnProfiles: "-Pjava17"
           - scalaProfile: "scala-2.13"
             sparkProfile: "spark4.1"
             sparkModules: "hudi-spark-datasource/hudi-spark4.1.x"
+            javaVersion: "17"
+            mvnProfiles: "-Pjava17"
           - scalaProfile: "scala-2.13"
             sparkProfile: "spark4.2"
             sparkModules: "hudi-spark-datasource/hudi-spark4.2.x"
+            javaVersion: "17"
+            mvnProfiles: "-Pjava17"
 
     steps:
       - if: needs.changes.outputs.relevant == 'true'
         uses: actions/checkout@v5
-      - name: Set up JDK 17
+      - name: Set up JDK ${{ matrix.javaVersion }}
         if: needs.changes.outputs.relevant == 'true'
         uses: actions/setup-java@v5
         with:
-          java-version: '17'
+          java-version: "${{ matrix.javaVersion }}"
           distribution: 'temurin'
           architecture: x64
           cache: maven
@@ -1031,23 +994,26 @@ jobs:
           SCALA_PROFILE: ${{ matrix.scalaProfile }}
           SPARK_PROFILE: ${{ matrix.sparkProfile }}
           SPARK_MODULES: ${{ matrix.sparkModules }}
+          MVN_PROFILES: ${{ matrix.mvnProfiles }}
         run:
-          mvn clean install -T 2 -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -DskipTests=true $MVN_ARGS -am -pl "hudi-examples/hudi-examples-spark,hudi-common,$SPARK_COMMON_MODULES,$SPARK_MODULES"
+          mvn clean install -T 2 $MVN_PROFILES -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -DskipTests=true $MVN_ARGS -am -pl "hudi-examples/hudi-examples-spark,hudi-common,$SPARK_COMMON_MODULES,$SPARK_MODULES"
       - name: Core Tests - Common & Spark
         if: needs.changes.outputs.relevant == 'true'
         env:
           SCALA_PROFILE: ${{ matrix.scalaProfile }}
           SPARK_PROFILE: ${{ matrix.sparkProfile }}
           SPARK_MODULES: ${{ matrix.sparkModules }}
+          MVN_PROFILES: ${{ matrix.mvnProfiles }}
         run:
-          mvn test -Pcore-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -fae -pl "hudi-common,$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS -Djacoco.skip=false
+          mvn test -Pcore-tests $MVN_PROFILES -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -fae -pl "hudi-common,$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS -Djacoco.skip=false
       - name: Quickstart Test
         if: needs.changes.outputs.relevant == 'true'
         env:
           SCALA_PROFILE: ${{ matrix.scalaProfile }}
           SPARK_PROFILE: ${{ matrix.sparkProfile }}
+          MVN_PROFILES: ${{ matrix.mvnProfiles }}
         run:
-          mvn test -Punit-tests -Pjava17 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl hudi-examples/hudi-examples-spark $MVN_ARGS -Djacoco.skip=false
+          mvn test -Punit-tests $MVN_PROFILES -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl hudi-examples/hudi-examples-spark $MVN_ARGS -Djacoco.skip=false
       - name: Generate merged coverage report
         if: always() && needs.changes.outputs.relevant == 'true'
         run: ./scripts/jacoco/generate_merged_coverage_report.sh $GITHUB_WORKSPACE

From 3c9220a38a7f5554d132d9a58f367eb6b5bc141e Mon Sep 17 00:00:00 2001
From: Y Ethan Guo <ethan.guoyihua@gmail.com>
Date: Sat, 27 Jun 2026 11:08:00 -0700
Subject: [PATCH 4/4] Show only scala/spark/java in the core-tests job name

Set an explicit name template on test-spark-core-tests so the check name
shows just scalaProfile, sparkProfile, and javaVersion, hiding the
sparkModules and mvnProfiles matrix fields.
---
 .github/workflows/bot.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/bot.yml b/.github/workflows/bot.yml
index ef2c6d6801f47..11ec387bb72be 100644
--- a/.github/workflows/bot.yml
+++ b/.github/workflows/bot.yml
@@ -936,6 +936,7 @@ jobs:
   # (excludedGroups=core, tagsToExclude=SparkSQLCoreFlow), so no test runs twice.
   # The per-entry mvnProfiles carries -Pjava17 for the Java 17 rows (empty for Java 11).
   test-spark-core-tests:
+    name: test-spark-core-tests (${{ matrix.scalaProfile }}, ${{ matrix.sparkProfile }}, java${{ matrix.javaVersion }})
     runs-on: ubuntu-latest
     needs: changes
     strategy: