From 05b828bfce8782b22417397e869acf9eca58ca2d Mon Sep 17 00:00:00 2001 From: jackylee-ch Date: Thu, 23 Jan 2025 15:29:48 +0800 Subject: [PATCH] [DNM] Bump Spark to 3.5.4 --- .../workflows/util/install_spark_resources.sh | 20 ++++++++--------- .github/workflows/velox_backend.yml | 22 +++++++++---------- docs/get-started/Velox.md | 4 ++-- docs/get-started/build-guide.md | 2 +- pom.xml | 2 +- tools/gluten-it/pom.xml | 2 +- 6 files changed, 26 insertions(+), 26 deletions(-) diff --git a/.github/workflows/util/install_spark_resources.sh b/.github/workflows/util/install_spark_resources.sh index ad454f601a1c..48c07a0b4119 100755 --- a/.github/workflows/util/install_spark_resources.sh +++ b/.github/workflows/util/install_spark_resources.sh @@ -63,26 +63,26 @@ case "$1" in 3.5) # Spark-3.5 cd ${INSTALL_DIR} && \ - wget -nv https://archive.apache.org/dist/spark/spark-3.5.2/spark-3.5.2-bin-hadoop3.tgz && \ - tar --strip-components=1 -xf spark-3.5.2-bin-hadoop3.tgz spark-3.5.2-bin-hadoop3/jars/ && \ - rm -rf spark-3.5.2-bin-hadoop3.tgz && \ + wget -nv https://archive.apache.org/dist/spark/spark-3.5.4/spark-3.5.4-bin-hadoop3.tgz && \ + tar --strip-components=1 -xf spark-3.5.4-bin-hadoop3.tgz spark-3.5.4-bin-hadoop3/jars/ && \ + rm -rf spark-3.5.4-bin-hadoop3.tgz && \ mkdir -p ${INSTALL_DIR}/shims/spark35/spark_home/assembly/target/scala-2.12 && \ mv jars ${INSTALL_DIR}/shims/spark35/spark_home/assembly/target/scala-2.12 && \ - wget -nv https://github.com/apache/spark/archive/refs/tags/v3.5.2.tar.gz && \ - tar --strip-components=1 -xf v3.5.2.tar.gz spark-3.5.2/sql/core/src/test/resources/ && \ + wget -nv https://github.com/apache/spark/archive/refs/tags/v3.5.4.tar.gz && \ + tar --strip-components=1 -xf v3.5.4.tar.gz spark-3.5.4/sql/core/src/test/resources/ && \ mkdir -p shims/spark35/spark_home/ && \ mv sql shims/spark35/spark_home/ ;; 3.5-scala2.13) # Spark-3.5, scala 2.13 cd ${INSTALL_DIR} && \ - wget -nv https://archive.apache.org/dist/spark/spark-3.5.2/spark-3.5.2-bin-hadoop3.tgz && \ - tar --strip-components=1 -xf spark-3.5.2-bin-hadoop3.tgz spark-3.5.2-bin-hadoop3/jars/ && \ - rm -rf spark-3.5.2-bin-hadoop3.tgz && \ + wget -nv https://archive.apache.org/dist/spark/spark-3.5.4/spark-3.5.4-bin-hadoop3.tgz && \ + tar --strip-components=1 -xf spark-3.5.4-bin-hadoop3.tgz spark-3.5.4-bin-hadoop3/jars/ && \ + rm -rf spark-3.5.4-bin-hadoop3.tgz && \ mkdir -p ${INSTALL_DIR}/shims/spark35-scala2.13/spark_home/assembly/target/scala-2.13 && \ mv jars ${INSTALL_DIR}/shims/spark35-scala2.13/spark_home/assembly/target/scala-2.13 && \ - wget -nv https://github.com/apache/spark/archive/refs/tags/v3.5.2.tar.gz && \ - tar --strip-components=1 -xf v3.5.2.tar.gz spark-3.5.2/sql/core/src/test/resources/ && \ + wget -nv https://github.com/apache/spark/archive/refs/tags/v3.5.4.tar.gz && \ + tar --strip-components=1 -xf v3.5.4.tar.gz spark-3.5.4/sql/core/src/test/resources/ && \ mkdir -p shims/spark35-scala2.13/spark_home/ && \ mv sql shims/spark35-scala2.13/spark_home/ ;; diff --git a/.github/workflows/velox_backend.yml b/.github/workflows/velox_backend.yml index 12ddc107679a..78988d059363 100644 --- a/.github/workflows/velox_backend.yml +++ b/.github/workflows/velox_backend.yml @@ -855,9 +855,9 @@ jobs: dnf module -y install python39 && \ alternatives --set python3 /usr/bin/python3.9 && \ pip3 install setuptools && \ - pip3 install pyspark==3.5.2 cython && \ + pip3 install pyspark==3.5.4 cython && \ pip3 install pandas pyarrow - - name: Build and Run unit test for Spark 3.5.2 (other tests) + - name: Build and Run unit test for Spark 3.5.4 (other tests) run: | cd $GITHUB_WORKSPACE/ export SPARK_SCALA_VERSION=2.12 @@ -898,9 +898,9 @@ jobs: dnf module -y install python39 && \ alternatives --set python3 /usr/bin/python3.9 && \ pip3 install setuptools && \ - pip3 install pyspark==3.5.2 cython && \ + pip3 install pyspark==3.5.4 cython && \ pip3 install pandas pyarrow - - name: Build and Run unit test for Spark 3.5.2 with scala-2.13 (other tests) + - name: Build and Run unit test for Spark 3.5.4 with scala-2.13 (other tests) run: | cd $GITHUB_WORKSPACE/ export SPARK_SCALA_VERSION=2.13 @@ -930,7 +930,7 @@ jobs: with: name: arrow-jars-centos-7-${{github.sha}} path: /root/.m2/repository/org/apache/arrow/ - - name: Build and Run unit test for Spark 3.5.2 (slow tests) + - name: Build and Run unit test for Spark 3.5.4 (slow tests) run: | cd $GITHUB_WORKSPACE/ $MVN_CMD clean test -Pspark-3.5 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \ @@ -964,9 +964,9 @@ jobs: dnf module -y install python39 && \ alternatives --set python3 /usr/bin/python3.9 && \ pip3 install setuptools && \ - pip3 install pyspark==3.5.2 cython && \ + pip3 install pyspark==3.5.4 cython && \ pip3 install pandas pyarrow - - name: Build and Run unit test for Spark 3.5.2 (other tests) + - name: Build and Run unit test for Spark 3.5.4 (other tests) run: | cd $GITHUB_WORKSPACE/ export SPARK_SCALA_VERSION=2.12 @@ -995,7 +995,7 @@ jobs: with: name: arrow-jars-centos-7-${{github.sha}} path: /root/.m2/repository/org/apache/arrow/ - - name: Build and Run unit test for Spark 3.5.2 (slow tests) + - name: Build and Run unit test for Spark 3.5.4 (slow tests) run: | cd $GITHUB_WORKSPACE/ $MVN_CMD clean test -Pspark-3.5 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut \ @@ -1028,9 +1028,9 @@ jobs: dnf module -y install python39 && \ alternatives --set python3 /usr/bin/python3.9 && \ pip3 install setuptools && \ - pip3 install pyspark==3.5.2 cython && \ + pip3 install pyspark==3.5.4 cython && \ pip3 install pandas pyarrow - - name: Build and Run unit test for Spark 3.5.2 (other tests) + - name: Build and Run unit test for Spark 3.5.4 (other tests) run: | cd $GITHUB_WORKSPACE/ export SPARK_SCALA_VERSION=2.12 @@ -1059,7 +1059,7 @@ jobs: with: name: arrow-jars-centos-7-${{github.sha}} path: /root/.m2/repository/org/apache/arrow/ - - name: Build and Run unit test for Spark 3.5.2 (slow tests) + - name: Build and Run unit test for Spark 3.5.4 (slow tests) run: | cd $GITHUB_WORKSPACE/ $MVN_CMD clean test -Pspark-3.5 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut \ diff --git a/docs/get-started/Velox.md b/docs/get-started/Velox.md index 0371ea6168a5..76182e46913a 100644 --- a/docs/get-started/Velox.md +++ b/docs/get-started/Velox.md @@ -9,7 +9,7 @@ parent: Getting-Started | Type | Version | |-------|------------------------------| -| Spark | 3.2.2, 3.3.1, 3.4.4, 3.5.2 | +| Spark | 3.2.2, 3.3.1, 3.4.4, 3.5.4 | | OS | Ubuntu20.04/22.04, Centos7/8 | | jdk | openjdk8/jdk17 | | scala | 2.12 | @@ -18,7 +18,7 @@ parent: Getting-Started Currently, with static build Gluten+Velox backend supports all the Linux OSes, but is only tested on **Ubuntu20.04/Ubuntu22.04/Centos7/Centos8**. With dynamic build, Gluten+Velox backend support **Ubuntu20.04/Ubuntu22.04/Centos7/Centos8** and their variants. -Currently, the officially supported Spark versions are 3.2.2, 3.3.1, 3.4.4 and 3.5.2. +Currently, the officially supported Spark versions are 3.2.2, 3.3.1, 3.4.4 and 3.5.4. We need to set up the `JAVA_HOME` env. Currently, Gluten supports **java 8** and **java 17**. diff --git a/docs/get-started/build-guide.md b/docs/get-started/build-guide.md index 45556733490d..6cf816bc63fa 100644 --- a/docs/get-started/build-guide.md +++ b/docs/get-started/build-guide.md @@ -74,4 +74,4 @@ It's name pattern is `gluten--bundle-spark_< | 3.2.2 | 3.2 | 2.12 | | 3.3.1 | 3.3 | 2.12 | | 3.4.4 | 3.4 | 2.12 | -| 3.5.2 | 3.5 | 2.12 | +| 3.5.4 | 3.5 | 2.12 | diff --git a/pom.xml b/pom.xml index d103d25dc5f2..c98dcd20f43b 100644 --- a/pom.xml +++ b/pom.xml @@ -336,7 +336,7 @@ 3.5 spark-sql-columnar-shims-spark35 - 3.5.2 + 3.5.4 1.5.0 delta-spark 3.2.0 diff --git a/tools/gluten-it/pom.xml b/tools/gluten-it/pom.xml index 22256bd281ec..4d65f3221a98 100644 --- a/tools/gluten-it/pom.xml +++ b/tools/gluten-it/pom.xml @@ -170,7 +170,7 @@ spark-3.5 - 3.5.2 + 3.5.4 2.12.18