Skip to content

Commit d6d97d1

Browse files
authored
Spark: Remove Spark 3.3 support (#12279)
1 parent 4c1dec7 commit d6d97d1

File tree

511 files changed

+18
-110385
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

511 files changed

+18
-110385
lines changed

.github/workflows/publish-snapshot.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,4 +41,4 @@ jobs:
4141
- run: |
4242
./gradlew printVersion
4343
./gradlew -DallModules publishApachePublicationToMavenRepository -PmavenUser=${{ secrets.NEXUS_USER }} -PmavenPassword=${{ secrets.NEXUS_PW }}
44-
./gradlew -DflinkVersions= -DsparkVersions=3.3,3.4,3.5 -DscalaVersion=2.13 -DkafkaVersions=3 publishApachePublicationToMavenRepository -PmavenUser=${{ secrets.NEXUS_USER }} -PmavenPassword=${{ secrets.NEXUS_PW }}
44+
./gradlew -DflinkVersions= -DsparkVersions=3.4,3.5 -DscalaVersion=2.13 -DkafkaVersions=3 publishApachePublicationToMavenRepository -PmavenUser=${{ secrets.NEXUS_USER }} -PmavenPassword=${{ secrets.NEXUS_PW }}

.github/workflows/spark-ci.yml

+1-3
Original file line numberDiff line numberDiff line change
@@ -70,13 +70,11 @@ jobs:
7070
strategy:
7171
matrix:
7272
jvm: [11, 17, 21]
73-
spark: ['3.3', '3.4', '3.5']
73+
spark: ['3.4', '3.5']
7474
scala: ['2.12', '2.13']
7575
exclude:
7676
# Spark 3.5 is the first version not failing on Java 21 (https://issues.apache.org/jira/browse/SPARK-42369)
7777
# Full Java 21 support is coming in Spark 4 (https://issues.apache.org/jira/browse/SPARK-43831)
78-
- jvm: 21
79-
spark: '3.3'
8078
- jvm: 21
8179
spark: '3.4'
8280
env:

.gitignore

-2
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,6 @@ site/docs/.asf.yaml
3131
site/docs/javadoc/
3232

3333
# benchmark output
34-
spark/v3.3/spark/benchmark/*
35-
spark/v3.3/spark-extensions/benchmark/*
3634
spark/v3.4/spark/benchmark/*
3735
spark/v3.4/spark-extensions/benchmark/*
3836
spark/v3.5/spark/benchmark/*

build.gradle

-1
Original file line numberDiff line numberDiff line change
@@ -1080,7 +1080,6 @@ project(':iceberg-bom') {
10801080
// the BOM references the artifacts for all Scala versions.
10811081
def sparkScalaPattern = ~"(.*)-([0-9][.][0-9]+)_([0-9][.][0-9]+)"
10821082
def sparkScalaVersions = [
1083-
"3.3": ["2.12", "2.13"],
10841083
"3.4": ["2.12", "2.13"],
10851084
"3.5": ["2.12", "2.13"],
10861085
]

dev/stage-binaries.sh

+1-2
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,13 @@
2020

2121
SCALA_VERSION=2.12
2222
FLINK_VERSIONS=1.18,1.19,1.20
23-
SPARK_VERSIONS=3.3,3.4,3.5
23+
SPARK_VERSIONS=3.4,3.5
2424
KAFKA_VERSIONS=3
2525

2626
./gradlew -Prelease -DscalaVersion=$SCALA_VERSION -DflinkVersions=$FLINK_VERSIONS -DsparkVersions=$SPARK_VERSIONS -DkafkaVersions=$KAFKA_VERSIONS publishApachePublicationToMavenRepository
2727

2828
# Also publish Scala 2.13 Artifacts for versions that support it.
2929
# Flink does not yet support 2.13 (and is largely dropping a user-facing dependency on Scala). Hive doesn't need a Scala specification.
30-
./gradlew -Prelease -DscalaVersion=2.13 -DsparkVersions=3.3 :iceberg-spark:iceberg-spark-3.3_2.13:publishApachePublicationToMavenRepository :iceberg-spark:iceberg-spark-extensions-3.3_2.13:publishApachePublicationToMavenRepository :iceberg-spark:iceberg-spark-runtime-3.3_2.13:publishApachePublicationToMavenRepository
3130
./gradlew -Prelease -DscalaVersion=2.13 -DsparkVersions=3.4 :iceberg-spark:iceberg-spark-3.4_2.13:publishApachePublicationToMavenRepository :iceberg-spark:iceberg-spark-extensions-3.4_2.13:publishApachePublicationToMavenRepository :iceberg-spark:iceberg-spark-runtime-3.4_2.13:publishApachePublicationToMavenRepository
3231
./gradlew -Prelease -DscalaVersion=2.13 -DsparkVersions=3.5 :iceberg-spark:iceberg-spark-3.5_2.13:publishApachePublicationToMavenRepository :iceberg-spark:iceberg-spark-extensions-3.5_2.13:publishApachePublicationToMavenRepository :iceberg-spark:iceberg-spark-runtime-3.5_2.13:publishApachePublicationToMavenRepository
3332

docs/docs/aws.md

+9-9
Original file line numberDiff line numberDiff line change
@@ -440,7 +440,7 @@ This is turned off by default.
440440
### S3 Tags
441441

442442
Custom [tags](https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-tagging.html) can be added to S3 objects while writing and deleting.
443-
For example, to write S3 tags with Spark 3.3, you can start the Spark SQL shell with:
443+
For example, to write S3 tags with Spark 3.5, you can start the Spark SQL shell with:
444444
```
445445
spark-sql --conf spark.sql.catalog.my_catalog=org.apache.iceberg.spark.SparkCatalog \
446446
--conf spark.sql.catalog.my_catalog.warehouse=s3://my-bucket/my/key/prefix \
@@ -457,7 +457,7 @@ The property is set to `true` by default.
457457

458458
With the `s3.delete.tags` config, objects are tagged with the configured key-value pairs before deletion.
459459
Users can configure tag-based object lifecycle policy at bucket level to transition objects to different tiers.
460-
For example, to add S3 delete tags with Spark 3.3, you can start the Spark SQL shell with:
460+
For example, to add S3 delete tags with Spark 3.5, you can start the Spark SQL shell with:
461461

462462
```
463463
sh spark-sql --conf spark.sql.catalog.my_catalog=org.apache.iceberg.spark.SparkCatalog \
@@ -473,7 +473,7 @@ Users can also use the catalog property `s3.delete.num-threads` to mention the n
473473

474474
When the catalog property `s3.write.table-tag-enabled` and `s3.write.namespace-tag-enabled` is set to `true` then the objects in S3 will be saved with tags: `iceberg.table=<table-name>` and `iceberg.namespace=<namespace-name>`.
475475
Users can define access and data retention policy per namespace or table based on these tags.
476-
For example, to write table and namespace name as S3 tags with Spark 3.3, you can start the Spark SQL shell with:
476+
For example, to write table and namespace name as S3 tags with Spark 3.5, you can start the Spark SQL shell with:
477477
```
478478
sh spark-sql --conf spark.sql.catalog.my_catalog=org.apache.iceberg.spark.SparkCatalog \
479479
--conf spark.sql.catalog.my_catalog.warehouse=s3://iceberg-warehouse/s3-tagging \
@@ -493,7 +493,7 @@ disaster recovery, etc.
493493
For using cross-region access points, we need to additionally set `use-arn-region-enabled` catalog property to
494494
`true` to enable `S3FileIO` to make cross-region calls, it's not required for same / multi-region access points.
495495

496-
For example, to use S3 access-point with Spark 3.3, you can start the Spark SQL shell with:
496+
For example, to use S3 access-point with Spark 3.5, you can start the Spark SQL shell with:
497497
```
498498
spark-sql --conf spark.sql.catalog.my_catalog=org.apache.iceberg.spark.SparkCatalog \
499499
--conf spark.sql.catalog.my_catalog.warehouse=s3://my-bucket2/my/key/prefix \
@@ -520,7 +520,7 @@ you to fallback to using your IAM role (and its permission sets directly) to acc
520520
is unable to authorize your S3 call. This can be done using the `s3.access-grants.fallback-to-iam` boolean catalog property. By default,
521521
this property is set to `false`.
522522

523-
For example, to add the S3 Access Grants Integration with Spark 3.3, you can start the Spark SQL shell with:
523+
For example, to add the S3 Access Grants Integration with Spark 3.5, you can start the Spark SQL shell with:
524524
```
525525
spark-sql --conf spark.sql.catalog.my_catalog=org.apache.iceberg.spark.SparkCatalog \
526526
--conf spark.sql.catalog.my_catalog.warehouse=s3://my-bucket2/my/key/prefix \
@@ -537,7 +537,7 @@ For more details on using S3 Access Grants, please refer to [Managing access wit
537537
S3 Cross-Region bucket access can be turned on by setting catalog property `s3.cross-region-access-enabled` to `true`.
538538
This is turned off by default to avoid first S3 API call increased latency.
539539

540-
For example, to enable S3 Cross-Region bucket access with Spark 3.3, you can start the Spark SQL shell with:
540+
For example, to enable S3 Cross-Region bucket access with Spark 3.5, you can start the Spark SQL shell with:
541541
```
542542
spark-sql --conf spark.sql.catalog.my_catalog=org.apache.iceberg.spark.SparkCatalog \
543543
--conf spark.sql.catalog.my_catalog.warehouse=s3://my-bucket2/my/key/prefix \
@@ -554,7 +554,7 @@ For more details, please refer to [Cross-Region access for Amazon S3](https://do
554554

555555
To use S3 Acceleration, we need to set `s3.acceleration-enabled` catalog property to `true` to enable `S3FileIO` to make accelerated S3 calls.
556556

557-
For example, to use S3 Acceleration with Spark 3.3, you can start the Spark SQL shell with:
557+
For example, to use S3 Acceleration with Spark 3.5, you can start the Spark SQL shell with:
558558
```
559559
spark-sql --conf spark.sql.catalog.my_catalog=org.apache.iceberg.spark.SparkCatalog \
560560
--conf spark.sql.catalog.my_catalog.warehouse=s3://my-bucket2/my/key/prefix \
@@ -572,7 +572,7 @@ When clients request a dual-stack endpoint, the bucket URL resolves to an IPv6 a
572572

573573
To use S3 Dual-stack, we need to set `s3.dualstack-enabled` catalog property to `true` to enable `S3FileIO` to make dual-stack S3 calls.
574574

575-
For example, to use S3 Dual-stack with Spark 3.3, you can start the Spark SQL shell with:
575+
For example, to use S3 Dual-stack with Spark 3.5, you can start the Spark SQL shell with:
576576
```
577577
spark-sql --conf spark.sql.catalog.my_catalog=org.apache.iceberg.spark.SparkCatalog \
578578
--conf spark.sql.catalog.my_catalog.warehouse=s3://my-bucket2/my/key/prefix \
@@ -698,7 +698,7 @@ LIB_PATH=/usr/share/aws/aws-java-sdk/
698698

699699

700700
ICEBERG_PACKAGES=(
701-
"iceberg-spark-runtime-3.3_2.12"
701+
"iceberg-spark-runtime-3.5_2.12"
702702
"iceberg-flink-runtime"
703703
"iceberg-aws-bundle"
704704
)

docs/docs/nessie.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -33,17 +33,17 @@ See [Project Nessie](https://projectnessie.org) for more information on Nessie.
3333
## Enabling Nessie Catalog
3434

3535
The `iceberg-nessie` module is bundled with Spark and Flink runtimes for all versions from `0.11.0`. To get started
36-
with Nessie (with spark-3.3) and Iceberg simply add the Iceberg runtime to your process. Eg: `spark-sql --packages
37-
org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:{{ icebergVersion }}`.
36+
with Nessie (with spark-3.5) and Iceberg simply add the Iceberg runtime to your process. Eg: `spark-sql --packages
37+
org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:{{ icebergVersion }}`.
3838

3939
## Spark SQL Extensions
4040

4141
Nessie SQL extensions can be used to manage the Nessie repo as shown below.
42-
Example for Spark 3.3 with scala 2.12:
42+
Example for Spark 3.5 with scala 2.12:
4343

4444
```
4545
bin/spark-sql
46-
--packages "org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:{{ icebergVersion }},org.projectnessie.nessie-integrations:nessie-spark-extensions-3.3_2.12:{{ nessieVersion }}"
46+
--packages "org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:{{ icebergVersion }},org.projectnessie.nessie-integrations:nessie-spark-extensions-3.5_2.12:{{ nessieVersion }}"
4747
--conf spark.sql.extensions="org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,org.projectnessie.spark.extensions.NessieSparkSessionExtensions"
4848
--conf <other settings>
4949
```

gradle.properties

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ jmhIncludeRegex=.*
1919
systemProp.defaultFlinkVersions=1.20
2020
systemProp.knownFlinkVersions=1.18,1.19,1.20
2121
systemProp.defaultSparkVersions=3.5
22-
systemProp.knownSparkVersions=3.3,3.4,3.5
22+
systemProp.knownSparkVersions=3.4,3.5
2323
systemProp.defaultKafkaVersions=3
2424
systemProp.knownKafkaVersions=3
2525
systemProp.defaultScalaVersion=2.12

gradle/libs.versions.toml

-1
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,6 @@ roaringbitmap = "1.3.0"
7878
scala-collection-compat = "2.13.0"
7979
slf4j = "2.0.16"
8080
snowflake-jdbc = "3.22.0"
81-
spark-hive33 = "3.3.4"
8281
spark-hive34 = "3.4.4"
8382
spark-hive35 = "3.5.4"
8483
sqlite-jdbc = "3.49.0.0"

jmh.gradle

-5
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,6 @@ if (flinkVersions.contains("1.20")) {
3838
jmhProjects.add(project(":iceberg-flink:iceberg-flink-1.20"))
3939
}
4040

41-
if (sparkVersions.contains("3.3")) {
42-
jmhProjects.add(project(":iceberg-spark:iceberg-spark-3.3_${scalaVersion}"))
43-
jmhProjects.add(project(":iceberg-spark:iceberg-spark-extensions-3.3_${scalaVersion}"))
44-
}
45-
4641
if (sparkVersions.contains("3.4")) {
4742
jmhProjects.add(project(":iceberg-spark:iceberg-spark-3.4_${scalaVersion}"))
4843
jmhProjects.add(project(":iceberg-spark:iceberg-spark-extensions-3.4_${scalaVersion}"))

settings.gradle

-12
Original file line numberDiff line numberDiff line change
@@ -137,18 +137,6 @@ if (flinkVersions.contains("1.20")) {
137137
project(":iceberg-flink:flink-runtime-1.20").name = "iceberg-flink-runtime-1.20"
138138
}
139139

140-
if (sparkVersions.contains("3.3")) {
141-
include ":iceberg-spark:spark-3.3_${scalaVersion}"
142-
include ":iceberg-spark:spark-extensions-3.3_${scalaVersion}"
143-
include ":iceberg-spark:spark-runtime-3.3_${scalaVersion}"
144-
project(":iceberg-spark:spark-3.3_${scalaVersion}").projectDir = file('spark/v3.3/spark')
145-
project(":iceberg-spark:spark-3.3_${scalaVersion}").name = "iceberg-spark-3.3_${scalaVersion}"
146-
project(":iceberg-spark:spark-extensions-3.3_${scalaVersion}").projectDir = file('spark/v3.3/spark-extensions')
147-
project(":iceberg-spark:spark-extensions-3.3_${scalaVersion}").name = "iceberg-spark-extensions-3.3_${scalaVersion}"
148-
project(":iceberg-spark:spark-runtime-3.3_${scalaVersion}").projectDir = file('spark/v3.3/spark-runtime')
149-
project(":iceberg-spark:spark-runtime-3.3_${scalaVersion}").name = "iceberg-spark-runtime-3.3_${scalaVersion}"
150-
}
151-
152140
if (sparkVersions.contains("3.4")) {
153141
include ":iceberg-spark:spark-3.4_${scalaVersion}"
154142
include ":iceberg-spark:spark-extensions-3.4_${scalaVersion}"

site/docs/multi-engine-support.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ Each engine version undergoes the following lifecycle stages:
6767
| 3.0 | End of Life | 0.9.0 | 1.0.0 | [iceberg-spark-runtime-3.0_2.12](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.0_2.12/1.0.0/iceberg-spark-runtime-3.0_2.12-1.0.0.jar) |
6868
| 3.1 | End of Life | 0.12.0 | 1.3.1 | [iceberg-spark-runtime-3.1_2.12](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.1_2.12/1.3.1/iceberg-spark-runtime-3.1_2.12-1.3.1.jar) [1] |
6969
| 3.2 | End of Life | 0.13.0 | 1.4.3 | [iceberg-spark-runtime-3.2_2.12](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.2_2.12/1.4.3/iceberg-spark-runtime-3.2_2.12-1.4.3.jar) |
70-
| 3.3 | Deprecated | 0.14.0 | {{ icebergVersion }} | [iceberg-spark-runtime-3.3_2.12](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.3_2.12/{{ icebergVersion }}/iceberg-spark-runtime-3.3_2.12-{{ icebergVersion }}.jar) |
70+
| 3.3 | End of Life | 0.14.0 | 1.8.0 | [iceberg-spark-runtime-3.3_2.12](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.3_2.12/1.8.0/iceberg-spark-runtime-3.3_2.12-1.8.0.jar) |
7171
| 3.4 | Maintained | 1.3.0 | {{ icebergVersion }} | [iceberg-spark-runtime-3.4_2.12](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.4_2.12/{{ icebergVersion }}/iceberg-spark-runtime-3.4_2.12-{{ icebergVersion }}.jar) |
7272
| 3.5 | Maintained | 1.4.0 | {{ icebergVersion }} | [iceberg-spark-runtime-3.5_2.12](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.5_2.12/{{ icebergVersion }}/iceberg-spark-runtime-3.5_2.12-{{ icebergVersion }}.jar) |
7373

spark/build.gradle

-4
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,6 @@
2020
// add enabled Spark version modules to the build
2121
def sparkVersions = (System.getProperty("sparkVersions") != null ? System.getProperty("sparkVersions") : System.getProperty("defaultSparkVersions")).split(",")
2222

23-
if (sparkVersions.contains("3.3")) {
24-
apply from: file("$projectDir/v3.3/build.gradle")
25-
}
26-
2723
if (sparkVersions.contains("3.4")) {
2824
apply from: file("$projectDir/v3.4/build.gradle")
2925
}

0 commit comments

Comments
 (0)