Skip to content

Commit cb6f6ec

Browse files
authored
Destinations CDK: refreshes logic (#38622)
1 parent 2432cc8 commit cb6f6ec

File tree

15 files changed

+746
-165
lines changed

15 files changed

+746
-165
lines changed

airbyte-cdk/java/airbyte-cdk/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,7 @@ corresponds to that version.
174174

175175
| Version | Date | Pull Request | Subject |
176176
| :------ | :--------- | :--------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------- |
177+
| 0.40.0 | 2024-06-17 | [\#38622](https://github.com/airbytehq/airbyte/pull/38622) | Destinations: Implement refreshes logic in AbstractStreamOperation |
177178
| 0.39.0 | 2024-06-17 | [\#38067](https://github.com/airbytehq/airbyte/pull/38067) | Destinations: Breaking changes for refreshes (fail on INCOMPLETE stream status; ignore OVERWRITE sync mode) |
178179
| 0.38.2 | 2024-06-14 | [\#39460](https://github.com/airbytehq/airbyte/pull/39460) | Bump postgres JDBC driver version |
179180
| 0.38.1 | 2024-06-13 | [\#39445](https://github.com/airbytehq/airbyte/pull/39445) | Sources: More CDK changes to handle big initial snapshots. |
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
version=0.39.0
1+
version=0.40.0

airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/typing_deduping/JdbcDestinationHandler.kt

+4
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,10 @@ abstract class JdbcDestinationHandler<DestinationState>(
325325
streamConfig,
326326
finalTableDefinition.isPresent,
327327
initialRawTableState,
328+
// TODO fix this
329+
// for now, no JDBC destinations actually do refreshes
330+
// so this is just to make our code compile
331+
InitialRawTableStatus(false, false, Optional.empty()),
328332
isSchemaMismatch,
329333
isFinalTableEmpty,
330334
destinationState

airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/operation/StagingStreamOperations.kt

+6-2
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,11 @@ class StagingStreamOperations<DestinationState : MinimumDestinationState>(
3333
) {
3434

3535
private val log = KotlinLogging.logger {}
36-
override fun writeRecords(streamConfig: StreamConfig, stream: Stream<PartialAirbyteMessage>) {
36+
override fun writeRecordsImpl(
37+
streamConfig: StreamConfig,
38+
suffix: String,
39+
stream: Stream<PartialAirbyteMessage>
40+
) {
3741
val writeBuffer =
3842
StagingSerializedBufferFactory.initializeBuffer(fileUploadFormat, destinationColumns)
3943

@@ -51,7 +55,7 @@ class StagingStreamOperations<DestinationState : MinimumDestinationState>(
5155
"Buffer flush complete for stream ${streamConfig.id.originalName} (${FileUtils.byteCountToDisplaySize(it.byteCount)}) to staging"
5256
}
5357
if (it.byteCount != 0L) {
54-
storageOperation.writeToStage(streamConfig, writeBuffer)
58+
storageOperation.writeToStage(streamConfig, suffix, writeBuffer)
5559
} else {
5660
log.info { "Skipping writing to storage since there are no bytes to write" }
5761
}

airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/operation/AbstractStreamOperation.kt

+207-63
Large diffs are not rendered by default.

airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/operation/StandardStreamOperation.kt

+6-2
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,11 @@ class StandardStreamOperation<DestinationState : MinimumDestinationState>(
2424
destinationInitialStatus,
2525
disableTypeDedupe
2626
) {
27-
override fun writeRecords(streamConfig: StreamConfig, stream: Stream<PartialAirbyteMessage>) {
28-
storageOperation.writeToStage(streamConfig, stream)
27+
override fun writeRecordsImpl(
28+
streamConfig: StreamConfig,
29+
suffix: String,
30+
stream: Stream<PartialAirbyteMessage>
31+
) {
32+
storageOperation.writeToStage(streamConfig, suffix, stream)
2933
}
3034
}

airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/operation/StorageOperation.kt

+32-4
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ package io.airbyte.integrations.base.destination.operation
66

77
import io.airbyte.integrations.base.destination.typing_deduping.StreamConfig
88
import io.airbyte.integrations.base.destination.typing_deduping.StreamId
9-
import io.airbyte.protocol.models.v0.DestinationSyncMode
109
import java.time.Instant
1110
import java.util.Optional
1211

@@ -16,15 +15,44 @@ interface StorageOperation<Data> {
1615
*/
1716

1817
/**
19-
* Prepare staging area which cloud be creating any object storage, temp tables or file storage
18+
* Prepare staging area which could be creating any object storage, temp tables or file storage.
19+
* Similar to [createFinalTable], accepts a [suffix] parameter, which should be used in
20+
* conjunction with [overwriteStage].
21+
*
22+
* @param replace If true, then replace existing resources with empty e.g. tables. If false,
23+
* then leave existing resources untouched.
2024
*/
21-
fun prepareStage(streamId: StreamId, destinationSyncMode: DestinationSyncMode)
25+
fun prepareStage(streamId: StreamId, suffix: String, replace: Boolean = false)
26+
27+
/**
28+
* Swap the "temporary" stage into the "real" stage. For example, `DROP TABLE IF EXISTS
29+
* airbyte_internal.foo; ALTER TABLE airbyte_internal.foo_tmp RENAME TO foo`.
30+
*/
31+
fun overwriteStage(streamId: StreamId, suffix: String)
32+
33+
/**
34+
* Copy all records from the temporary stage into the real stage, then drop the temporary stage.
35+
* For example `INSERT INTO airbyte_internal.foo SELECT * FROM airbyte_internal.foo_tmp; DROP
36+
* TABLE airbyte_internal.foo_tmp`.
37+
*/
38+
fun transferFromTempStage(streamId: StreamId, suffix: String)
39+
40+
/**
41+
* Get the generation of a single record in the stage. Not necessarily the min or max
42+
* generation, just _any_ record.
43+
*
44+
* [AbstractStreamOperation] is responsible for orchestrating the stages so that the temp stage
45+
* always contains exactly one generation.
46+
*
47+
* @return The generation ID of a record in the stage, or `null` if the stage is empty.
48+
*/
49+
fun getStageGeneration(streamId: StreamId, suffix: String): Long?
2250

2351
/** Delete previously staged data, using deterministic information from streamId. */
2452
fun cleanupStage(streamId: StreamId)
2553

2654
/** Write data to stage. */
27-
fun writeToStage(streamConfig: StreamConfig, data: Data)
55+
fun writeToStage(streamConfig: StreamConfig, suffix: String, data: Data)
2856

2957
/*
3058
* ==================== Final Table Operations ================================

airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/DestinationInitialStatus.kt

+6
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,13 @@ package io.airbyte.integrations.base.destination.typing_deduping
77
data class DestinationInitialStatus<DestinationState>(
88
val streamConfig: StreamConfig,
99
val isFinalTablePresent: Boolean,
10+
// TODO we should probably make this nullable, then delete InitialRawTableStatus.rawTableExists
1011
val initialRawTableStatus: InitialRawTableStatus,
12+
/**
13+
* The state of the temp raw table. This property is non-nullable; when there is no temp raw table
14+
* at the start of the sync, this is presumably reported with `rawTableExists = false` (TODO confirm).
15+
*/
16+
val initialTempRawTableStatus: InitialRawTableStatus,
1117
val isSchemaMismatch: Boolean,
1218
val isFinalTableEmpty: Boolean,
1319
val destinationState: DestinationState,

airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/InitialRawTableStatus.kt

+12
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,18 @@ import java.util.*
88

99
data class InitialRawTableStatus(
1010
val rawTableExists: Boolean,
11+
/**
12+
* Whether there were any records with null `_airbyte_loaded_at`, at the time that this status
13+
* was fetched.
14+
*/
1115
val hasUnprocessedRecords: Boolean,
16+
// TODO Make maxProcessedTimestamp just `Instant?` instead of Optional
17+
/**
18+
* The highest timestamp such that all records in `SELECT * FROM raw_table WHERE
19+
* _airbyte_extracted_at <= ?` have a nonnull `_airbyte_loaded_at`.
20+
*
21+
* Destinations MAY use this value to only run T+D on records with `_airbyte_extracted_at > ?`
22+
* (note the strictly-greater comparison).
23+
*/
1224
val maxProcessedTimestamp: Optional<Instant>
1325
)

airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/typing_deduping/StreamId.kt

+3-2
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,9 @@ data class StreamId(
4242
return "$quote$finalNamespace$quote.$quote$finalName$suffix$quote"
4343
}
4444

45-
fun rawTableId(quote: String): String {
46-
return "$quote$rawNamespace$quote.$quote$rawName$quote"
45+
@JvmOverloads
46+
fun rawTableId(quote: String, suffix: String = ""): String {
47+
return "$quote$rawNamespace$quote.$quote$rawName$suffix$quote"
4748
}
4849

4950
fun finalName(quote: String): String {

0 commit comments

Comments
 (0)