@@ -7,11 +7,58 @@ package io.airbyte.cdk.load.pipeline
7
7
import io.airbyte.cdk.load.message.WithStream
8
8
9
9
/**
 * [BatchAccumulator] is used internally by the CDK to implement
 * [io.airbyte.cdk.load.write.LoadStrategy]s. Connector devs should never need to implement this
 * interface.
 *
 * It is the glue that connects a specific step in a specific pipeline to the generic pipeline on
 * the back end. (For example, in a three-stage pipeline like bulk load, step 1 is to create a part,
 * step 2 is to upload it, and step 3 is to load it from object storage into a table.)
 *
 * - [S] is a state type that will be threaded through accumulator calls.
 * - [K] is a key type associated with the input data. (NOTE: Currently, there is no support for
 *   key-mapping, so the key is always [io.airbyte.cdk.load.message.StreamKey].) Specifically,
 *   state will always be managed per-key.
 * - [T] is the input data type.
 * - [U] is the output data type.
 *
 * The first time data is seen for a given key, [start] is called (with the partition number). The
 * state returned by [start] will be passed per input to [accept].
 *
 * If [accept] returns [IntermediateOutput] or [FinalOutput], the output will be forwarded to the
 * next stage (if applicable) and/or trigger bookkeeping (iff the output type implements
 * [io.airbyte.cdk.load.message.WithBatchState]).
 *
 * If [accept] returns a [NoOutput] or [IntermediateOutput], the nextState will be passed to the
 * next call to [accept]. If [accept] returns a [FinalOutput], any state will be discarded and a
 * new one will be created on the next input by a new call to [start].
 *
 * When the input stream is exhausted, [finish] will be called with any remaining state iff at least
 * one input was seen for that key. This means that [finish] will not be called on empty keys or on
 * keys where the last call to [accept] yielded a null (finished) state.
 */
interface BatchAccumulator<S, K : WithStream, T, U> {
    /**
     * Creates the initial state for [key]. Called the first time data is seen for that key, and
     * again on the next input after [accept] has returned a [FinalOutput].
     *
     * @param key the key the new state is associated with
     * @param part the partition number
     * @return the state handed to the next call to [accept]
     */
    suspend fun start(key: K, part: Int): S

    /**
     * Consumes one [input] together with the current [state].
     *
     * @param input the next input for the key that [state] belongs to
     * @param state the state returned by [start] or by the previous call to [accept]
     * @return a [NoOutput] or [IntermediateOutput] carrying the state for the next call, or a
     * [FinalOutput] after which the state is discarded
     */
    suspend fun accept(input: T, state: S): BatchAccumulatorResult<S, U>

    /**
     * Flushes whatever is left in [state] once the input stream is exhausted. Only called when at
     * least one input was seen for the key and the last [accept] did not already finish the state.
     *
     * @param state the remaining state for the key
     * @return the final output for that state
     */
    suspend fun finish(state: S): FinalOutput<S, U>
}
45
+
46
/**
 * Result of a single accumulator step: the state to carry forward (if any) and the output to emit
 * (if any).
 *
 * - [S] is the accumulator state type.
 * - [U] is the output data type.
 */
sealed interface BatchAccumulatorResult<S, U> {
    /** State to carry into the next step, or `null` when the state is finished. */
    val nextState: S?

    /** Output produced by this step, or `null` when the step produced nothing. */
    val output: U?
}

/** The step produced no output; [nextState] is carried forward and [output] is always `null`. */
data class NoOutput<S, U>(
    override val nextState: S,
) : BatchAccumulatorResult<S, U> {
    override val output: U? = null
}

/** The step produced an [output] and accumulation continues with [nextState]. */
data class IntermediateOutput<S, U>(
    override val nextState: S,
    override val output: U,
) : BatchAccumulatorResult<S, U>

/** The step produced its last [output]; [nextState] is always `null`, so no state is carried on. */
data class FinalOutput<S, U>(override val output: U) : BatchAccumulatorResult<S, U> {
    override val nextState: S? = null
}
0 commit comments