package io.airbyte.integrations.destination.s3_v2

import io.airbyte.cdk.load.command.DestinationCatalog
import io.airbyte.cdk.load.file.object_storage.PathFactory
import io.airbyte.cdk.load.file.s3.S3Client
import io.airbyte.cdk.load.task.SelfTerminating
import io.airbyte.cdk.load.task.TerminalCondition
import io.airbyte.cdk.load.write.WriteOpOverride
import io.github.oshai.kotlinlogging.KotlinLogging
import jakarta.inject.Singleton
import kotlin.random.Random
import kotlin.time.DurationUnit
import kotlin.time.measureTime
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.ExperimentalCoroutinesApi
import kotlinx.coroutines.async
import kotlinx.coroutines.awaitAll
import kotlinx.coroutines.coroutineScope
import kotlinx.coroutines.withContext

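/**
 * Overrides the standard write operation with a mock performance test: each of
 * the configured upload workers streams identical random parts to its own S3
 * object via multipart upload, then the observed throughput is logged. No
 * records from the catalog are actually written; the first stream is used only
 * to derive the object path.
 */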
@Singleton
class S3V2WriteOpOverride(
    private val client: S3Client,
    private val catalog: DestinationCatalog,
    private val config: S3V2Configuration<*>,
    private val pathFactory: PathFactory,
) : WriteOpOverride {
    private val log = KotlinLogging.logger {}

    override val terminalCondition: TerminalCondition = SelfTerminating

    @OptIn(ExperimentalCoroutinesApi::class)
    override suspend fun execute() = coroutineScope {
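        // Seed a PRNG and build a single part-sized buffer of random bytes; every
        // worker re-uploads this same buffer, so the benchmark measures S3 upload
        // throughput rather than data generation.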
        val prng = Random(System.currentTimeMillis())
        val randomPart = prng.nextBytes(config.partSizeBytes.toInt())
        val randomString = randomPart.take(32).joinToString("") { "%02x".format(it) }
        val stream = catalog.streams.first()
        val objectKey = pathFactory.getFinalDirectory(stream) + "/mock-perf-test-$randomString"

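        // Both divisions round down, so the actual upload size can be slightly
        // smaller than the configured object size; the log line below reports
        // the real total.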
        val numParts = (config.objectSizeBytes / config.partSizeBytes).toInt()
        val partsPerWorker = numParts / config.numUploadWorkers
        val actualSizeBytes = partsPerWorker * config.numUploadWorkers * config.partSizeBytes

        log.info {
            "root key=$objectKey; part_size=${config.partSizeBytes}b; num_parts=$numParts (per_worker=$partsPerWorker); total_size=${actualSizeBytes}b; num_workers=${config.numUploadWorkers}"
        }

        val duration = measureTime {
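            // Cap the IO dispatcher's parallelism at the worker count so there
            // is exactly one coroutine per concurrent upload.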
            withContext(Dispatchers.IO.limitedParallelism(config.numUploadWorkers)) {
                (0 until config.numUploadWorkers).map { workerIndex ->
                    async {
                        val workerKey = "$objectKey-worker-$workerIndex"
                        log.info { "Starting upload to $workerKey" }
                        val upload = client.startStreamingUpload(workerKey)
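                        // Multipart part numbers are 1-based, hence partIndex + 1.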
                        repeat(partsPerWorker) { partIndex ->
                            log.info { "Uploading part ${partIndex + 1} of $workerKey" }
                            upload.uploadPart(randomPart, partIndex + 1)
                        }
                        log.info { "Completing upload to $workerKey" }
                        upload.complete()
                    }
                }.awaitAll()
            }
        }
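        // Compute the rate with fractional seconds: Duration.inWholeSeconds
        // truncates to a Long and would divide by zero on sub-second runs.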
        val mbs = actualSizeBytes.toDouble() / duration.toDouble(DurationUnit.SECONDS) / 1024 / 1024
        log.info {
            "Uploaded $actualSizeBytes bytes in $duration (${"%.2f".format(mbs)} MB/s)"
        }
    }
}