Skip to content

Commit d146b27

Browse files
authored
Make efficient table discovery during read (#52556)
1 parent ae01f28 commit d146b27

File tree

7 files changed

+87
-29
lines changed

7 files changed

+87
-29
lines changed

airbyte-cdk/java/airbyte-cdk/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,7 @@ corresponds to that version.
174174

175175
| Version | Date | Pull Request | Subject |
176176
|:-----------|:-----------|:------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------|
177+
| 0.48.7 | 2025-01-26 | [\#52556](https://github.com/airbytehq/airbyte/pull/52556) | Make efficient table discovery during read |
177178
| 0.48.6 | 2025-01-26 | [\#51596](https://github.com/airbytehq/airbyte/pull/51596) | Fix flaky source mssql tests |
178179
| 0.48.5 | 2025-01-16 | [\#51583](https://github.com/airbytehq/airbyte/pull/51583) | Also save SSL key to /tmp in destination-postgres |
179180
| 0.48.4 | 2024-12-24 | [\#50410](https://github.com/airbytehq/airbyte/pull/50410) | Save SSL key to /tmp |
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
version=0.48.6
1+
version=0.48.7

airbyte-cdk/java/airbyte-cdk/db-sources/src/main/kotlin/io/airbyte/cdk/integrations/source/jdbc/AbstractJdbcSource.kt

+48-25
Original file line numberDiff line numberDiff line change
@@ -389,31 +389,7 @@ abstract class AbstractJdbcSource<Datatype>(
389389
)
390390
}
391391
.values
392-
.map { fields: List<JsonNode> ->
393-
TableInfo<CommonField<Datatype>>(
394-
nameSpace = fields[0].get(INTERNAL_SCHEMA_NAME).asText(),
395-
name = fields[0].get(INTERNAL_TABLE_NAME).asText(),
396-
fields =
397-
fields
398-
// read the column metadata Json object, and determine its
399-
// type
400-
.map { f: JsonNode ->
401-
val datatype = sourceOperations.getDatabaseFieldType(f)
402-
val jsonType = getAirbyteType(datatype)
403-
LOGGER.debug {
404-
"Table ${fields[0].get(INTERNAL_TABLE_NAME).asText()} column ${f.get(INTERNAL_COLUMN_NAME).asText()}" +
405-
"(type ${f.get(INTERNAL_COLUMN_TYPE_NAME).asText()}[${f.get(INTERNAL_COLUMN_SIZE).asInt()}], " +
406-
"nullable ${f.get(INTERNAL_IS_NULLABLE).asBoolean()}) -> $jsonType"
407-
}
408-
object :
409-
CommonField<Datatype>(
410-
f.get(INTERNAL_COLUMN_NAME).asText(),
411-
datatype
412-
) {}
413-
},
414-
cursorFields = extractCursorFields(fields)
415-
)
416-
}
392+
.map { fields: List<JsonNode> -> jsonFieldListToTableInfo(fields) }
417393
}
418394

419395
private fun extractCursorFields(fields: List<JsonNode>): List<String> {
@@ -579,6 +555,53 @@ abstract class AbstractJdbcSource<Datatype>(
579555
)
580556
}
581557

558+
/**
 * Discovers a single table by reading its column metadata through JDBC.
 *
 * `DatabaseMetaData.getColumns` treats the schema and table arguments as SQL LIKE patterns,
 * so a name containing `_` or `%` may also return columns that belong to other tables. Only
 * rows whose schema and table name equal the requested ones exactly are kept, so a
 * look-alike table is never returned in place of the requested one.
 *
 * @param database database to query
 * @param schema schema (namespace) of the table
 * @param tableName name of the table
 * @return the table's info, or null when no column metadata is reported for exactly this table
 */
override fun discoverTable(
    database: JdbcDatabase,
    schema: String,
    tableName: String
): TableInfo<CommonField<Datatype>>? {
    LOGGER.info { "Discover table: $schema.$tableName" }
    val columns =
        database
            .bufferedResultSetQuery<JsonNode>(
                { connection: Connection ->
                    connection.metaData.getColumns(getCatalog(database), schema, tableName, null)
                },
                { resultSet: ResultSet -> this.getColumnMetadata(resultSet) }
            )
            // The driver matched by pattern; narrow down to the exact table requested.
            .filter { column: JsonNode ->
                column.get(INTERNAL_SCHEMA_NAME).asText() == schema &&
                    column.get(INTERNAL_TABLE_NAME).asText() == tableName
            }
    return if (columns.isEmpty()) null else jsonFieldListToTableInfo(columns)
}
581+
582+
/**
 * Builds the [TableInfo] of one table from its column metadata rows.
 *
 * The schema and table name are read from the first row, so [fields] must not be empty.
 *
 * @param fields column metadata rows, one JSON object per column of the table
 * @return table info holding one [CommonField] per row plus the extracted cursor fields
 */
private fun jsonFieldListToTableInfo(fields: List<JsonNode>): TableInfo<CommonField<Datatype>> {
    val firstColumn = fields[0]
    val schemaName = firstColumn.get(INTERNAL_SCHEMA_NAME).asText()
    val tableName = firstColumn.get(INTERNAL_TABLE_NAME).asText()

    // Resolve the database type of every column from its metadata row.
    val commonFields: List<CommonField<Datatype>> =
        fields.map { column: JsonNode ->
            val columnName = column.get(INTERNAL_COLUMN_NAME).asText()
            val datatype = sourceOperations.getDatabaseFieldType(column)
            val jsonType = getAirbyteType(datatype)
            LOGGER.debug {
                "Table $tableName column $columnName" +
                    "(type ${column.get(INTERNAL_COLUMN_TYPE_NAME).asText()}[${column.get(INTERNAL_COLUMN_SIZE).asInt()}], " +
                    "nullable ${column.get(INTERNAL_IS_NULLABLE).asBoolean()}) -> $jsonType"
            }
            object : CommonField<Datatype>(columnName, datatype) {}
        }

    return TableInfo<CommonField<Datatype>>(
        nameSpace = schemaName,
        name = tableName,
        fields = commonFields,
        cursorFields = extractCursorFields(fields)
    )
}
604+
582605
public override fun isCursorType(type: Datatype): Boolean {
583606
return sourceOperations.isCursorType(type)
584607
}

airbyte-cdk/java/airbyte-cdk/db-sources/src/main/kotlin/io/airbyte/cdk/integrations/source/relationaldb/AbstractDbSource.kt

+34-1
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ protected constructor(driverClassName: String) :
131131
logPreSyncDebugData(database, catalog)
132132

133133
val fullyQualifiedTableNameToInfo =
134-
discoverWithoutSystemTables(database).associateBy {
134+
discoverWithoutSystemTables(database, catalog).associateBy {
135135
String.format("%s.%s", it.nameSpace, it.name)
136136
}
137137

@@ -289,6 +289,22 @@ protected constructor(driverClassName: String) :
289289
/* no-op */
290290
}
291291

292+
/**
 * Discovers only the tables that back the streams of the configured catalog.
 *
 * Calls [discoverTable] once per configured stream, using the stream's namespace and name.
 * Streams for which [discoverTable] returns null are left out of the result.
 *
 * @param database source database
 * @param catalog configured catalog whose streams should be discovered
 * @return info of every discovered table, in catalog stream order
 */
@Throws(Exception::class)
protected fun discoverWithoutSystemTables(
    database: Database,
    catalog: ConfiguredAirbyteCatalog,
): List<TableInfo<CommonField<DataType>>> =
    catalog.streams.mapNotNull { airbyteStream: ConfiguredAirbyteStream ->
        val stream = airbyteStream.stream
        discoverTable(database, stream.namespace, stream.name)?.also {
            LOGGER.info { "Discovered table: ${it.nameSpace}.${it.name}: $it" }
        }
    }
307+
292308
@Throws(Exception::class)
293309
protected fun discoverWithoutSystemTables(
294310
database: Database
@@ -723,6 +739,23 @@ protected constructor(driverClassName: String) :
723739
tableInfos: List<TableInfo<CommonField<DataType>>>
724740
): Map<String, MutableList<String>>
725741

742+
/**
743+
* Discovers a table in the source database.
744+
*
745+
* @param database
746+
* - source database
747+
* @param schema
748+
* - source schema
749+
* @param tableName
750+
* - source table name
751+
* @return table information, or null if the table could not be discovered
752+
*/
753+
protected abstract fun discoverTable(
754+
database: Database,
755+
schema: String,
756+
tableName: String
757+
): TableInfo<CommonField<DataType>>?
758+
726759
protected abstract val quoteString: String?
727760

728761
/**

airbyte-integrations/connectors/source-mssql/build.gradle

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ plugins {
33
}
44

55
airbyteJavaConnector {
6-
cdkVersionRequired = '0.48.6'
6+
cdkVersionRequired = '0.48.7'
77
features = ['db-sources']
88
useLocalCdk = false
99
}

airbyte-integrations/connectors/source-mssql/metadata.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ data:
99
connectorSubtype: database
1010
connectorType: source
1111
definitionId: b5ea17b1-f170-46dc-bc31-cc744ca984c1
12-
dockerImageTag: 4.1.19
12+
dockerImageTag: 4.1.20
1313
dockerRepository: airbyte/source-mssql
1414
documentationUrl: https://docs.airbyte.com/integrations/sources/mssql
1515
githubIssueLabel: source-mssql

docs/integrations/sources/mssql.md

+1
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,7 @@ WHERE actor_definition_id ='b5ea17b1-f170-46dc-bc31-cc744ca984c1' AND (configura
424424

425425
| Version | Date | Pull Request | Subject |
426426
|:--------|:-----------| :---------------------------------------------------------------------------------------------------------------- |:------------------------------------------------------------------------------------------------------------------------------------------------|
427+
| 4.1.20 | 2025-01-26 | [52556](https://github.com/airbytehq/airbyte/pull/52556) | Improve tables discovery during read. |
427428
| 4.1.19 | 2025-01-16 | [51596](https://github.com/airbytehq/airbyte/pull/51596) | Bump driver versions to latest (jdbc, debezium, cdk) |
428429
| 4.1.18 | 2025-01-06 | [50943](https://github.com/airbytehq/airbyte/pull/50943) | Use airbyte/java-connector-base:2.0.0. This makes the image rootless. The connector will be incompatible with Airbyte < 0.64. |
429430
| 4.1.17 | 2024-12-17 | [49840](https://github.com/airbytehq/airbyte/pull/49840) | Use a base image: airbyte/java-connector-base:1.0.0 |

0 commit comments

Comments
 (0)