Skip to content

Commit 770bbfe

Browse files
darynaishchenkoxiaohansong
authored andcommitted
🏥 Source Notion: update stream schema (#35409)
1 parent 4125958 commit 770bbfe

File tree

8 files changed

+154
-46
lines changed

8 files changed

+154
-46
lines changed

airbyte-integrations/connectors/source-notion/integration_tests/expected_records.jsonl

+3-3
Large diffs are not rendered by default.

airbyte-integrations/connectors/source-notion/metadata.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ data:
1010
connectorSubtype: api
1111
connectorType: source
1212
definitionId: 6e00b415-b02e-4160-bf02-58176a0ae687
13-
dockerImageTag: 2.0.9
13+
dockerImageTag: 2.1.0
1414
dockerRepository: airbyte/source-notion
1515
documentationUrl: https://docs.airbyte.com/integrations/sources/notion
1616
githubIssueLabel: source-notion

airbyte-integrations/connectors/source-notion/pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",]
33
build-backend = "poetry.core.masonry.api"
44

55
[tool.poetry]
6-
version = "2.0.9"
6+
version = "2.1.0"
77
name = "source-notion"
88
description = "Source implementation for Notion."
99
authors = [ "Airbyte <[email protected]>",]

airbyte-integrations/connectors/source-notion/source_notion/schemas/shared/rich_text.json

+11
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,17 @@
3232
"properties": {
3333
"type": {
3434
"type": ["null", "string"]
35+
},
36+
"info": {
37+
"type": ["null", "object"],
38+
"properties": {
39+
"id": {
40+
"type": ["null", "string"]
41+
},
42+
"object": {
43+
"type": ["null", "string"]
44+
}
45+
}
3546
}
3647
}
3748
},

airbyte-integrations/connectors/source-notion/source_notion/schemas/shared/user.json

+28
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,38 @@
3737
"type": {
3838
"type": "string"
3939
},
40+
"info": {
41+
"avatar_url": {
42+
"type": ["null", "string"]
43+
},
44+
"id": {
45+
"type": ["null", "string"]
46+
},
47+
"name": {
48+
"type": ["null", "string"]
49+
},
50+
"object": {
51+
"type": ["null", "string"]
52+
},
53+
"person": {
54+
"type": ["null", "object"],
55+
"properties": {
56+
"email": {
57+
"type": ["null", "string"]
58+
},
59+
"type": {
60+
"type": ["null", "string"]
61+
}
62+
}
63+
}
64+
},
4065
"workspace": {
4166
"type": ["null", "boolean"]
4267
}
4368
}
69+
},
70+
"workspace_name": {
71+
"type": ["null", "string"]
4472
}
4573
}
4674
}

airbyte-integrations/connectors/source-notion/source_notion/streams.py

+31-1
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,22 @@ def request_params(self, next_page_token: Mapping[str, Any] = None, **kwargs) ->
252252
params["start_cursor"] = next_page_token["next_cursor"]
253253
return params
254254

255+
def transform(self, record: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
256+
owner = record.get("bot", {}).get("owner")
257+
if owner:
258+
owner_type = owner.get("type")
259+
owner_info = owner.get(owner_type)
260+
if owner_type and owner_info:
261+
record["bot"]["owner"]["info"] = owner_info
262+
del record["bot"]["owner"][owner_type]
263+
return record
264+
265+
def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
266+
# sometimes notion api returns response without results object
267+
data = response.json().get("results", [])
268+
for record in data:
269+
yield self.transform(record)
270+
255271

256272
class Databases(IncrementalNotionStream):
257273
"""
@@ -313,6 +329,20 @@ def stream_slices(
313329

314330
yield {"page_id": page_id}
315331

332+
def transform(self, record: Mapping[str, Any]) -> Mapping[str, Any]:
333+
transform_object_field = record.get("type")
334+
335+
if transform_object_field:
336+
rich_text = record.get(transform_object_field, {}).get("rich_text", [])
337+
for r in rich_text:
338+
mention = r.get("mention")
339+
if mention:
340+
type_info = mention[mention["type"]]
341+
record[transform_object_field]["rich_text"][rich_text.index(r)]["mention"]["info"] = type_info
342+
del record[transform_object_field]["rich_text"][rich_text.index(r)]["mention"][mention["type"]]
343+
344+
return record
345+
316346
def parse_response(self, response: requests.Response, stream_state: Mapping[str, Any], **kwargs) -> Iterable[Mapping]:
317347
# pages and databases blocks are already fetched in their streams, so no
318348
# need to do it again
@@ -321,7 +351,7 @@ def parse_response(self, response: requests.Response, stream_state: Mapping[str,
321351
records = super().parse_response(response, stream_state=stream_state, **kwargs)
322352
for record in records:
323353
if record["type"] not in ("child_page", "child_database", "ai_block"):
324-
yield record
354+
yield self.transform(record)
325355

326356
def read_records(self, **kwargs) -> Iterable[Mapping[str, Any]]:
327357
# if reached recursive limit, don't read anymore

airbyte-integrations/connectors/source-notion/unit_tests/test_streams.py

+38
Original file line numberDiff line numberDiff line change
@@ -312,3 +312,41 @@ def test_request_throttle(initial_page_size, expected_page_size, mock_response,
312312
stream.should_retry(response=response)
313313

314314
assert stream.page_size == expected_page_size
315+
316+
317+
def test_users_record_transformer():
318+
stream = Users(config=MagicMock())
319+
response_record = {
320+
"object": "user", "id": "id", "name": "Airbyte", "avatar_url": "some url", "type": "bot",
321+
"bot": {"owner": {"type": "user", "user": {"object": "user", "id": "id", "name": "Test User", "avatar_url": None, "type": "person",
322+
"person": {"email": "email"}}}, "workspace_name": "test"}
323+
}
324+
expected_record = {
325+
"object": "user", "id": "id", "name": "Airbyte", "avatar_url": "some url", "type": "bot",
326+
"bot": {"owner": {"type": "user", "info": {"object": "user", "id": "id", "name": "Test User", "avatar_url": None, "type": "person",
327+
"person": {"email": "email"}}}, "workspace_name": "test"}
328+
}
329+
assert stream.transform(response_record) == expected_record
330+
331+
332+
def test_block_record_transformer():
333+
stream = Blocks(parent=None, config=MagicMock())
334+
response_record = {
335+
"object": "block", "id": "id", "parent": {"type": "page_id", "page_id": "id"}, "created_time": "2021-10-19T13:33:00.000Z", "last_edited_time": "2021-10-19T13:33:00.000Z",
336+
"created_by": {"object": "user", "id": "id"}, "last_edited_by": {"object": "user", "id": "id"}, "has_children": False, "archived": False, "type": "paragraph",
337+
"paragraph": {"rich_text": [{"type": "text", "text": {"content": "test", "link": None}, "annotations": {"bold": False, "italic": False, "strikethrough": False, "underline": False, "code": False, "color": "default"}, "plain_text": "test", "href": None},
338+
{"type": "text", "text": {"content": "@", "link": None}, "annotations": {"bold": False, "italic": False, "strikethrough": False, "underline": False, "code": True, "color": "default"}, "plain_text": "@", "href": None},
339+
{"type": "text", "text": {"content": "test", "link": None}, "annotations": {"bold": False, "italic": False, "strikethrough": False, "underline": False, "code": False, "color": "default"}, "plain_text": "test", "href": None},
340+
{"type": "mention", "mention": {"type": "page", "page": {"id": "id"}}, "annotations": {"bold": False, "italic": False, "strikethrough": False, "underline": False, "code": False, "color": "default"},
341+
"plain_text": "test", "href": "https://www.notion.so/id"}], "color": "default"}
342+
}
343+
expected_record = {
344+
"object": "block", "id": "id", "parent": {"type": "page_id", "page_id": "id"}, "created_time": "2021-10-19T13:33:00.000Z", "last_edited_time": "2021-10-19T13:33:00.000Z",
345+
"created_by": {"object": "user", "id": "id"}, "last_edited_by": {"object": "user", "id": "id"}, "has_children": False, "archived": False, "type": "paragraph",
346+
"paragraph": {"rich_text": [{"type": "text", "text": {"content": "test", "link": None}, "annotations":{"bold": False, "italic": False, "strikethrough": False, "underline": False, "code": False, "color": "default"}, "plain_text":"test", "href": None},
347+
{"type": "text", "text": {"content": "@", "link": None}, "annotations": {"bold": False, "italic": False, "strikethrough": False, "underline": False, "code": True, "color": "default"}, "plain_text": "@", "href": None},
348+
{"type": "text", "text": {"content": "test", "link": None}, "annotations": {"bold": False, "italic": False, "strikethrough": False, "underline": False, "code": False, "color": "default"}, "plain_text": "test", "href": None},
349+
{"type": "mention", "mention": {"type": "page", "info": {"id": "id"}}, "annotations": {"bold": False, "italic": False, "strikethrough": False, "underline": False, "code": False, "color": "default"}, "plain_text": "test", "href": "https://www.notion.so/id"}],
350+
"color": "default"}
351+
}
352+
assert stream.transform(response_record) == expected_record

0 commit comments

Comments
 (0)