From bd8a4c8facadc41955070f58cf2142a4abf09bd5 Mon Sep 17 00:00:00 2001 From: Vadym Ratniuk Date: Thu, 21 Sep 2023 15:07:02 +0300 Subject: [PATCH 01/56] Revert "updated Prerequisites" This reverts commit c12869011234c7e3a9f47521699db43da0d9c4fa. --- docs/integrations/sources/facebook-marketing.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/integrations/sources/facebook-marketing.md b/docs/integrations/sources/facebook-marketing.md index 95c44402263fb..1006fe8cbf7b9 100644 --- a/docs/integrations/sources/facebook-marketing.md +++ b/docs/integrations/sources/facebook-marketing.md @@ -6,7 +6,7 @@ This page guides you through the process of setting up the Facebook Marketing so - A [Facebook Ad Account ID](https://www.facebook.com/business/help/1492627900875762) -- (For Airbyte Open Source) A [Facebook app](https://developers.facebook.com/apps/) with the Marketing API enabled. The credentials of your Facebook account with permission to access your ads account. +- (For Airbyte Open Source) A [Facebook app](https://developers.facebook.com/apps/) with the Marketing API enabled ## Setup guide From d2083b9c6940e8071989d34f53e398655b110b5f Mon Sep 17 00:00:00 2001 From: Vadym Ratniuk Date: Tue, 20 Feb 2024 18:29:40 +0200 Subject: [PATCH 02/56] low code migration --- .../source-slack/source_slack/components.py | 59 +++++ .../source-slack/source_slack/manifest.yaml | 210 ++++++++++++++++++ .../source-slack/source_slack/source.py | 126 ++++++----- 3 files changed, 335 insertions(+), 60 deletions(-) create mode 100644 airbyte-integrations/connectors/source-slack/source_slack/components.py create mode 100644 airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml diff --git a/airbyte-integrations/connectors/source-slack/source_slack/components.py b/airbyte-integrations/connectors/source-slack/source_slack/components.py new file mode 100644 index 0000000000000..862afcbdc4776 --- /dev/null +++ b/airbyte-integrations/connectors/source-slack/source_slack/components.py @@ -0,0 +1,59 @@ +from dataclasses import dataclass +from typing import Optional +from typing import List, Mapping, Any +import requests +from airbyte_cdk.sources.declarative.types import Config, Record, StreamSlice, StreamState +from airbyte_cdk.sources.declarative.extractors import DpathExtractor +from airbyte_cdk.sources.declarative.types import Record +from airbyte_cdk.sources.declarative.transformations import RecordTransformation, AddFields +from airbyte_cdk.sources.streams.core import Stream +from airbyte_cdk.models import AirbyteMessage, SyncMode, Type + +@dataclass +class ChannelMembersExtractor(DpathExtractor): + """ + Record extractor that extracts record of the form from activity logs stream: + """ + def extract_records(self, response: requests.Response) -> List[Record]: + records = super().extract_records(response) + return [{'member_id': record} for record in records] + +# class ChannelsRecordSelector(RecordSelector): +# def select_records( +# self, +# response: requests.Response, +# stream_state: StreamState, +# records_schema: Mapping[str, Any], +# stream_slice: Optional[StreamSlice] = None, +# next_page_token: Optional[Mapping[str, Any]] = None, +# ) -> List[Record]: +# records = super().select_records(response, stream_state, records_schema, stream_slice, next_page_token) +# print(records) +# return records + + +@dataclass +class JoinChannels(RecordTransformation): + """ + Implementations of this class define transformations that can be applied to records of a stream. 
+ """ + join_stream: Stream + def transform( + self, + record: Record, + config: Optional[Config] = None, + stream_state: Optional[StreamState] = None, + stream_slice: Optional[StreamSlice] = None, + ) -> Record: + """ sdf """ + if config.get('join_channels') and not record.get("is_member"): + print(f"++++++++++add {record['id']} ++++++++++++++++++++++++++++++++++++++++") + self.join_stream.channel_id = record['id'] + + for parent_record in self.join_stream.read_records( + sync_mode=SyncMode.full_refresh, cursor_field=None, stream_slice=[], stream_state=None + ): + print("++++++++++++++++++++++++++++++++++++++++++++++++++++") + print(parent_record) + print("++++++++++++++++++++++++++++++++++++++++++++++++++++") + diff --git a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml new file mode 100644 index 0000000000000..7690f293ea8dd --- /dev/null +++ b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml @@ -0,0 +1,210 @@ +#version: 0.61.2 +version: 0.58.7 +type: DeclarativeSource + +spec: + type: Spec + connection_specification: + $schema: http://json-schema.org/draft-07/schema# + type: object + required: + - api_token + - start_date + properties: + api_token: + type: string + title: API Key + airbyte_secret: true + order: 0 + start_date: + type: string + title: Start date + format: date-time + pattern: ^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$ + order: 1 + join_channels: + type: boolean + default: true + title: Join all channels + description: Whether to join all channels or to sync data only from channels the bot is already in. If false, you'll need to manually add the bot to all the channels from which you'd like to sync messages. 
+ additionalProperties: true + +definitions: + + schema_loader: + type: JsonFileSchemaLoader + file_path: "./source_slack/schemas/{{ parameters['name'] }}.json" + + default_paginator: + type: DefaultPaginator + page_token_option: + type: RequestOption + inject_into: request_parameter + field_name: cursor + page_size_option: + type: RequestOption + field_name: limit + inject_into: request_parameter + pagination_strategy: + type: CursorPagination + page_size: 1000 + cursor_value: '{{ response.get("response_metadata", {}).get("next_cursor", {}) }}' + stop_condition: >- + {{ not response.get("response_metadata", {}).get("next_cursor", {}) + }} + + retriever: + type: SimpleRetriever + requester: + type: HttpRequester + url_base: https://slack.com/api/ + path: "{{ parameters['path'] }}" + http_method: GET + request_parameters: { } + request_headers: { } + authenticator: + type: BearerAuthenticator + api_token: '{{ config[''api_token''] }}' + request_body_json: { } + record_selector: + type: RecordSelector + extractor: + type: DpathExtractor + field_path: + - "{{ parameters['field_path'] }}" + paginator: + $ref: "#/definitions/default_paginator" + partition_router: [] + + stream_base: + retriever: + $ref: "#/definitions/retriever" + schema_loader: + $ref: "#/definitions/schema_loader" + primary_key: "id" + + users_stream: + $ref: "#/definitions/stream_base" + $parameters: + name: users + path: users.list + field_path: members + + channels_stream: + $ref: "#/definitions/stream_base" + $parameters: + name: channels + path: conversations.list + field_path: channels + + substream_partition_router: + type: SubstreamPartitionRouter + parent_stream_configs: + - type: ParentStreamConfig + parent_key: id + request_option: + type: RequestOption + field_name: channel + inject_into: request_parameter + partition_field: channel_id + stream: "#/definitions/channels_stream" + + channel_join_stream: + $ref: "#/definitions/stream_base" + $parameters: + name: channel_join + path: conversations.join + field_path: channel + retriever: + $ref: "#/definitions/retriever" + requester: + http_method: POST + partition_router: + $ref: "#/definitions/substream_partition_router" + + + channel_members_stream: + $ref: "#/definitions/stream_base" + $parameters: + name: channel_members + path: conversations.members + field_path: members + primary_key: + - member_id + - channel_id + retriever: + $ref: "#/definitions/retriever" + partition_router: + $ref: "#/definitions/substream_partition_router" + record_selector: + type: RecordSelector + extractor: + class_name: "source_slack.components.ChannelMembersExtractor" + field_path: ['members'] + transformations: + - type: AddFields + fields: + - path: + - channel_id + value: '{{ stream_partition[''channel_id''] }}' + + channel_messages_stream: + $ref: "#/definitions/stream_base" + $parameters: + name: channel_messages + path: conversations.history + field_path: messages + primary_key: + - channel_id + - ts + retriever: + $ref: "#/definitions/retriever" + partition_router: + $ref: "#/definitions/substream_partition_router" + incremental_sync: + type: DatetimeBasedCursor + cursor_field: float_ts + cursor_datetime_formats: + - '%s' + datetime_format: '%s' + start_datetime: + type: MinMaxDatetime + datetime: '{{ config[''start_date''] }}' + datetime_format: '%Y-%m-%dT%H:%M:%SZ' + start_time_option: + type: RequestOption + field_name: oldest + inject_into: request_parameter + end_time_option: + type: RequestOption + field_name: latest + inject_into: request_parameter + end_datetime: + 
type: MinMaxDatetime + datetime: '{{ now_utc().strftime(''%Y-%m-%dT%H:%M:%SZ'') }}' + datetime_format: '%Y-%m-%dT%H:%M:%SZ' + step: P100D + cursor_granularity: PT0S + transformations: + - type: AddFields + fields: + - path: + - float_ts + value: '{{ record.ts|int }}' + - type: AddFields + fields: + - path: + - channel_id + value: '{{ stream_partition[''channel_id''] }}' + +streams: + - "#/definitions/users_stream" + - "#/definitions/channels_stream" + - "#/definitions/channel_members_stream" + - "#/definitions/channel_messages_stream" + - "#/definitions/channels_main_stream" + +check: + type: CheckStream + stream_names: + - users \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-slack/source_slack/source.py b/airbyte-integrations/connectors/source-slack/source_slack/source.py index e785114f865f4..de2f8a0705c27 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/source.py +++ b/airbyte-integrations/connectors/source-slack/source_slack/source.py @@ -2,6 +2,8 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource + from abc import ABC, abstractmethod from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple @@ -347,64 +349,68 @@ def stream_slices(self, stream_state: Mapping[str, Any] = None, **kwargs) -> Ite # yield an empty slice to checkpoint state later yield {} +# +# class SourceSlack(AbstractSource): +# def _get_authenticator(self, config: Mapping[str, Any]): +# # Added to maintain backward compatibility with previous versions +# if "api_token" in config: +# return TokenAuthenticator(config["api_token"]) +# +# credentials = config.get("credentials", {}) +# credentials_title = credentials.get("option_title") +# if credentials_title == "Default OAuth2.0 authorization": +# return TokenAuthenticator(credentials["access_token"]) +# elif credentials_title == "API Token Credentials": +# return TokenAuthenticator(credentials["api_token"]) +# else: +# raise Exception(f"No supported option_title: {credentials_title} specified. See spec.json for references") +# +# def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]: +# try: +# authenticator = self._get_authenticator(config) +# users_stream = Users(authenticator=authenticator) +# next(users_stream.read_records(SyncMode.full_refresh)) +# return True, None +# except Exception as e: +# return ( +# False, +# f"Got an exception while trying to set up the connection: {e}. 
" +# f"Most probably, there are no users in the given Slack instance or your token is incorrect", +# ) +# +# def streams(self, config: Mapping[str, Any]) -> List[Stream]: +# authenticator = self._get_authenticator(config) +# default_start_date = pendulum.parse(config["start_date"]) +# # this field is not exposed to spec, used only for testing purposes +# end_date = config.get("end_date") +# end_date = end_date and pendulum.parse(end_date) +# threads_lookback_window = pendulum.Duration(days=config["lookback_window"]) +# channel_filter = config.get("channel_filter", []) +# should_join_to_channels = config.get("join_channels") +# +# channels = Channels(authenticator=authenticator, join_channels=should_join_to_channels, channel_filter=channel_filter) +# streams = [ +# channels, +# ChannelMembers(authenticator=authenticator, channel_filter=channel_filter), +# ChannelMessages( +# parent=channels, +# authenticator=authenticator, +# default_start_date=default_start_date, +# end_date=end_date, +# channel_filter=channel_filter, +# ), +# Threads( +# authenticator=authenticator, +# default_start_date=default_start_date, +# end_date=end_date, +# lookback_window=threads_lookback_window, +# channel_filter=channel_filter, +# ), +# Users(authenticator=authenticator), +# ] +# +# return streams -class SourceSlack(AbstractSource): - def _get_authenticator(self, config: Mapping[str, Any]): - # Added to maintain backward compatibility with previous versions - if "api_token" in config: - return TokenAuthenticator(config["api_token"]) - - credentials = config.get("credentials", {}) - credentials_title = credentials.get("option_title") - if credentials_title == "Default OAuth2.0 authorization": - return TokenAuthenticator(credentials["access_token"]) - elif credentials_title == "API Token Credentials": - return TokenAuthenticator(credentials["api_token"]) - else: - raise Exception(f"No supported option_title: {credentials_title} specified. See spec.json for references") - - def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]: - try: - authenticator = self._get_authenticator(config) - users_stream = Users(authenticator=authenticator) - next(users_stream.read_records(SyncMode.full_refresh)) - return True, None - except Exception as e: - return ( - False, - f"Got an exception while trying to set up the connection: {e}. 
" - f"Most probably, there are no users in the given Slack instance or your token is incorrect", - ) - - def streams(self, config: Mapping[str, Any]) -> List[Stream]: - authenticator = self._get_authenticator(config) - default_start_date = pendulum.parse(config["start_date"]) - # this field is not exposed to spec, used only for testing purposes - end_date = config.get("end_date") - end_date = end_date and pendulum.parse(end_date) - threads_lookback_window = pendulum.Duration(days=config["lookback_window"]) - channel_filter = config.get("channel_filter", []) - should_join_to_channels = config.get("join_channels") - - channels = Channels(authenticator=authenticator, join_channels=should_join_to_channels, channel_filter=channel_filter) - streams = [ - channels, - ChannelMembers(authenticator=authenticator, channel_filter=channel_filter), - ChannelMessages( - parent=channels, - authenticator=authenticator, - default_start_date=default_start_date, - end_date=end_date, - channel_filter=channel_filter, - ), - Threads( - authenticator=authenticator, - default_start_date=default_start_date, - end_date=end_date, - lookback_window=threads_lookback_window, - channel_filter=channel_filter, - ), - Users(authenticator=authenticator), - ] - - return streams +class SourceSlack(YamlDeclarativeSource): + def __init__(self): + super().__init__(**{"path_to_yaml": "manifest.yaml"}) From 602b92f04f980836c74d254059091cc42523f765 Mon Sep 17 00:00:00 2001 From: Vadym Ratniuk Date: Tue, 20 Feb 2024 20:14:33 +0200 Subject: [PATCH 03/56] added join_channels and channel_filter options --- .../source-slack/source_slack/components.py | 43 +++++------ .../source-slack/source_slack/manifest.yaml | 73 ++++++++++++------- 2 files changed, 67 insertions(+), 49 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/source_slack/components.py b/airbyte-integrations/connectors/source-slack/source_slack/components.py index 862afcbdc4776..008c894dded68 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/components.py +++ b/airbyte-integrations/connectors/source-slack/source_slack/components.py @@ -1,13 +1,17 @@ from dataclasses import dataclass from typing import Optional -from typing import List, Mapping, Any +from typing import List, Mapping, Any, Iterable import requests from airbyte_cdk.sources.declarative.types import Config, Record, StreamSlice, StreamState from airbyte_cdk.sources.declarative.extractors import DpathExtractor +from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever from airbyte_cdk.sources.declarative.types import Record from airbyte_cdk.sources.declarative.transformations import RecordTransformation, AddFields from airbyte_cdk.sources.streams.core import Stream from airbyte_cdk.models import AirbyteMessage, SyncMode, Type +from airbyte_cdk.sources.declarative.types import Config, Record, StreamSlice, StreamState +from airbyte_cdk.sources.streams.core import StreamData + @dataclass class ChannelMembersExtractor(DpathExtractor): @@ -18,26 +22,13 @@ def extract_records(self, response: requests.Response) -> List[Record]: records = super().extract_records(response) return [{'member_id': record} for record in records] -# class ChannelsRecordSelector(RecordSelector): -# def select_records( -# self, -# response: requests.Response, -# stream_state: StreamState, -# records_schema: Mapping[str, Any], -# stream_slice: Optional[StreamSlice] = None, -# next_page_token: Optional[Mapping[str, Any]] = None, -# ) -> List[Record]: -# records = super().select_records(response, 
stream_state, records_schema, stream_slice, next_page_token) -# print(records) -# return records - @dataclass class JoinChannels(RecordTransformation): """ Implementations of this class define transformations that can be applied to records of a stream. """ - join_stream: Stream + def transform( self, record: Record, @@ -46,14 +37,20 @@ def transform( stream_slice: Optional[StreamSlice] = None, ) -> Record: """ sdf """ + print(f"++++++++++CHECK {record['id']} ++++++++++++++++++++++++++++++++++++++++") + # The `is_member` property indicates whether or not the API Bot is already assigned / joined to the channel. + # https://api.slack.com/types/conversation#booleans + channel_id = record.get('id') if config.get('join_channels') and not record.get("is_member"): - print(f"++++++++++add {record['id']} ++++++++++++++++++++++++++++++++++++++++") - self.join_stream.channel_id = record['id'] + response = requests.post( + url='https://slack.com/api/conversations.join', + headers={ + 'Content-Type': 'application/json', + 'Authorization': f'Bearer {config["api_token"]}' + }, + params={'channel': channel_id} + ) + print(response.json()) - for parent_record in self.join_stream.read_records( - sync_mode=SyncMode.full_refresh, cursor_field=None, stream_slice=[], stream_state=None - ): - print("++++++++++++++++++++++++++++++++++++++++++++++++++++") - print(parent_record) - print("++++++++++++++++++++++++++++++++++++++++++++++++++++") + # self.logger.info(f"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!Successfully joined channel: {channel_id}") diff --git a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml index 7690f293ea8dd..b8101ab4219e1 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml +++ b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml @@ -27,6 +27,15 @@ spec: default: true title: Join all channels description: Whether to join all channels or to sync data only from channels the bot is already in. If false, you'll need to manually add the bot to all the channels from which you'd like to sync messages. + channel_filter: + type: array + default: [] + items: + type: string + minLength: 0 + title": "Channel name filter" + description: "A channel name list (without leading '#' char) which limit the channels from which you'd like to sync. Empty list means no filter." 
+ examples: ["channel_one", "channel_two"] additionalProperties: true definitions: @@ -53,35 +62,56 @@ definitions: {{ not response.get("response_metadata", {}).get("next_cursor", {}) }} + + requester: + type: HttpRequester + url_base: https://slack.com/api/ + path: "{{ parameters['path'] }}" + http_method: GET + request_parameters: { } + request_headers: { } + authenticator: + type: BearerAuthenticator + api_token: '{{ config[''api_token''] }}' + request_body_json: { } + retriever: type: SimpleRetriever requester: - type: HttpRequester - url_base: https://slack.com/api/ - path: "{{ parameters['path'] }}" - http_method: GET - request_parameters: { } - request_headers: { } - authenticator: - type: BearerAuthenticator - api_token: '{{ config[''api_token''] }}' - request_body_json: { } + $ref: "#/definitions/requester" + record_selector: + type: RecordSelector + extractor: + type: DpathExtractor + field_path: + - "{{ parameters['field_path'] }}" + paginator: + $ref: "#/definitions/default_paginator" + partition_router: [] + + retriever_filter: + type: SimpleRetriever + requester: + $ref: "#/definitions/requester" record_selector: type: RecordSelector extractor: type: DpathExtractor field_path: - "{{ parameters['field_path'] }}" + record_filter: + type: RecordFilter + condition: "{{ record.id in config.channel_filter or not config.channel_filter }}" paginator: $ref: "#/definitions/default_paginator" partition_router: [] stream_base: + primary_key: "id" retriever: $ref: "#/definitions/retriever" schema_loader: $ref: "#/definitions/schema_loader" - primary_key: "id" users_stream: $ref: "#/definitions/stream_base" @@ -96,6 +126,11 @@ definitions: name: channels path: conversations.list field_path: channels + retriever: + $ref: "#/definitions/retriever_filter" + transformations: + - type: CustomTransformation + class_name: "source_slack.components.JoinChannels" substream_partition_router: type: SubstreamPartitionRouter @@ -109,20 +144,6 @@ definitions: partition_field: channel_id stream: "#/definitions/channels_stream" - channel_join_stream: - $ref: "#/definitions/stream_base" - $parameters: - name: channel_join - path: conversations.join - field_path: channel - retriever: - $ref: "#/definitions/retriever" - requester: - http_method: POST - partition_router: - $ref: "#/definitions/substream_partition_router" - - channel_members_stream: $ref: "#/definitions/stream_base" $parameters: @@ -202,7 +223,7 @@ streams: - "#/definitions/channels_stream" - "#/definitions/channel_members_stream" - "#/definitions/channel_messages_stream" - - "#/definitions/channels_main_stream" + check: type: CheckStream From e84a1c29c8eedeb9a5cd49d30ff7b3556c7d4954 Mon Sep 17 00:00:00 2001 From: Vadym Ratniuk Date: Tue, 20 Feb 2024 22:34:26 +0200 Subject: [PATCH 04/56] added threads --- .../source-slack/source_slack/components.py | 81 +++++++++++++++++++ .../source-slack/source_slack/manifest.yaml | 55 ++++++++++++- 2 files changed, 133 insertions(+), 3 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/source_slack/components.py b/airbyte-integrations/connectors/source-slack/source_slack/components.py index 008c894dded68..0e2de9f632a3c 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/components.py +++ b/airbyte-integrations/connectors/source-slack/source_slack/components.py @@ -1,3 +1,4 @@ +import dpath.util from dataclasses import dataclass from typing import Optional from typing import List, Mapping, Any, Iterable @@ -7,6 +8,7 @@ from airbyte_cdk.sources.declarative.retrievers 
import SimpleRetriever from airbyte_cdk.sources.declarative.types import Record from airbyte_cdk.sources.declarative.transformations import RecordTransformation, AddFields +from airbyte_cdk.sources.declarative.partition_routers import SubstreamPartitionRouter from airbyte_cdk.sources.streams.core import Stream from airbyte_cdk.models import AirbyteMessage, SyncMode, Type from airbyte_cdk.sources.declarative.types import Config, Record, StreamSlice, StreamState @@ -54,3 +56,82 @@ def transform( # self.logger.info(f"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!Successfully joined channel: {channel_id}") + +@dataclass +class ThreadsPartitionRouter(SubstreamPartitionRouter): + + def get_request_params( + self, + stream_state: Optional[StreamState] = None, + stream_slice: Optional[StreamSlice] = None, + next_page_token: Optional[Mapping[str, Any]] = None, + ) -> Mapping[str, Any]: + return { + 'channel': stream_slice['channel_id'], + 'ts': stream_slice['ts'], + } + + def stream_slices(self) -> Iterable[StreamSlice]: + """ + Iterate over each parent stream's record and create a StreamSlice for each record. + + For each stream, iterate over its stream_slices. + For each stream slice, iterate over each record. + yield a stream slice for each such records. + + If a parent slice contains no record, emit a slice with parent_record=None. + + The template string can interpolate the following values: + - parent_stream_slice: mapping representing the parent's stream slice + - parent_record: mapping representing the parent record + - parent_stream_name: string representing the parent stream name + """ + if not self.parent_stream_configs: + yield from [] + else: + for parent_stream_config in self.parent_stream_configs: + parent_stream = parent_stream_config.stream + parent_field = parent_stream_config.parent_key.eval(self.config) + stream_state_field = parent_stream_config.partition_field.eval(self.config) + for parent_stream_slice in parent_stream.stream_slices( + sync_mode=SyncMode.full_refresh, cursor_field=None, stream_state=None + ): + print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@") + print("parent_stream_slice") + print(parent_stream_slice) + empty_parent_slice = True + parent_slice = parent_stream_slice + + for parent_record in parent_stream.read_records( + sync_mode=SyncMode.full_refresh, cursor_field=None, stream_slice=parent_stream_slice, stream_state=None + ): + print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@") + print("parent_record") + print(parent_record) + # Skip non-records (eg AirbyteLogMessage) + if isinstance(parent_record, AirbyteMessage): + if parent_record.type == Type.RECORD: + parent_record = parent_record.record.data + else: + continue + elif isinstance(parent_record, Record): + parent_record = parent_record.data + # try: + # stream_state_value = dpath.util.get(parent_record, parent_field) + # except KeyError: + # pass + # else: + empty_parent_slice = False + print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@") + print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@") + print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@") + print(parent_record) + print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@") + yield { + 'channel_id': parent_record['channel_id'], + 'ts': parent_record['ts'], + "parent_slice": parent_slice + } + # If the parent slice contains no records, + if empty_parent_slice: + yield from [] diff --git a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml index b8101ab4219e1..7efe038d84a44 100644 --- 
a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml +++ b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml @@ -132,7 +132,7 @@ definitions: - type: CustomTransformation class_name: "source_slack.components.JoinChannels" - substream_partition_router: + channels_partition_router: type: SubstreamPartitionRouter parent_stream_configs: - type: ParentStreamConfig @@ -156,7 +156,7 @@ definitions: retriever: $ref: "#/definitions/retriever" partition_router: - $ref: "#/definitions/substream_partition_router" + $ref: "#/definitions/channels_partition_router" record_selector: type: RecordSelector extractor: @@ -181,7 +181,7 @@ definitions: retriever: $ref: "#/definitions/retriever" partition_router: - $ref: "#/definitions/substream_partition_router" + $ref: "#/definitions/channels_partition_router" incremental_sync: type: DatetimeBasedCursor cursor_field: float_ts @@ -218,11 +218,60 @@ definitions: - channel_id value: '{{ stream_partition[''channel_id''] }}' + channel_messages_partition_router: + type: SubstreamPartitionRouter + parent_stream_configs: + - type: ParentStreamConfig + parent_key: id + request_option: + type: RequestOption + field_name: channel + inject_into: request_parameter + partition_field: channel_id + stream: "#/definitions/channel_messages_stream" + + threads_stream: + $ref: "#/definitions/stream_base" + $parameters: + name: threads + path: conversations.replies + field_path: messages + primary_key: + - channel_id + - ts + retriever: + type: SimpleRetriever + requester: + $ref: "#/definitions/requester" + record_selector: + type: RecordSelector + extractor: + type: DpathExtractor + field_path: + - "{{ parameters['field_path'] }}" + paginator: + $ref: "#/definitions/default_paginator" + + partition_router: + class_name: "source_slack.components.ThreadsPartitionRouter" + parent_stream_configs: + - type: ParentStreamConfig + stream: "#/definitions/channel_messages_stream" + parent_key: channel_id + partition_field: channel_id + request_option: + type: RequestOption + field_name: channel + inject_into: request_parameter + + + streams: - "#/definitions/users_stream" - "#/definitions/channels_stream" - "#/definitions/channel_members_stream" - "#/definitions/channel_messages_stream" + - "#/definitions/threads_stream" check: From 53718a175ff431d6cc9cd1d25dea0714f9c2644d Mon Sep 17 00:00:00 2001 From: Vadym Ratniuk Date: Tue, 20 Feb 2024 23:15:37 +0200 Subject: [PATCH 05/56] inclusive params and lookback_window --- .../source-slack/source_slack/manifest.yaml | 30 ++++++++----------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml index 7efe038d84a44..27c398358f993 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml +++ b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml @@ -36,6 +36,14 @@ spec: title": "Channel name filter" description: "A channel name list (without leading '#' char) which limit the channels from which you'd like to sync. Empty list means no filter." 
examples: ["channel_one", "channel_two"] + lookback_window: + type: "integer", + title: "Threads Lookback window (Days)", + description: "How far into the past to look for messages in threads, default is 0 days", + examples: [7, 14], + minimum: 0, + default: 0, + maximum: 365 additionalProperties: true definitions: @@ -180,6 +188,10 @@ definitions: - ts retriever: $ref: "#/definitions/retriever" + requester: + $ref: "#/definitions/requester" + request_parameters: + inclusive: 'True' partition_router: $ref: "#/definitions/channels_partition_router" incremental_sync: @@ -218,18 +230,6 @@ definitions: - channel_id value: '{{ stream_partition[''channel_id''] }}' - channel_messages_partition_router: - type: SubstreamPartitionRouter - parent_stream_configs: - - type: ParentStreamConfig - parent_key: id - request_option: - type: RequestOption - field_name: channel - inject_into: request_parameter - partition_field: channel_id - stream: "#/definitions/channel_messages_stream" - threads_stream: $ref: "#/definitions/stream_base" $parameters: @@ -251,21 +251,18 @@ definitions: - "{{ parameters['field_path'] }}" paginator: $ref: "#/definitions/default_paginator" - partition_router: class_name: "source_slack.components.ThreadsPartitionRouter" parent_stream_configs: - type: ParentStreamConfig stream: "#/definitions/channel_messages_stream" parent_key: channel_id - partition_field: channel_id + partition_field: channel request_option: type: RequestOption field_name: channel inject_into: request_parameter - - streams: - "#/definitions/users_stream" - "#/definitions/channels_stream" @@ -273,7 +270,6 @@ streams: - "#/definitions/channel_messages_stream" - "#/definitions/threads_stream" - check: type: CheckStream stream_names: From e7a034f2eb1be06d9a3bc2b929d3477a3d38268d Mon Sep 17 00:00:00 2001 From: Vadym Ratniuk Date: Wed, 21 Feb 2024 00:25:41 +0200 Subject: [PATCH 06/56] use_lookback_window handle --- .../source-slack/source_slack/manifest.yaml | 30 ++++++++++++++----- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml index 27c398358f993..fc8b9465a4baf 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml +++ b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml @@ -37,12 +37,12 @@ spec: description: "A channel name list (without leading '#' char) which limit the channels from which you'd like to sync. Empty list means no filter." 
examples: ["channel_one", "channel_two"] lookback_window: - type: "integer", - title: "Threads Lookback window (Days)", - description: "How far into the past to look for messages in threads, default is 0 days", - examples: [7, 14], - minimum: 0, - default: 0, + type: "integer" + title: "Threads Lookback window (Days)" + description: "How far into the past to look for messages in threads, default is 0 days" + examples: [7, 14] + minimum: 0 + default: 0 maximum: 365 additionalProperties: true @@ -183,6 +183,7 @@ definitions: name: channel_messages path: conversations.history field_path: messages + use_lookback_window: false primary_key: - channel_id - ts @@ -197,6 +198,7 @@ definitions: incremental_sync: type: DatetimeBasedCursor cursor_field: float_ts + lookback_window: "P{{ config['lookback_window'] if parameters.use_lookback_window else 0 }}D" cursor_datetime_formats: - '%s' datetime_format: '%s' @@ -230,6 +232,14 @@ definitions: - channel_id value: '{{ stream_partition[''channel_id''] }}' + channel_messages_stream_lookback: + $ref: "#/definitions/stream_base" + $parameters: + name: channel_messages + path: conversations.history + field_path: messages + lookback_window: 0 + threads_stream: $ref: "#/definitions/stream_base" $parameters: @@ -255,7 +265,13 @@ definitions: class_name: "source_slack.components.ThreadsPartitionRouter" parent_stream_configs: - type: ParentStreamConfig - stream: "#/definitions/channel_messages_stream" + stream: + $ref: "#/definitions/channel_messages_stream" + $parameters: + name: channel_messages + path: conversations.history + field_path: messages + use_lookback_window: true parent_key: channel_id partition_field: channel request_option: From 3788cc25fa807d01d7e12253113c96203e5855f3 Mon Sep 17 00:00:00 2001 From: Vadym Ratniuk Date: Wed, 21 Feb 2024 11:52:37 +0200 Subject: [PATCH 07/56] refactoring --- .../source-slack/source_slack/components.py | 57 ++++++------------- 1 file changed, 17 insertions(+), 40 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/source_slack/components.py b/airbyte-integrations/connectors/source-slack/source_slack/components.py index 0e2de9f632a3c..d3581bba71ea7 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/components.py +++ b/airbyte-integrations/connectors/source-slack/source_slack/components.py @@ -1,24 +1,20 @@ -import dpath.util from dataclasses import dataclass from typing import Optional from typing import List, Mapping, Any, Iterable import requests +from airbyte_cdk.models import AirbyteMessage, SyncMode, Type from airbyte_cdk.sources.declarative.types import Config, Record, StreamSlice, StreamState from airbyte_cdk.sources.declarative.extractors import DpathExtractor -from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever -from airbyte_cdk.sources.declarative.types import Record from airbyte_cdk.sources.declarative.transformations import RecordTransformation, AddFields from airbyte_cdk.sources.declarative.partition_routers import SubstreamPartitionRouter -from airbyte_cdk.sources.streams.core import Stream -from airbyte_cdk.models import AirbyteMessage, SyncMode, Type -from airbyte_cdk.sources.declarative.types import Config, Record, StreamSlice, StreamState -from airbyte_cdk.sources.streams.core import StreamData @dataclass class ChannelMembersExtractor(DpathExtractor): """ - Record extractor that extracts record of the form from activity logs stream: + Transform response from list of strings to list dicts: + from: ['aa', 'bb'] + to: [{'member_id': 'aa'}, 
{{'member_id': 'bb'}] """ def extract_records(self, response: requests.Response) -> List[Record]: records = super().extract_records(response) @@ -28,7 +24,8 @@ def extract_records(self, response: requests.Response) -> List[Record]: @dataclass class JoinChannels(RecordTransformation): """ - Implementations of this class define transformations that can be applied to records of a stream. + Make 'conversations.join' POST request for every found channel id + if we are not still a member of such channel """ def transform( @@ -38,8 +35,6 @@ def transform( stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, ) -> Record: - """ sdf """ - print(f"++++++++++CHECK {record['id']} ++++++++++++++++++++++++++++++++++++++++") # The `is_member` property indicates whether or not the API Bot is already assigned / joined to the channel. # https://api.slack.com/types/conversation#booleans channel_id = record.get('id') @@ -54,12 +49,15 @@ def transform( ) print(response.json()) - # self.logger.info(f"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!Successfully joined channel: {channel_id}") + # WHAT TO DO IF IT FAILS ???????????????????????? + # self.logger.info(f"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!Successfully joined channel: {channel_id}") @dataclass class ThreadsPartitionRouter(SubstreamPartitionRouter): - + """Overwrite SubstreamPartitionRouter to be able to pass more than one value + from parent stream to stream_slices + """ def get_request_params( self, stream_state: Optional[StreamState] = None, @@ -73,18 +71,8 @@ def get_request_params( def stream_slices(self) -> Iterable[StreamSlice]: """ - Iterate over each parent stream's record and create a StreamSlice for each record. - - For each stream, iterate over its stream_slices. - For each stream slice, iterate over each record. - yield a stream slice for each such records. - - If a parent slice contains no record, emit a slice with parent_record=None. 
- - The template string can interpolate the following values: - - parent_stream_slice: mapping representing the parent's stream slice - - parent_record: mapping representing the parent record - - parent_stream_name: string representing the parent stream name + Change behaviour of main stream_slices by adding two values (for channel_id, ts) from parent stream + (previously it was possible to add only one value) """ if not self.parent_stream_configs: yield from [] @@ -96,17 +84,14 @@ def stream_slices(self) -> Iterable[StreamSlice]: for parent_stream_slice in parent_stream.stream_slices( sync_mode=SyncMode.full_refresh, cursor_field=None, stream_state=None ): - print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@") - print("parent_stream_slice") - print(parent_stream_slice) + empty_parent_slice = True parent_slice = parent_stream_slice for parent_record in parent_stream.read_records( sync_mode=SyncMode.full_refresh, cursor_field=None, stream_slice=parent_stream_slice, stream_state=None ): - print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@") - print("parent_record") + print(parent_record) # Skip non-records (eg AirbyteLogMessage) if isinstance(parent_record, AirbyteMessage): @@ -116,17 +101,9 @@ def stream_slices(self) -> Iterable[StreamSlice]: continue elif isinstance(parent_record, Record): parent_record = parent_record.data - # try: - # stream_state_value = dpath.util.get(parent_record, parent_field) - # except KeyError: - # pass - # else: + empty_parent_slice = False - print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@") - print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@") - print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@") - print(parent_record) - print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@") + yield { 'channel_id': parent_record['channel_id'], 'ts': parent_record['ts'], From 44ef20507031405684a93138f91d8b2d3c5392c7 Mon Sep 17 00:00:00 2001 From: Vadym Ratniuk Date: Wed, 21 Feb 2024 11:54:49 +0200 Subject: [PATCH 08/56] refactoring --- .../connectors/source-slack/source_slack/manifest.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml index fc8b9465a4baf..486a3c4eecd47 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml +++ b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml @@ -70,7 +70,6 @@ definitions: {{ not response.get("response_metadata", {}).get("next_cursor", {}) }} - requester: type: HttpRequester url_base: https://slack.com/api/ From 21a2e821e4c67e15ee087b851655a5c0387ee524 Mon Sep 17 00:00:00 2001 From: Vadym Ratniuk Date: Mon, 26 Feb 2024 18:25:20 +0200 Subject: [PATCH 09/56] added channel_id to threads --- .../connectors/source-slack/source_slack/manifest.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml index 486a3c4eecd47..d74b77205fcee 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml +++ b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml @@ -277,6 +277,12 @@ definitions: type: RequestOption field_name: channel inject_into: request_parameter + transformations: + - type: AddFields + fields: + - path: + - channel_id + value: '{{ stream_partition[''channel_id''] }}' streams: - "#/definitions/users_stream" From 175970d4a9f582ed420dbf85109fcd08725ff3d5 Mon Sep 17 00:00:00 2001 From: Vadym Ratniuk Date: Tue, 27 Feb 2024 02:21:02 
+0200 Subject: [PATCH 10/56] added oauth2 support, fixed expected records, added error handler --- .../source-slack/acceptance-test-config.yml | 4 +- .../integration_tests/expected_records.jsonl | 12 +- .../connectors/source-slack/metadata.yaml | 2 +- .../source-slack/source_slack/components.py | 46 ++++- .../source-slack/source_slack/manifest.yaml | 169 +++++++++++++++--- .../source-slack/source_slack/spec.json | 2 +- docs/integrations/sources/slack.md | 65 +++---- 7 files changed, 229 insertions(+), 71 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/acceptance-test-config.yml b/airbyte-integrations/connectors/source-slack/acceptance-test-config.yml index b94b7cf70bc01..e8aaa74fc7dfa 100644 --- a/airbyte-integrations/connectors/source-slack/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-slack/acceptance-test-config.yml @@ -6,7 +6,9 @@ acceptance_tests: - spec_path: "source_slack/spec.json" backward_compatibility_tests_config: # edited `min`/`max` > `minimum`/`maximum` for `lookback_window` field - disable_for_version: "0.1.26" + #disable_for_version: "0.1.26" + # slight changes: removed doc url, added new null oauth param + disable_for_version: "0.3.10" connection: tests: - config_path: "secrets/config.json" diff --git a/airbyte-integrations/connectors/source-slack/integration_tests/expected_records.jsonl b/airbyte-integrations/connectors/source-slack/integration_tests/expected_records.jsonl index e5b6c79113767..8f08a81c9d361 100644 --- a/airbyte-integrations/connectors/source-slack/integration_tests/expected_records.jsonl +++ b/airbyte-integrations/connectors/source-slack/integration_tests/expected_records.jsonl @@ -4,12 +4,12 @@ {"stream": "channel_members", "data": {"member_id": "U04L65GPMKN", "channel_id": "C04KX3KEZ54"}, "emitted_at": 1707568736171} {"stream": "channel_members", "data": {"member_id": "U04LY6NARHU", "channel_id": "C04KX3KEZ54"}, "emitted_at": 1707568736172} {"stream": "channel_members", "data": {"member_id": "U04M23SBJGM", "channel_id": "C04KX3KEZ54"}, "emitted_at": 1707568736172} -{"stream": "channel_messages", "data": {"client_msg_id": "3ae60d35-58b8-441c-923a-75de35a4ed8a", "type": "message", "text": "Test Thread 2", "user": "U04L65GPMKN", "ts": "1683104542.931169", "blocks": [{"type": "rich_text", "block_id": "WLB", "elements": [{"type": "rich_text_section", "elements": [{"type": "text", "text": "Test Thread 2"}]}]}], "team": "T04KX3KDDU6", "thread_ts": "1683104542.931169", "reply_count": 2, "reply_users_count": 1, "latest_reply": "1683104568.059569", "reply_users": ["U04L65GPMKN"], "is_locked": false, "subscribed": true, "last_read": "1683104568.059569", "channel_id": "C04KX3KEZ54", "float_ts": 1683104542.931169}, "emitted_at": 1707568738170} -{"stream": "channel_messages", "data": {"client_msg_id": "e27672c0-451e-42a6-8eff-a14d2db8ac1e", "type": "message", "text": "Test Thread 1", "user": "U04L65GPMKN", "ts": "1683104499.808709", "blocks": [{"type": "rich_text", "block_id": "0j7", "elements": [{"type": "rich_text_section", "elements": [{"type": "text", "text": "Test Thread 1"}]}]}], "team": "T04KX3KDDU6", "thread_ts": "1683104499.808709", "reply_count": 2, "reply_users_count": 1, "latest_reply": "1683104528.084359", "reply_users": ["U04L65GPMKN"], "is_locked": false, "subscribed": true, "last_read": "1683104528.084359", "channel_id": "C04LTCM2Y56", "float_ts": 1683104499.808709}, "emitted_at": 1707569060525} -{"stream": "channel_messages", "data": {"type": "message", "subtype": "reminder_add", "text": " set up a 
reminder \u201ctest reminder\u201d in this channel at 9AM tomorrow, Eastern European Summer Time.", "user": "U04L65GPMKN", "ts": "1695814864.744249", "channel_id": "C04LTCM2Y56", "float_ts": 1695814864.744249}, "emitted_at": 1707569208689} -{"stream": "threads", "data": {"client_msg_id": "3ae60d35-58b8-441c-923a-75de35a4ed8a", "type": "message", "text": "Test Thread 2", "user": "U04L65GPMKN", "ts": "1683104542.931169", "blocks": [{"type": "rich_text", "block_id": "WLB", "elements": [{"type": "rich_text_section", "elements": [{"type": "text", "text": "Test Thread 2"}]}]}], "team": "T04KX3KDDU6", "thread_ts": "1683104542.931169", "reply_count": 2, "reply_users_count": 1, "latest_reply": "1683104568.059569", "reply_users": ["U04L65GPMKN"], "is_locked": false, "subscribed": true, "last_read": "1683104568.059569", "channel_id": "C04KX3KEZ54", "float_ts": 1683104542.931169}, "emitted_at": 1707569354932} -{"stream": "threads", "data": {"client_msg_id": "3e96d351-270c-493f-a1a0-fdc3c4c0e11f", "type": "message", "text": "<@U04M23SBJGM> test test test", "user": "U04L65GPMKN", "ts": "1683104559.922849", "blocks": [{"type": "rich_text", "block_id": "tX6vr", "elements": [{"type": "rich_text_section", "elements": [{"type": "user", "user_id": "U04M23SBJGM"}, {"type": "text", "text": " test test test"}]}]}], "team": "T04KX3KDDU6", "thread_ts": "1683104542.931169", "parent_user_id": "U04L65GPMKN", "channel_id": "C04KX3KEZ54", "float_ts": 1683104559.922849}, "emitted_at": 1707569354933} -{"stream": "threads", "data": {"client_msg_id": "08023e44-9d18-41ed-81dd-5f04ed699656", "type": "message", "text": "<@U04LY6NARHU> test test", "user": "U04L65GPMKN", "ts": "1683104568.059569", "blocks": [{"type": "rich_text", "block_id": "IyUF", "elements": [{"type": "rich_text_section", "elements": [{"type": "user", "user_id": "U04LY6NARHU"}, {"type": "text", "text": " test test"}]}]}], "team": "T04KX3KDDU6", "thread_ts": "1683104542.931169", "parent_user_id": "U04L65GPMKN", "channel_id": "C04KX3KEZ54", "float_ts": 1683104568.059569}, "emitted_at": 1707569354933} +{"stream": "channel_messages", "data": {"client_msg_id": "3ae60d35-58b8-441c-923a-75de35a4ed8a", "type": "message", "text": "Test Thread 2", "user": "U04L65GPMKN", "ts": "1683104542.931169", "blocks": [{"type": "rich_text", "block_id": "WLB", "elements": [{"type": "rich_text_section", "elements": [{"type": "text", "text": "Test Thread 2"}]}]}], "team": "T04KX3KDDU6", "thread_ts": "1683104542.931169", "reply_count": 2, "reply_users_count": 1, "latest_reply": "1683104568.059569", "reply_users": ["U04L65GPMKN"], "is_locked": false, "subscribed": true, "last_read": "1683104568.059569", "channel_id": "C04KX3KEZ54", "float_ts": 1683104542}, "emitted_at": 1707568738170} +{"stream": "channel_messages", "data": {"client_msg_id": "e27672c0-451e-42a6-8eff-a14d2db8ac1e", "type": "message", "text": "Test Thread 1", "user": "U04L65GPMKN", "ts": "1683104499.808709", "blocks": [{"type": "rich_text", "block_id": "0j7", "elements": [{"type": "rich_text_section", "elements": [{"type": "text", "text": "Test Thread 1"}]}]}], "team": "T04KX3KDDU6", "thread_ts": "1683104499.808709", "reply_count": 2, "reply_users_count": 1, "latest_reply": "1683104528.084359", "reply_users": ["U04L65GPMKN"], "is_locked": false, "subscribed": true, "last_read": "1683104528.084359", "channel_id": "C04LTCM2Y56", "float_ts": 1683104499}, "emitted_at": 1707569060525} +{"stream": "channel_messages", "data": {"type": "message", "subtype": "reminder_add", "text": " set up a reminder \u201ctest reminder\u201d 
in this channel at 9AM tomorrow, Eastern European Summer Time.", "user": "U04L65GPMKN", "ts": "1695814864.744249", "channel_id": "C04LTCM2Y56", "float_ts": 1695814864}, "emitted_at": 1707569208689} +{"stream": "threads", "data": {"client_msg_id": "3ae60d35-58b8-441c-923a-75de35a4ed8a", "type": "message", "text": "Test Thread 2", "user": "U04L65GPMKN", "ts": "1683104542.931169", "blocks": [{"type": "rich_text", "block_id": "WLB", "elements": [{"type": "rich_text_section", "elements": [{"type": "text", "text": "Test Thread 2"}]}]}], "team": "T04KX3KDDU6", "thread_ts": "1683104542.931169", "reply_count": 2, "reply_users_count": 1, "latest_reply": "1683104568.059569", "reply_users": ["U04L65GPMKN"], "is_locked": false, "subscribed": true, "last_read": "1683104568.059569", "channel_id": "C04KX3KEZ54", "float_ts": 1683104542}, "emitted_at": 1707569354932} +{"stream": "threads", "data": {"client_msg_id": "3e96d351-270c-493f-a1a0-fdc3c4c0e11f", "type": "message", "text": "<@U04M23SBJGM> test test test", "user": "U04L65GPMKN", "ts": "1683104559.922849", "blocks": [{"type": "rich_text", "block_id": "tX6vr", "elements": [{"type": "rich_text_section", "elements": [{"type": "user", "user_id": "U04M23SBJGM"}, {"type": "text", "text": " test test test"}]}]}], "team": "T04KX3KDDU6", "thread_ts": "1683104542.931169", "parent_user_id": "U04L65GPMKN", "channel_id": "C04KX3KEZ54", "float_ts": 1683104559}, "emitted_at": 1707569354933} +{"stream": "threads", "data": {"client_msg_id": "08023e44-9d18-41ed-81dd-5f04ed699656", "type": "message", "text": "<@U04LY6NARHU> test test", "user": "U04L65GPMKN", "ts": "1683104568.059569", "blocks": [{"type": "rich_text", "block_id": "IyUF", "elements": [{"type": "rich_text_section", "elements": [{"type": "user", "user_id": "U04LY6NARHU"}, {"type": "text", "text": " test test"}]}]}], "team": "T04KX3KDDU6", "thread_ts": "1683104542.931169", "parent_user_id": "U04L65GPMKN", "channel_id": "C04KX3KEZ54", "float_ts": 1683104568}, "emitted_at": 1707569354933} {"stream": "users", "data": {"id": "USLACKBOT", "team_id": "T04KX3KDDU6", "name": "slackbot", "deleted": false, "color": "757575", "real_name": "Slackbot", "tz": "America/Los_Angeles", "tz_label": "Pacific Standard Time", "tz_offset": -28800, "profile": {"title": "", "phone": "", "skype": "", "real_name": "Slackbot", "real_name_normalized": "Slackbot", "display_name": "Slackbot", "display_name_normalized": "Slackbot", "fields": {}, "status_text": "", "status_emoji": "", "status_emoji_display_info": [], "status_expiration": 0, "avatar_hash": "sv41d8cd98f0", "always_active": true, "first_name": "slackbot", "last_name": "", "image_24": "https://a.slack-edge.com/80588/img/slackbot_24.png", "image_32": "https://a.slack-edge.com/80588/img/slackbot_32.png", "image_48": "https://a.slack-edge.com/80588/img/slackbot_48.png", "image_72": "https://a.slack-edge.com/80588/img/slackbot_72.png", "image_192": "https://a.slack-edge.com/80588/marketing/img/avatars/slackbot/avatar-slackbot.png", "image_512": "https://a.slack-edge.com/80588/img/slackbot_512.png", "status_text_canonical": "", "team": "T04KX3KDDU6"}, "is_admin": false, "is_owner": false, "is_primary_owner": false, "is_restricted": false, "is_ultra_restricted": false, "is_bot": false, "is_app_user": false, "updated": 0, "is_email_confirmed": false, "who_can_share_contact_card": "EVERYONE"}, "emitted_at": 1707569357949} {"stream": "users", "data": {"id": "U04KUMXNYMV", "team_id": "T04KX3KDDU6", "name": "deactivateduser693438", "deleted": true, "profile": {"title": "", "phone": "", 
"skype": "", "real_name": "Deactivated User", "real_name_normalized": "Deactivated User", "display_name": "deactivateduser", "display_name_normalized": "deactivateduser", "fields": null, "status_text": "", "status_emoji": "", "status_emoji_display_info": [], "status_expiration": 0, "avatar_hash": "g849cc56ed76", "huddle_state": "default_unset", "first_name": "Deactivated", "last_name": "User", "image_24": "https://secure.gravatar.com/avatar/d5320ceddda202563fd9e6222c07c00a.jpg?s=24&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0011-24.png", "image_32": "https://secure.gravatar.com/avatar/d5320ceddda202563fd9e6222c07c00a.jpg?s=32&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0011-32.png", "image_48": "https://secure.gravatar.com/avatar/d5320ceddda202563fd9e6222c07c00a.jpg?s=48&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0011-48.png", "image_72": "https://secure.gravatar.com/avatar/d5320ceddda202563fd9e6222c07c00a.jpg?s=72&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0011-72.png", "image_192": "https://secure.gravatar.com/avatar/d5320ceddda202563fd9e6222c07c00a.jpg?s=192&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0011-192.png", "image_512": "https://secure.gravatar.com/avatar/d5320ceddda202563fd9e6222c07c00a.jpg?s=512&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0011-512.png", "status_text_canonical": "", "team": "T04KX3KDDU6"}, "is_bot": false, "is_app_user": false, "updated": 1675090804, "is_forgotten": true, "is_invited_user": true}, "emitted_at": 1707569357951} {"stream": "users", "data": {"id": "U04L2KY5CES", "team_id": "T04KX3KDDU6", "name": "deactivateduser686066", "deleted": true, "profile": {"title": "", "phone": "", "skype": "", "real_name": "Deactivated User", "real_name_normalized": "Deactivated User", "display_name": "deactivateduser", "display_name_normalized": "deactivateduser", "fields": null, "status_text": "", "status_emoji": "", "status_emoji_display_info": [], "status_expiration": 0, "avatar_hash": "g849cc56ed76", "huddle_state": "default_unset", "first_name": "Deactivated", "last_name": "User", "image_24": "https://secure.gravatar.com/avatar/cacb225265b3b19c4e72029a62cf1ef1.jpg?s=24&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0009-24.png", "image_32": "https://secure.gravatar.com/avatar/cacb225265b3b19c4e72029a62cf1ef1.jpg?s=32&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0009-32.png", "image_48": "https://secure.gravatar.com/avatar/cacb225265b3b19c4e72029a62cf1ef1.jpg?s=48&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0009-48.png", "image_72": "https://secure.gravatar.com/avatar/cacb225265b3b19c4e72029a62cf1ef1.jpg?s=72&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0009-72.png", "image_192": "https://secure.gravatar.com/avatar/cacb225265b3b19c4e72029a62cf1ef1.jpg?s=192&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0009-192.png", "image_512": "https://secure.gravatar.com/avatar/cacb225265b3b19c4e72029a62cf1ef1.jpg?s=512&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0009-512.png", "status_text_canonical": "", "team": "T04KX3KDDU6"}, "is_bot": false, "is_app_user": false, "updated": 1675090785, "is_forgotten": true, "is_invited_user": true}, "emitted_at": 1707569357951} diff --git a/airbyte-integrations/connectors/source-slack/metadata.yaml b/airbyte-integrations/connectors/source-slack/metadata.yaml index 06e16de8b167e..9a5ec30bad5e0 100644 --- 
a/airbyte-integrations/connectors/source-slack/metadata.yaml +++ b/airbyte-integrations/connectors/source-slack/metadata.yaml @@ -10,7 +10,7 @@ data: connectorSubtype: api connectorType: source definitionId: c2281cee-86f9-4a86-bb48-d23286b4c7bd - dockerImageTag: 0.3.9 + dockerImageTag: 0.3.10 dockerRepository: airbyte/source-slack documentationUrl: https://docs.airbyte.com/integrations/sources/slack githubIssueLabel: source-slack diff --git a/airbyte-integrations/connectors/source-slack/source_slack/components.py b/airbyte-integrations/connectors/source-slack/source_slack/components.py index d3581bba71ea7..db69ead6eb028 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/components.py +++ b/airbyte-integrations/connectors/source-slack/source_slack/components.py @@ -2,15 +2,57 @@ from typing import Optional from typing import List, Mapping, Any, Iterable import requests -from airbyte_cdk.models import AirbyteMessage, SyncMode, Type + +from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator +from airbyte_cdk.sources.declarative.auth.token import BearerAuthenticator +from airbyte_cdk.models import AirbyteMessage, SyncMode, Type, FailureType from airbyte_cdk.sources.declarative.types import Config, Record, StreamSlice, StreamState from airbyte_cdk.sources.declarative.extractors import DpathExtractor from airbyte_cdk.sources.declarative.transformations import RecordTransformation, AddFields from airbyte_cdk.sources.declarative.partition_routers import SubstreamPartitionRouter +from airbyte_cdk.utils.traced_exception import AirbyteTracedException + +@dataclass +class SlackAuthenticator(DeclarativeAuthenticator): + config: Mapping[str, Any] + api_token_auth: BearerAuthenticator + access_token_auth: BearerAuthenticator + + def __new__(cls, api_token_auth, access_token_auth, config, *args, **kwargs): + return api_token_auth if config["credentials"]["option_title"] == "API Token Credentials" else access_token_auth + + +@dataclass +class SlackDpathExtractor(DpathExtractor): + """ + Handle error from Slack API: + { + "body": "{\"ok\":false,\"error\":\"invalid_auth\"}", + "status": "200" + } + """ + def extract_records(self, response: requests.Response) -> List[Record]: + response_body = self.decoder.decode(response) + if not response_body.get('ok'): + error_message = response_body.get('error') + message = f"Request failed with error: {error_message}" + if 'invalid_auth' in error_message: + raise AirbyteTracedException( + message='Authentication has failed, please update your credentials', + internal_message=message, + failure_type=FailureType.config_error, + ) + else: + raise AirbyteTracedException( + message=message, + internal_message=message, + failure_type=FailureType.system_error, + ) + return super().extract_records(response) @dataclass -class ChannelMembersExtractor(DpathExtractor): +class ChannelMembersExtractor(SlackDpathExtractor): """ Transform response from list of strings to list dicts: from: ['aa', 'bb'] diff --git a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml index d74b77205fcee..90231f2cd33c0 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml +++ b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml @@ -4,47 +4,144 @@ type: DeclarativeSource spec: type: Spec + documentationUrl: https://docs.airbyte.com/integrations/sources/slack connection_specification: $schema: 
http://json-schema.org/draft-07/schema# + title: Slack Spec type: object required: - - api_token - - start_date + - start_date + - lookback_window + - join_channels + additionalProperties: true properties: - api_token: - type: string - title: API Key - airbyte_secret: true - order: 0 start_date: type: string - title: Start date + pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" + description: UTC date and time in the format 2017-01-25T00:00:00Z. Any data + before this date will not be replicated. + examples: + - '2017-01-25T00:00:00Z' + title: Start Date format: date-time - pattern: ^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$ - order: 1 + lookback_window: + type: integer + title: Threads Lookback window (Days) + description: How far into the past to look for messages in threads, default + is 0 days + examples: + - 7 + - 14 + minimum: 0 + default: 0 + maximum: 365 join_channels: type: boolean default: true title: Join all channels - description: Whether to join all channels or to sync data only from channels the bot is already in. If false, you'll need to manually add the bot to all the channels from which you'd like to sync messages. + description: 'Whether to join all channels or to sync data only from channels + the bot is already in. If false, you''ll need to manually add the bot to + all the channels from which you''d like to sync messages. ' channel_filter: type: array default: [] items: type: string minLength: 0 - title": "Channel name filter" - description: "A channel name list (without leading '#' char) which limit the channels from which you'd like to sync. Empty list means no filter." - examples: ["channel_one", "channel_two"] - lookback_window: - type: "integer" - title: "Threads Lookback window (Days)" - description: "How far into the past to look for messages in threads, default is 0 days" - examples: [7, 14] - minimum: 0 - default: 0 - maximum: 365 - additionalProperties: true + title: Channel name filter + description: A channel name list (without leading '#' char) which limit the + channels from which you'd like to sync. Empty list means no filter. + examples: + - channel_one + - channel_two + credentials: + title: Authentication mechanism + description: Choose how to authenticate into Slack + type: object + oneOf: + - type: object + title: Sign in via Slack (OAuth) + required: + - option_title + - client_id + - client_secret + - access_token + properties: + option_title: + type: string + const: Default OAuth2.0 authorization + client_id: + type: string + title: Client ID + description: Slack client_id. See our docs + if you need help finding this id. + client_secret: + type: string + title: Client Secret + description: Slack client_secret. See our docs + if you need help finding this secret. + airbyte_secret: true + access_token: + type: string + title: Access token + description: Slack access_token. See our docs + if you need help generating the token. + airbyte_secret: true + order: 0 + - type: object + title: API Token + required: + - option_title + - api_token + properties: + option_title: + type: string + const: API Token Credentials + api_token: + type: string + title: API Token + description: A Slack bot token. See the docs + for instructions on how to generate it. 
+ airbyte_secret: true + order: 1 + advanced_auth: + auth_flow_type: oauth2.0 + predicate_key: + - credentials + - option_title + predicate_value: Default OAuth2.0 authorization + oauth_config_specification: + complete_oauth_output_specification: + type: object + additionalProperties: false + properties: + access_token: + type: string + path_in_connector_config: + - credentials + - access_token + complete_oauth_server_input_specification: + type: object + additionalProperties: false + properties: + client_id: + type: string + client_secret: + type: string + complete_oauth_server_output_specification: + type: object + additionalProperties: false + properties: + client_id: + type: string + path_in_connector_config: + - credentials + - client_id + client_secret: + type: string + path_in_connector_config: + - credentials + - client_secret definitions: @@ -70,6 +167,13 @@ definitions: {{ not response.get("response_metadata", {}).get("next_cursor", {}) }} + api_token_auth: + type: BearerAuthenticator + api_token: "{{ config['credentials']['api_token'] }}" + access_token_auth: + type: BearerAuthenticator + api_token: "{{ config['credentials']['access_token'] }}" + requester: type: HttpRequester url_base: https://slack.com/api/ @@ -78,8 +182,9 @@ definitions: request_parameters: { } request_headers: { } authenticator: - type: BearerAuthenticator - api_token: '{{ config[''api_token''] }}' + class_name: source_slack.components.SlackAuthenticator + api_token_auth: "#/definitions/api_token_auth" + access_token_auth: "#/definitions/access_token_auth" request_body_json: { } retriever: @@ -89,7 +194,8 @@ definitions: record_selector: type: RecordSelector extractor: - type: DpathExtractor +# type: DpathExtractor + class_name: "source_slack.components.SlackDpathExtractor" field_path: - "{{ parameters['field_path'] }}" paginator: @@ -103,7 +209,8 @@ definitions: record_selector: type: RecordSelector extractor: - type: DpathExtractor +# type: DpathExtractor + class_name: "source_slack.components.SlackDpathExtractor" field_path: - "{{ parameters['field_path'] }}" record_filter: @@ -255,7 +362,8 @@ definitions: record_selector: type: RecordSelector extractor: - type: DpathExtractor +# type: DpathExtractor + class_name: "source_slack.components.SlackDpathExtractor" field_path: - "{{ parameters['field_path'] }}" paginator: @@ -278,6 +386,11 @@ definitions: field_name: channel inject_into: request_parameter transformations: + - type: AddFields + fields: + - path: + - float_ts + value: '{{ record.ts|int }}' - type: AddFields fields: - path: diff --git a/airbyte-integrations/connectors/source-slack/source_slack/spec.json b/airbyte-integrations/connectors/source-slack/source_slack/spec.json index 2ed0ba91abbdf..e59f508e41abe 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/spec.json +++ b/airbyte-integrations/connectors/source-slack/source_slack/spec.json @@ -1,5 +1,4 @@ { - "documentationUrl": "https://docs.airbyte.com/integrations/sources/slack", "connectionSpecification": { "$schema": "http://json-schema.org/draft-07/schema#", "title": "Slack Spec", @@ -107,6 +106,7 @@ "predicate_key": ["credentials", "option_title"], "predicate_value": "Default OAuth2.0 authorization", "oauth_config_specification": { + "oauth_user_input_from_connector_config_specification": null, "complete_oauth_output_specification": { "type": "object", "additionalProperties": false, diff --git a/docs/integrations/sources/slack.md b/docs/integrations/sources/slack.md index a5a99979b5239..86ba2b2ac1d7b 100644 --- 
a/docs/integrations/sources/slack.md +++ b/docs/integrations/sources/slack.md @@ -161,38 +161,39 @@ Slack has [rate limit restrictions](https://api.slack.com/docs/rate-limits). ## Changelog -| Version | Date | Pull Request | Subject | -|:--------|:-----------|:---------------------------------------------------------|:------------------------------------------------------------------------------------| -| 0.3.9 | 2024-02-12 | [35157](https://github.com/airbytehq/airbyte/pull/35157) | Manage dependencies with Poetry. | +| Version | Date | Pull Request | Subject | +|:--------|:-----------|:---------------------------------------------------------|:-------------------------------------------------------------------------------------| +| 0.3.10 | 2024-02-27 | [35477](https://github.com/airbytehq/airbyte/pull/35477) | Migration to low code | +| 0.3.9 | 2024-02-12 | [35157](https://github.com/airbytehq/airbyte/pull/35157) | Manage dependencies with Poetry. | | 0.3.8 | 2024-02-09 | [35131](https://github.com/airbytehq/airbyte/pull/35131) | Fixed the issue when `schema discovery` fails with `502` due to the platform timeout | -| 0.3.7 | 2024-01-10 | [1234](https://github.com/airbytehq/airbyte/pull/1234) | prepare for airbyte-lib | -| 0.3.6 | 2023-11-21 | [32707](https://github.com/airbytehq/airbyte/pull/32707) | Threads: do not use client-side record filtering | -| 0.3.5 | 2023-10-19 | [31599](https://github.com/airbytehq/airbyte/pull/31599) | Base image migration: remove Dockerfile and use the python-connector-base image | -| 0.3.4 | 2023-10-06 | [31134](https://github.com/airbytehq/airbyte/pull/31134) | Update CDK and remove non iterable return from records | -| 0.3.3 | 2023-09-28 | [30580](https://github.com/airbytehq/airbyte/pull/30580) | Add `bot_id` field to threads schema | -| 0.3.2 | 2023-09-20 | [30613](https://github.com/airbytehq/airbyte/pull/30613) | Set default value for channel_filters during discover | -| 0.3.1 | 2023-09-19 | [30570](https://github.com/airbytehq/airbyte/pull/30570) | Use default availability strategy | -| 0.3.0 | 2023-09-18 | [30521](https://github.com/airbytehq/airbyte/pull/30521) | Add unexpected fields to streams `channel_messages`, `channels`, `threads`, `users` | -| 0.2.0 | 2023-05-24 | [26497](https://github.com/airbytehq/airbyte/pull/26497) | Fixed `lookback window` value limitations | -| 0.1.26 | 2023-05-17 | [26186](https://github.com/airbytehq/airbyte/pull/26186) | Limited the `lookback window` range for input configuration | -| 0.1.25 | 2023-03-20 | [22889](https://github.com/airbytehq/airbyte/pull/22889) | Specified date formatting in specification | -| 0.1.24 | 2023-03-20 | [24126](https://github.com/airbytehq/airbyte/pull/24126) | Increase page size to 1000 | -| 0.1.23 | 2023-02-21 | [21907](https://github.com/airbytehq/airbyte/pull/21907) | Do not join channels that not gonna be synced | -| 0.1.22 | 2023-01-27 | [22022](https://github.com/airbytehq/airbyte/pull/22022) | Set `AvailabilityStrategy` for streams explicitly to `None` | -| 0.1.21 | 2023-01-12 | [21321](https://github.com/airbytehq/airbyte/pull/21321) | Retry Timeout error | -| 0.1.20 | 2022-12-21 | [20767](https://github.com/airbytehq/airbyte/pull/20767) | Update schema | -| 0.1.19 | 2022-12-01 | [19970](https://github.com/airbytehq/airbyte/pull/19970) | Remove OAuth2.0 broken `refresh_token` support | -| 0.1.18 | 2022-09-28 | [17315](https://github.com/airbytehq/airbyte/pull/17315) | Always install latest version of Airbyte CDK | -| 0.1.17 | 2022-08-28 | 
[16085](https://github.com/airbytehq/airbyte/pull/16085) | Increase unit test coverage | -| 0.1.16 | 2022-08-28 | [16050](https://github.com/airbytehq/airbyte/pull/16050) | Fix SATs | -| 0.1.15 | 2022-03-31 | [11613](https://github.com/airbytehq/airbyte/pull/11613) | Add 'channel_filter' config and improve performance | -| 0.1.14 | 2022-01-26 | [9575](https://github.com/airbytehq/airbyte/pull/9575) | Correct schema | -| 0.1.13 | 2021-11-08 | [7499](https://github.com/airbytehq/airbyte/pull/7499) | Remove base-python dependencies | -| 0.1.12 | 2021-10-07 | [6570](https://github.com/airbytehq/airbyte/pull/6570) | Implement OAuth support with OAuth authenticator | -| 0.1.11 | 2021-08-27 | [5830](https://github.com/airbytehq/airbyte/pull/5830) | Fix sync operations hang forever issue | -| 0.1.10 | 2021-08-27 | [5697](https://github.com/airbytehq/airbyte/pull/5697) | Fix max retries issue | -| 0.1.9 | 2021-07-20 | [4860](https://github.com/airbytehq/airbyte/pull/4860) | Fix reading threads issue | -| 0.1.8 | 2021-07-14 | [4683](https://github.com/airbytehq/airbyte/pull/4683) | Add float\_ts primary key | -| 0.1.7 | 2021-06-25 | [3978](https://github.com/airbytehq/airbyte/pull/3978) | Release Slack CDK Connector | +| 0.3.7 | 2024-01-10 | [1234](https://github.com/airbytehq/airbyte/pull/1234) | prepare for airbyte-lib | +| 0.3.6 | 2023-11-21 | [32707](https://github.com/airbytehq/airbyte/pull/32707) | Threads: do not use client-side record filtering | +| 0.3.5 | 2023-10-19 | [31599](https://github.com/airbytehq/airbyte/pull/31599) | Base image migration: remove Dockerfile and use the python-connector-base image | +| 0.3.4 | 2023-10-06 | [31134](https://github.com/airbytehq/airbyte/pull/31134) | Update CDK and remove non iterable return from records | +| 0.3.3 | 2023-09-28 | [30580](https://github.com/airbytehq/airbyte/pull/30580) | Add `bot_id` field to threads schema | +| 0.3.2 | 2023-09-20 | [30613](https://github.com/airbytehq/airbyte/pull/30613) | Set default value for channel_filters during discover | +| 0.3.1 | 2023-09-19 | [30570](https://github.com/airbytehq/airbyte/pull/30570) | Use default availability strategy | +| 0.3.0 | 2023-09-18 | [30521](https://github.com/airbytehq/airbyte/pull/30521) | Add unexpected fields to streams `channel_messages`, `channels`, `threads`, `users` | +| 0.2.0 | 2023-05-24 | [26497](https://github.com/airbytehq/airbyte/pull/26497) | Fixed `lookback window` value limitations | +| 0.1.26 | 2023-05-17 | [26186](https://github.com/airbytehq/airbyte/pull/26186) | Limited the `lookback window` range for input configuration | +| 0.1.25 | 2023-03-20 | [22889](https://github.com/airbytehq/airbyte/pull/22889) | Specified date formatting in specification | +| 0.1.24 | 2023-03-20 | [24126](https://github.com/airbytehq/airbyte/pull/24126) | Increase page size to 1000 | +| 0.1.23 | 2023-02-21 | [21907](https://github.com/airbytehq/airbyte/pull/21907) | Do not join channels that not gonna be synced | +| 0.1.22 | 2023-01-27 | [22022](https://github.com/airbytehq/airbyte/pull/22022) | Set `AvailabilityStrategy` for streams explicitly to `None` | +| 0.1.21 | 2023-01-12 | [21321](https://github.com/airbytehq/airbyte/pull/21321) | Retry Timeout error | +| 0.1.20 | 2022-12-21 | [20767](https://github.com/airbytehq/airbyte/pull/20767) | Update schema | +| 0.1.19 | 2022-12-01 | [19970](https://github.com/airbytehq/airbyte/pull/19970) | Remove OAuth2.0 broken `refresh_token` support | +| 0.1.18 | 2022-09-28 | [17315](https://github.com/airbytehq/airbyte/pull/17315) | Always install 
latest version of Airbyte CDK | +| 0.1.17 | 2022-08-28 | [16085](https://github.com/airbytehq/airbyte/pull/16085) | Increase unit test coverage | +| 0.1.16 | 2022-08-28 | [16050](https://github.com/airbytehq/airbyte/pull/16050) | Fix SATs | +| 0.1.15 | 2022-03-31 | [11613](https://github.com/airbytehq/airbyte/pull/11613) | Add 'channel_filter' config and improve performance | +| 0.1.14 | 2022-01-26 | [9575](https://github.com/airbytehq/airbyte/pull/9575) | Correct schema | +| 0.1.13 | 2021-11-08 | [7499](https://github.com/airbytehq/airbyte/pull/7499) | Remove base-python dependencies | +| 0.1.12 | 2021-10-07 | [6570](https://github.com/airbytehq/airbyte/pull/6570) | Implement OAuth support with OAuth authenticator | +| 0.1.11 | 2021-08-27 | [5830](https://github.com/airbytehq/airbyte/pull/5830) | Fix sync operations hang forever issue | +| 0.1.10 | 2021-08-27 | [5697](https://github.com/airbytehq/airbyte/pull/5697) | Fix max retries issue | +| 0.1.9 | 2021-07-20 | [4860](https://github.com/airbytehq/airbyte/pull/4860) | Fix reading threads issue | +| 0.1.8 | 2021-07-14 | [4683](https://github.com/airbytehq/airbyte/pull/4683) | Add float\_ts primary key | +| 0.1.7 | 2021-06-25 | [3978](https://github.com/airbytehq/airbyte/pull/3978) | Release Slack CDK Connector | From c45a766eae870b6310e0bc247b1ec70bd1e077f3 Mon Sep 17 00:00:00 2001 From: Vadym Ratniuk Date: Tue, 27 Feb 2024 02:23:06 +0200 Subject: [PATCH 11/56] cleanup --- .../connectors/source-slack/source_slack/manifest.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml index 90231f2cd33c0..b91313fb85f10 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml +++ b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml @@ -194,7 +194,6 @@ definitions: record_selector: type: RecordSelector extractor: -# type: DpathExtractor class_name: "source_slack.components.SlackDpathExtractor" field_path: - "{{ parameters['field_path'] }}" @@ -209,7 +208,6 @@ definitions: record_selector: type: RecordSelector extractor: -# type: DpathExtractor class_name: "source_slack.components.SlackDpathExtractor" field_path: - "{{ parameters['field_path'] }}" @@ -362,7 +360,6 @@ definitions: record_selector: type: RecordSelector extractor: -# type: DpathExtractor class_name: "source_slack.components.SlackDpathExtractor" field_path: - "{{ parameters['field_path'] }}" From 7348e7fafb63a762dd26ec419daf5bdf73011ef6 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Wed, 13 Mar 2024 12:07:51 +0200 Subject: [PATCH 12/56] updated auth in manifest, removed custom component --- .../source-slack/source_slack/components.py | 11 ----------- .../source-slack/source_slack/manifest.yaml | 8 +++++--- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/source_slack/components.py b/airbyte-integrations/connectors/source-slack/source_slack/components.py index db69ead6eb028..9715daf93f810 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/components.py +++ b/airbyte-integrations/connectors/source-slack/source_slack/components.py @@ -3,8 +3,6 @@ from typing import List, Mapping, Any, Iterable import requests -from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator -from airbyte_cdk.sources.declarative.auth.token import BearerAuthenticator from airbyte_cdk.models import 
AirbyteMessage, SyncMode, Type, FailureType from airbyte_cdk.sources.declarative.types import Config, Record, StreamSlice, StreamState from airbyte_cdk.sources.declarative.extractors import DpathExtractor @@ -12,15 +10,6 @@ from airbyte_cdk.sources.declarative.partition_routers import SubstreamPartitionRouter from airbyte_cdk.utils.traced_exception import AirbyteTracedException -@dataclass -class SlackAuthenticator(DeclarativeAuthenticator): - config: Mapping[str, Any] - api_token_auth: BearerAuthenticator - access_token_auth: BearerAuthenticator - - def __new__(cls, api_token_auth, access_token_auth, config, *args, **kwargs): - return api_token_auth if config["credentials"]["option_title"] == "API Token Credentials" else access_token_auth - @dataclass class SlackDpathExtractor(DpathExtractor): diff --git a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml index b91313fb85f10..db58c86309ff3 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml +++ b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml @@ -182,9 +182,11 @@ definitions: request_parameters: { } request_headers: { } authenticator: - class_name: source_slack.components.SlackAuthenticator - api_token_auth: "#/definitions/api_token_auth" - access_token_auth: "#/definitions/access_token_auth" + type: SelectiveAuthenticator + authenticator_selection_path: [ "credentials", "option_title" ] + authenticators: + Default OAuth2.0 authorization: "#/definitions/access_token_auth" + API Token Credentials: "#/definitions/api_token_auth" request_body_json: { } retriever: From 563daa1a47cf96cc5d82c805e2c5a38f31e4f7cf Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Tue, 19 Mar 2024 17:20:02 +0200 Subject: [PATCH 13/56] added selective auth, updated streams impelemntation --- .../source-slack/source_slack/manifest.yaml | 316 ++++++------------ 1 file changed, 106 insertions(+), 210 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml index db58c86309ff3..b9606ac25097d 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml +++ b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml @@ -1,150 +1,7 @@ -#version: 0.61.2 version: 0.58.7 type: DeclarativeSource -spec: - type: Spec - documentationUrl: https://docs.airbyte.com/integrations/sources/slack - connection_specification: - $schema: http://json-schema.org/draft-07/schema# - title: Slack Spec - type: object - required: - - start_date - - lookback_window - - join_channels - additionalProperties: true - properties: - start_date: - type: string - pattern: "^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$" - description: UTC date and time in the format 2017-01-25T00:00:00Z. Any data - before this date will not be replicated. - examples: - - '2017-01-25T00:00:00Z' - title: Start Date - format: date-time - lookback_window: - type: integer - title: Threads Lookback window (Days) - description: How far into the past to look for messages in threads, default - is 0 days - examples: - - 7 - - 14 - minimum: 0 - default: 0 - maximum: 365 - join_channels: - type: boolean - default: true - title: Join all channels - description: 'Whether to join all channels or to sync data only from channels - the bot is already in. 
If false, you''ll need to manually add the bot to - all the channels from which you''d like to sync messages. ' - channel_filter: - type: array - default: [] - items: - type: string - minLength: 0 - title: Channel name filter - description: A channel name list (without leading '#' char) which limit the - channels from which you'd like to sync. Empty list means no filter. - examples: - - channel_one - - channel_two - credentials: - title: Authentication mechanism - description: Choose how to authenticate into Slack - type: object - oneOf: - - type: object - title: Sign in via Slack (OAuth) - required: - - option_title - - client_id - - client_secret - - access_token - properties: - option_title: - type: string - const: Default OAuth2.0 authorization - client_id: - type: string - title: Client ID - description: Slack client_id. See our docs - if you need help finding this id. - client_secret: - type: string - title: Client Secret - description: Slack client_secret. See our docs - if you need help finding this secret. - airbyte_secret: true - access_token: - type: string - title: Access token - description: Slack access_token. See our docs - if you need help generating the token. - airbyte_secret: true - order: 0 - - type: object - title: API Token - required: - - option_title - - api_token - properties: - option_title: - type: string - const: API Token Credentials - api_token: - type: string - title: API Token - description: A Slack bot token. See the docs - for instructions on how to generate it. - airbyte_secret: true - order: 1 - advanced_auth: - auth_flow_type: oauth2.0 - predicate_key: - - credentials - - option_title - predicate_value: Default OAuth2.0 authorization - oauth_config_specification: - complete_oauth_output_specification: - type: object - additionalProperties: false - properties: - access_token: - type: string - path_in_connector_config: - - credentials - - access_token - complete_oauth_server_input_specification: - type: object - additionalProperties: false - properties: - client_id: - type: string - client_secret: - type: string - complete_oauth_server_output_specification: - type: object - additionalProperties: false - properties: - client_id: - type: string - path_in_connector_config: - - credentials - - client_id - client_secret: - type: string - path_in_connector_config: - - credentials - - client_secret - definitions: - schema_loader: type: JsonFileSchemaLoader file_path: "./source_slack/schemas/{{ parameters['name'] }}.json" @@ -179,26 +36,38 @@ definitions: url_base: https://slack.com/api/ path: "{{ parameters['path'] }}" http_method: GET - request_parameters: { } - request_headers: { } + request_parameters: {} + request_headers: {} authenticator: type: SelectiveAuthenticator - authenticator_selection_path: [ "credentials", "option_title" ] + authenticator_selection_path: ["credentials", "option_title"] authenticators: Default OAuth2.0 authorization: "#/definitions/access_token_auth" API Token Credentials: "#/definitions/api_token_auth" - request_body_json: { } + request_body_json: {} + error_handler: + type: DefaultErrorHandler + response_filters: + - error_message_contains: "invalid_auth" + action: FAIL + error_message: Authentication has failed, please update your credentials. + - http_codes: [429] + action: RETRY + error_message: Failed to perform a request due to rate limits. Retrying. 
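+    # Note: Slack can report authentication failures inside an otherwise successful (HTTP 200) response
+    # with a body such as {"ok": false, "error": "invalid_auth"}, so the first filter above keys off the
+    # error text rather than the status code, while 429 responses are retried.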
+ + selector: + type: RecordSelector + extractor: + type: DpathExtractor + field_path: + - "{{ parameters['field_path'] }}" retriever: type: SimpleRetriever requester: $ref: "#/definitions/requester" record_selector: - type: RecordSelector - extractor: - class_name: "source_slack.components.SlackDpathExtractor" - field_path: - - "{{ parameters['field_path'] }}" + $ref: "#/definitions/selector" paginator: $ref: "#/definitions/default_paginator" partition_router: [] @@ -208,11 +77,7 @@ definitions: requester: $ref: "#/definitions/requester" record_selector: - type: RecordSelector - extractor: - class_name: "source_slack.components.SlackDpathExtractor" - field_path: - - "{{ parameters['field_path'] }}" + $ref: "#/definitions/selector" record_filter: type: RecordFilter condition: "{{ record.id in config.channel_filter or not config.channel_filter }}" @@ -228,7 +93,30 @@ definitions: $ref: "#/definitions/schema_loader" users_stream: - $ref: "#/definitions/stream_base" + primary_key: "id" + retriever: + type: SimpleRetriever + requester: + $ref: "#/definitions/requester" + error_handler: + type: DefaultErrorHandler + response_filters: + - error_message_contains: "invalid_auth" + action: FAIL + error_message: Authentication has failed, please update your credentials. + - http_codes: [429] + action: RETRY + error_message: Failed to perform a request due to rate limits. Retrying. + - http_codes: [403, 400] + action: FAIL + error_message: Got an exception while trying to set up the connection. Most probably, there are no users in the given Slack instance or your token is incorrect. + record_selector: + $ref: "#/definitions/selector" + paginator: + $ref: "#/definitions/default_paginator" + partition_router: [] + schema_loader: + $ref: "#/definitions/schema_loader" $parameters: name: users path: users.list @@ -244,7 +132,7 @@ definitions: $ref: "#/definitions/retriever_filter" transformations: - type: CustomTransformation - class_name: "source_slack.components.JoinChannels" + class_name: "source_slack.components.join_channels.JoinChannels" channels_partition_router: type: SubstreamPartitionRouter @@ -274,14 +162,14 @@ definitions: record_selector: type: RecordSelector extractor: - class_name: "source_slack.components.ChannelMembersExtractor" - field_path: ['members'] + class_name: "source_slack.components.channel_members_extractor.ChannelMembersExtractor" + field_path: ["members"] transformations: - type: AddFields fields: - path: - channel_id - value: '{{ stream_partition[''channel_id''] }}' + value: "{{ stream_partition['channel_id'] }}" channel_messages_stream: $ref: "#/definitions/stream_base" @@ -289,7 +177,6 @@ definitions: name: channel_messages path: conversations.history field_path: messages - use_lookback_window: false primary_key: - channel_id - ts @@ -298,53 +185,60 @@ definitions: requester: $ref: "#/definitions/requester" request_parameters: - inclusive: 'True' + inclusive: "True" + record_selector: + $ref: "#/definitions/selector" + paginator: + $ref: "#/definitions/default_paginator" partition_router: - $ref: "#/definitions/channels_partition_router" + type: SubstreamPartitionRouter + parent_stream_configs: + - type: ParentStreamConfig + stream: + $ref: "#/definitions/channels_stream" + $parameters: + name: channels + path: conversations.list + field_path: channels + use_lookback_window: true + parent_key: id + partition_field: channel + request_option: + field_name: "channel" + inject_into: "request_parameter" incremental_sync: type: DatetimeBasedCursor cursor_field: float_ts - 
lookback_window: "P{{ config['lookback_window'] if parameters.use_lookback_window else 0 }}D" cursor_datetime_formats: - - '%s' - datetime_format: '%s' + - "%s" + step: P100D + cursor_granularity: P10D + datetime_format: "%s" start_datetime: type: MinMaxDatetime - datetime: '{{ config[''start_date''] }}' - datetime_format: '%Y-%m-%dT%H:%M:%SZ' + datetime: "{{ config['start_date'] }}" + datetime_format: "%Y-%m-%dT%H:%M:%SZ" start_time_option: - type: RequestOption - field_name: oldest inject_into: request_parameter - end_time_option: + field_name: oldest type: RequestOption - field_name: latest + end_time_option: inject_into: request_parameter + field_name: latest + type: RequestOption end_datetime: type: MinMaxDatetime - datetime: '{{ now_utc().strftime(''%Y-%m-%dT%H:%M:%SZ'') }}' - datetime_format: '%Y-%m-%dT%H:%M:%SZ' - step: P100D - cursor_granularity: PT0S + datetime: "{{ now_utc().strftime('%Y-%m-%dT%H:%M:%SZ') }}" + datetime_format: "%Y-%m-%dT%H:%M:%SZ" transformations: - type: AddFields fields: - path: - float_ts - value: '{{ record.ts|int }}' - - type: AddFields - fields: + value: "{{ record.ts|int }}" - path: - channel_id - value: '{{ stream_partition[''channel_id''] }}' - - channel_messages_stream_lookback: - $ref: "#/definitions/stream_base" - $parameters: - name: channel_messages - path: conversations.history - field_path: messages - lookback_window: 0 + value: "{{ stream_partition['channel_id'] }}" threads_stream: $ref: "#/definitions/stream_base" @@ -359,42 +253,44 @@ definitions: type: SimpleRetriever requester: $ref: "#/definitions/requester" + request_parameters: + channel: "{{ stream_slice['channel'] }}" record_selector: - type: RecordSelector - extractor: - class_name: "source_slack.components.SlackDpathExtractor" - field_path: - - "{{ parameters['field_path'] }}" + $ref: "#/definitions/selector" paginator: $ref: "#/definitions/default_paginator" partition_router: - class_name: "source_slack.components.ThreadsPartitionRouter" + class_name: "source_slack.components.threads_partition_router.ThreadsPartitionRouter" parent_stream_configs: - type: ParentStreamConfig stream: $ref: "#/definitions/channel_messages_stream" - $parameters: - name: channel_messages - path: conversations.history - field_path: messages - use_lookback_window: true - parent_key: channel_id - partition_field: channel + parent_key: ts + partition_field: ts request_option: type: RequestOption - field_name: channel - inject_into: request_parameter + field_name: "ts" + inject_into: "request_parameter" + incremental_sync: + type: DatetimeBasedCursor + lookback_window: "P{{ config.get('lookback_window', 0) }}D" + cursor_field: "float_ts" + cursor_datetime_formats: + - "%s" + datetime_format: "%Y-%m-%dT%H:%M:%SZ" + start_datetime: + type: MinMaxDatetime + datetime: "{{ config['start_date'] }}" + datetime_format: "%Y-%m-%dT%H:%M:%SZ" transformations: - type: AddFields fields: - path: - float_ts - value: '{{ record.ts|int }}' - - type: AddFields - fields: + value: "{{ record['ts'] }}" - path: - channel_id - value: '{{ stream_partition[''channel_id''] }}' + value: "{{ stream_slice['channel'] }}" streams: - "#/definitions/users_stream" @@ -406,4 +302,4 @@ streams: check: type: CheckStream stream_names: - - users \ No newline at end of file + - users From 7192ba26e321c0adad16bbcb8b18c3b005d8b0cb Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Tue, 19 Mar 2024 17:20:22 +0200 Subject: [PATCH 14/56] updated expected records --- .../integration_tests/expected_records.jsonl | 19 ++++++++++--------- 1 file changed, 
10 insertions(+), 9 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/integration_tests/expected_records.jsonl b/airbyte-integrations/connectors/source-slack/integration_tests/expected_records.jsonl index 8f08a81c9d361..d966942ca743b 100644 --- a/airbyte-integrations/connectors/source-slack/integration_tests/expected_records.jsonl +++ b/airbyte-integrations/connectors/source-slack/integration_tests/expected_records.jsonl @@ -4,12 +4,13 @@ {"stream": "channel_members", "data": {"member_id": "U04L65GPMKN", "channel_id": "C04KX3KEZ54"}, "emitted_at": 1707568736171} {"stream": "channel_members", "data": {"member_id": "U04LY6NARHU", "channel_id": "C04KX3KEZ54"}, "emitted_at": 1707568736172} {"stream": "channel_members", "data": {"member_id": "U04M23SBJGM", "channel_id": "C04KX3KEZ54"}, "emitted_at": 1707568736172} -{"stream": "channel_messages", "data": {"client_msg_id": "3ae60d35-58b8-441c-923a-75de35a4ed8a", "type": "message", "text": "Test Thread 2", "user": "U04L65GPMKN", "ts": "1683104542.931169", "blocks": [{"type": "rich_text", "block_id": "WLB", "elements": [{"type": "rich_text_section", "elements": [{"type": "text", "text": "Test Thread 2"}]}]}], "team": "T04KX3KDDU6", "thread_ts": "1683104542.931169", "reply_count": 2, "reply_users_count": 1, "latest_reply": "1683104568.059569", "reply_users": ["U04L65GPMKN"], "is_locked": false, "subscribed": true, "last_read": "1683104568.059569", "channel_id": "C04KX3KEZ54", "float_ts": 1683104542}, "emitted_at": 1707568738170} -{"stream": "channel_messages", "data": {"client_msg_id": "e27672c0-451e-42a6-8eff-a14d2db8ac1e", "type": "message", "text": "Test Thread 1", "user": "U04L65GPMKN", "ts": "1683104499.808709", "blocks": [{"type": "rich_text", "block_id": "0j7", "elements": [{"type": "rich_text_section", "elements": [{"type": "text", "text": "Test Thread 1"}]}]}], "team": "T04KX3KDDU6", "thread_ts": "1683104499.808709", "reply_count": 2, "reply_users_count": 1, "latest_reply": "1683104528.084359", "reply_users": ["U04L65GPMKN"], "is_locked": false, "subscribed": true, "last_read": "1683104528.084359", "channel_id": "C04LTCM2Y56", "float_ts": 1683104499}, "emitted_at": 1707569060525} -{"stream": "channel_messages", "data": {"type": "message", "subtype": "reminder_add", "text": " set up a reminder \u201ctest reminder\u201d in this channel at 9AM tomorrow, Eastern European Summer Time.", "user": "U04L65GPMKN", "ts": "1695814864.744249", "channel_id": "C04LTCM2Y56", "float_ts": 1695814864}, "emitted_at": 1707569208689} -{"stream": "threads", "data": {"client_msg_id": "3ae60d35-58b8-441c-923a-75de35a4ed8a", "type": "message", "text": "Test Thread 2", "user": "U04L65GPMKN", "ts": "1683104542.931169", "blocks": [{"type": "rich_text", "block_id": "WLB", "elements": [{"type": "rich_text_section", "elements": [{"type": "text", "text": "Test Thread 2"}]}]}], "team": "T04KX3KDDU6", "thread_ts": "1683104542.931169", "reply_count": 2, "reply_users_count": 1, "latest_reply": "1683104568.059569", "reply_users": ["U04L65GPMKN"], "is_locked": false, "subscribed": true, "last_read": "1683104568.059569", "channel_id": "C04KX3KEZ54", "float_ts": 1683104542}, "emitted_at": 1707569354932} -{"stream": "threads", "data": {"client_msg_id": "3e96d351-270c-493f-a1a0-fdc3c4c0e11f", "type": "message", "text": "<@U04M23SBJGM> test test test", "user": "U04L65GPMKN", "ts": "1683104559.922849", "blocks": [{"type": "rich_text", "block_id": "tX6vr", "elements": [{"type": "rich_text_section", "elements": [{"type": "user", "user_id": "U04M23SBJGM"}, {"type": 
"text", "text": " test test test"}]}]}], "team": "T04KX3KDDU6", "thread_ts": "1683104542.931169", "parent_user_id": "U04L65GPMKN", "channel_id": "C04KX3KEZ54", "float_ts": 1683104559}, "emitted_at": 1707569354933} -{"stream": "threads", "data": {"client_msg_id": "08023e44-9d18-41ed-81dd-5f04ed699656", "type": "message", "text": "<@U04LY6NARHU> test test", "user": "U04L65GPMKN", "ts": "1683104568.059569", "blocks": [{"type": "rich_text", "block_id": "IyUF", "elements": [{"type": "rich_text_section", "elements": [{"type": "user", "user_id": "U04LY6NARHU"}, {"type": "text", "text": " test test"}]}]}], "team": "T04KX3KDDU6", "thread_ts": "1683104542.931169", "parent_user_id": "U04L65GPMKN", "channel_id": "C04KX3KEZ54", "float_ts": 1683104568}, "emitted_at": 1707569354933} -{"stream": "users", "data": {"id": "USLACKBOT", "team_id": "T04KX3KDDU6", "name": "slackbot", "deleted": false, "color": "757575", "real_name": "Slackbot", "tz": "America/Los_Angeles", "tz_label": "Pacific Standard Time", "tz_offset": -28800, "profile": {"title": "", "phone": "", "skype": "", "real_name": "Slackbot", "real_name_normalized": "Slackbot", "display_name": "Slackbot", "display_name_normalized": "Slackbot", "fields": {}, "status_text": "", "status_emoji": "", "status_emoji_display_info": [], "status_expiration": 0, "avatar_hash": "sv41d8cd98f0", "always_active": true, "first_name": "slackbot", "last_name": "", "image_24": "https://a.slack-edge.com/80588/img/slackbot_24.png", "image_32": "https://a.slack-edge.com/80588/img/slackbot_32.png", "image_48": "https://a.slack-edge.com/80588/img/slackbot_48.png", "image_72": "https://a.slack-edge.com/80588/img/slackbot_72.png", "image_192": "https://a.slack-edge.com/80588/marketing/img/avatars/slackbot/avatar-slackbot.png", "image_512": "https://a.slack-edge.com/80588/img/slackbot_512.png", "status_text_canonical": "", "team": "T04KX3KDDU6"}, "is_admin": false, "is_owner": false, "is_primary_owner": false, "is_restricted": false, "is_ultra_restricted": false, "is_bot": false, "is_app_user": false, "updated": 0, "is_email_confirmed": false, "who_can_share_contact_card": "EVERYONE"}, "emitted_at": 1707569357949} -{"stream": "users", "data": {"id": "U04KUMXNYMV", "team_id": "T04KX3KDDU6", "name": "deactivateduser693438", "deleted": true, "profile": {"title": "", "phone": "", "skype": "", "real_name": "Deactivated User", "real_name_normalized": "Deactivated User", "display_name": "deactivateduser", "display_name_normalized": "deactivateduser", "fields": null, "status_text": "", "status_emoji": "", "status_emoji_display_info": [], "status_expiration": 0, "avatar_hash": "g849cc56ed76", "huddle_state": "default_unset", "first_name": "Deactivated", "last_name": "User", "image_24": "https://secure.gravatar.com/avatar/d5320ceddda202563fd9e6222c07c00a.jpg?s=24&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0011-24.png", "image_32": "https://secure.gravatar.com/avatar/d5320ceddda202563fd9e6222c07c00a.jpg?s=32&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0011-32.png", "image_48": "https://secure.gravatar.com/avatar/d5320ceddda202563fd9e6222c07c00a.jpg?s=48&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0011-48.png", "image_72": "https://secure.gravatar.com/avatar/d5320ceddda202563fd9e6222c07c00a.jpg?s=72&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0011-72.png", "image_192": 
"https://secure.gravatar.com/avatar/d5320ceddda202563fd9e6222c07c00a.jpg?s=192&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0011-192.png", "image_512": "https://secure.gravatar.com/avatar/d5320ceddda202563fd9e6222c07c00a.jpg?s=512&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0011-512.png", "status_text_canonical": "", "team": "T04KX3KDDU6"}, "is_bot": false, "is_app_user": false, "updated": 1675090804, "is_forgotten": true, "is_invited_user": true}, "emitted_at": 1707569357951} -{"stream": "users", "data": {"id": "U04L2KY5CES", "team_id": "T04KX3KDDU6", "name": "deactivateduser686066", "deleted": true, "profile": {"title": "", "phone": "", "skype": "", "real_name": "Deactivated User", "real_name_normalized": "Deactivated User", "display_name": "deactivateduser", "display_name_normalized": "deactivateduser", "fields": null, "status_text": "", "status_emoji": "", "status_emoji_display_info": [], "status_expiration": 0, "avatar_hash": "g849cc56ed76", "huddle_state": "default_unset", "first_name": "Deactivated", "last_name": "User", "image_24": "https://secure.gravatar.com/avatar/cacb225265b3b19c4e72029a62cf1ef1.jpg?s=24&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0009-24.png", "image_32": "https://secure.gravatar.com/avatar/cacb225265b3b19c4e72029a62cf1ef1.jpg?s=32&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0009-32.png", "image_48": "https://secure.gravatar.com/avatar/cacb225265b3b19c4e72029a62cf1ef1.jpg?s=48&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0009-48.png", "image_72": "https://secure.gravatar.com/avatar/cacb225265b3b19c4e72029a62cf1ef1.jpg?s=72&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0009-72.png", "image_192": "https://secure.gravatar.com/avatar/cacb225265b3b19c4e72029a62cf1ef1.jpg?s=192&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0009-192.png", "image_512": "https://secure.gravatar.com/avatar/cacb225265b3b19c4e72029a62cf1ef1.jpg?s=512&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0009-512.png", "status_text_canonical": "", "team": "T04KX3KDDU6"}, "is_bot": false, "is_app_user": false, "updated": 1675090785, "is_forgotten": true, "is_invited_user": true}, "emitted_at": 1707569357951} +{"stream": "channel_messages", "data": {"user": "U04L65GPMKN", "type": "message", "ts": "1683104542.931169", "client_msg_id": "3ae60d35-58b8-441c-923a-75de35a4ed8a", "text": "Test Thread 2", "team": "T04KX3KDDU6", "thread_ts": "1683104542.931169", "reply_count": 2, "reply_users_count": 1, "latest_reply": "1683104568.059569", "reply_users": ["U04L65GPMKN"], "is_locked": false, "subscribed": true, "last_read": "1683104568.059569", "blocks": [{"type": "rich_text", "block_id": "WLB", "elements": [{"type": "rich_text_section", "elements": [{"type": "text", "text": "Test Thread 2"}]}]}], "float_ts": 1683104542, "channel_id": ""}, "emitted_at": 1710778019252} +{"stream": "channel_messages", "data": {"user": "U04L65GPMKN", "type": "message", "ts": "1683104499.808709", "client_msg_id": "e27672c0-451e-42a6-8eff-a14d2db8ac1e", "text": "Test Thread 1", "team": "T04KX3KDDU6", "thread_ts": "1683104499.808709", "reply_count": 2, "reply_users_count": 1, "latest_reply": "1683104528.084359", "reply_users": ["U04L65GPMKN"], "is_locked": false, "subscribed": true, "last_read": "1683104528.084359", "blocks": [{"type": "rich_text", "block_id": "0j7", "elements": [{"type": "rich_text_section", "elements": [{"type": "text", "text": "Test Thread 1"}]}]}], "float_ts": 
1683104499, "channel_id": ""}, "emitted_at": 1710778021028} +{"stream": "channel_messages", "data": {"user": "USLACKBOT", "type": "message", "ts": "1695880827.186049", "bot_id": "B01", "text": "Reminder: test reminder.", "team": "T04KX3KDDU6", "blocks": [{"type": "rich_text", "block_id": "BGzX", "elements": [{"type": "rich_text_section", "elements": [{"type": "text", "text": "Reminder: test reminder."}]}]}], "float_ts": 1695880827, "channel_id": ""}, "emitted_at": 1710778021270} +{"stream": "channel_messages", "data": {"subtype": "reminder_add", "user": "U04L65GPMKN", "type": "message", "ts": "1695814864.744249", "text": " set up a reminder \u201ctest reminder\u201d in this channel at 9AM tomorrow, Eastern European Summer Time.", "float_ts": 1695814864, "channel_id": ""}, "emitted_at": 1710778021275} +{"stream": "threads", "data": {"user": "U04L65GPMKN", "type": "message", "ts": "1683104528.084359", "client_msg_id": "ffccbb24-8dd6-476d-87bf-65e5fa033cb9", "text": "<@U04M23SBJGM> test test test", "team": "T04KX3KDDU6", "thread_ts": "1683104499.808709", "parent_user_id": "U04L65GPMKN", "blocks": [{"type": "rich_text", "block_id": "Lvl", "elements": [{"type": "rich_text_section", "elements": [{"type": "user", "user_id": "U04M23SBJGM"}, {"type": "text", "text": " test test test"}]}]}], "float_ts": 1683104528.084359, "channel_id": "C04LTCM2Y56"}, "emitted_at": 1710778305077} +{"stream": "threads", "data": {"user": "USLACKBOT", "type": "message", "ts": "1695880827.186049", "bot_id": "B01", "text": "Reminder: test reminder.", "team": "T04KX3KDDU6", "blocks": [{"type": "rich_text", "block_id": "BGzX", "elements": [{"type": "rich_text_section", "elements": [{"type": "text", "text": "Reminder: test reminder."}]}]}], "float_ts": 1695880827.186049, "channel_id": "C04LTCM2Y56"}, "emitted_at": 1710778305751} +{"stream": "threads", "data": {"subtype": "reminder_add", "user": "U04L65GPMKN", "type": "message", "ts": "1695814864.744249", "text": " set up a reminder \u201ctest reminder\u201d in this channel at 9AM tomorrow, Eastern European Summer Time.", "float_ts": 1695814864.744249, "channel_id": "C04LTCM2Y56"}, "emitted_at": 1710778305981} +{"stream": "users", "data": {"id": "USLACKBOT", "team_id": "T04KX3KDDU6", "name": "slackbot", "deleted": false, "color": "757575", "real_name": "Slackbot", "tz": "America/Los_Angeles", "tz_label": "Pacific Daylight Time", "tz_offset": -25200, "profile": {"title": "", "phone": "", "skype": "", "real_name": "Slackbot", "real_name_normalized": "Slackbot", "display_name": "Slackbot", "display_name_normalized": "Slackbot", "fields": {}, "status_text": "", "status_emoji": "", "status_emoji_display_info": [], "status_expiration": 0, "avatar_hash": "sv41d8cd98f0", "always_active": true, "first_name": "slackbot", "last_name": "", "image_24": "https://a.slack-edge.com/80588/img/slackbot_24.png", "image_32": "https://a.slack-edge.com/80588/img/slackbot_32.png", "image_48": "https://a.slack-edge.com/80588/img/slackbot_48.png", "image_72": "https://a.slack-edge.com/80588/img/slackbot_72.png", "image_192": "https://a.slack-edge.com/80588/marketing/img/avatars/slackbot/avatar-slackbot.png", "image_512": "https://a.slack-edge.com/80588/img/slackbot_512.png", "status_text_canonical": "", "team": "T04KX3KDDU6"}, "is_admin": false, "is_owner": false, "is_primary_owner": false, "is_restricted": false, "is_ultra_restricted": false, "is_bot": false, "is_app_user": false, "updated": 0, "is_email_confirmed": false, "who_can_share_contact_card": "EVERYONE"}, "emitted_at": 1710501138877} 
+{"stream": "users", "data": {"id": "U04KUMXNYMV", "team_id": "T04KX3KDDU6", "name": "deactivateduser693438", "deleted": true, "profile": {"title": "", "phone": "", "skype": "", "real_name": "Deactivated User", "real_name_normalized": "Deactivated User", "display_name": "deactivateduser", "display_name_normalized": "deactivateduser", "fields": null, "status_text": "", "status_emoji": "", "status_emoji_display_info": [], "status_expiration": 0, "avatar_hash": "g849cc56ed76", "huddle_state": "default_unset", "first_name": "Deactivated", "last_name": "User", "image_24": "https://secure.gravatar.com/avatar/d5320ceddda202563fd9e6222c07c00a.jpg?s=24&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0011-24.png", "image_32": "https://secure.gravatar.com/avatar/d5320ceddda202563fd9e6222c07c00a.jpg?s=32&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0011-32.png", "image_48": "https://secure.gravatar.com/avatar/d5320ceddda202563fd9e6222c07c00a.jpg?s=48&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0011-48.png", "image_72": "https://secure.gravatar.com/avatar/d5320ceddda202563fd9e6222c07c00a.jpg?s=72&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0011-72.png", "image_192": "https://secure.gravatar.com/avatar/d5320ceddda202563fd9e6222c07c00a.jpg?s=192&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0011-192.png", "image_512": "https://secure.gravatar.com/avatar/d5320ceddda202563fd9e6222c07c00a.jpg?s=512&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0011-512.png", "status_text_canonical": "", "team": "T04KX3KDDU6"}, "is_bot": false, "is_app_user": false, "updated": 1675090804, "is_forgotten": true, "is_invited_user": true}, "emitted_at": 1710501138879} +{"stream": "users", "data": {"id": "U04L2KY5CES", "team_id": "T04KX3KDDU6", "name": "deactivateduser686066", "deleted": true, "profile": {"title": "", "phone": "", "skype": "", "real_name": "Deactivated User", "real_name_normalized": "Deactivated User", "display_name": "deactivateduser", "display_name_normalized": "deactivateduser", "fields": null, "status_text": "", "status_emoji": "", "status_emoji_display_info": [], "status_expiration": 0, "avatar_hash": "g849cc56ed76", "huddle_state": "default_unset", "first_name": "Deactivated", "last_name": "User", "image_24": "https://secure.gravatar.com/avatar/cacb225265b3b19c4e72029a62cf1ef1.jpg?s=24&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0009-24.png", "image_32": "https://secure.gravatar.com/avatar/cacb225265b3b19c4e72029a62cf1ef1.jpg?s=32&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0009-32.png", "image_48": "https://secure.gravatar.com/avatar/cacb225265b3b19c4e72029a62cf1ef1.jpg?s=48&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0009-48.png", "image_72": "https://secure.gravatar.com/avatar/cacb225265b3b19c4e72029a62cf1ef1.jpg?s=72&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0009-72.png", "image_192": "https://secure.gravatar.com/avatar/cacb225265b3b19c4e72029a62cf1ef1.jpg?s=192&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0009-192.png", "image_512": "https://secure.gravatar.com/avatar/cacb225265b3b19c4e72029a62cf1ef1.jpg?s=512&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0009-512.png", "status_text_canonical": "", "team": "T04KX3KDDU6"}, "is_bot": false, "is_app_user": false, "updated": 1675090785, "is_forgotten": true, "is_invited_user": true}, "emitted_at": 1710501138881} From 
af74589cf95c283060326d7a971ffe0b31564e1d Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Tue, 19 Mar 2024 17:22:51 +0200 Subject: [PATCH 15/56] updated components --- .../source-slack/source_slack/components.py | 145 ------------------ .../source_slack/components/__init__.py | 0 .../components/channel_members_extractor.py | 21 +++ .../source_slack/components/join_channels.py | 93 +++++++++++ .../components/threads_partition_router.py | 67 ++++++++ 5 files changed, 181 insertions(+), 145 deletions(-) delete mode 100644 airbyte-integrations/connectors/source-slack/source_slack/components.py create mode 100644 airbyte-integrations/connectors/source-slack/source_slack/components/__init__.py create mode 100644 airbyte-integrations/connectors/source-slack/source_slack/components/channel_members_extractor.py create mode 100644 airbyte-integrations/connectors/source-slack/source_slack/components/join_channels.py create mode 100644 airbyte-integrations/connectors/source-slack/source_slack/components/threads_partition_router.py diff --git a/airbyte-integrations/connectors/source-slack/source_slack/components.py b/airbyte-integrations/connectors/source-slack/source_slack/components.py deleted file mode 100644 index 9715daf93f810..0000000000000 --- a/airbyte-integrations/connectors/source-slack/source_slack/components.py +++ /dev/null @@ -1,145 +0,0 @@ -from dataclasses import dataclass -from typing import Optional -from typing import List, Mapping, Any, Iterable -import requests - -from airbyte_cdk.models import AirbyteMessage, SyncMode, Type, FailureType -from airbyte_cdk.sources.declarative.types import Config, Record, StreamSlice, StreamState -from airbyte_cdk.sources.declarative.extractors import DpathExtractor -from airbyte_cdk.sources.declarative.transformations import RecordTransformation, AddFields -from airbyte_cdk.sources.declarative.partition_routers import SubstreamPartitionRouter -from airbyte_cdk.utils.traced_exception import AirbyteTracedException - - -@dataclass -class SlackDpathExtractor(DpathExtractor): - """ - Handle error from Slack API: - { - "body": "{\"ok\":false,\"error\":\"invalid_auth\"}", - "status": "200" - } - """ - def extract_records(self, response: requests.Response) -> List[Record]: - response_body = self.decoder.decode(response) - if not response_body.get('ok'): - error_message = response_body.get('error') - message = f"Request failed with error: {error_message}" - if 'invalid_auth' in error_message: - raise AirbyteTracedException( - message='Authentication has failed, please update your credentials', - internal_message=message, - failure_type=FailureType.config_error, - ) - else: - raise AirbyteTracedException( - message=message, - internal_message=message, - failure_type=FailureType.system_error, - ) - return super().extract_records(response) - - -@dataclass -class ChannelMembersExtractor(SlackDpathExtractor): - """ - Transform response from list of strings to list dicts: - from: ['aa', 'bb'] - to: [{'member_id': 'aa'}, {{'member_id': 'bb'}] - """ - def extract_records(self, response: requests.Response) -> List[Record]: - records = super().extract_records(response) - return [{'member_id': record} for record in records] - - -@dataclass -class JoinChannels(RecordTransformation): - """ - Make 'conversations.join' POST request for every found channel id - if we are not still a member of such channel - """ - - def transform( - self, - record: Record, - config: Optional[Config] = None, - stream_state: Optional[StreamState] = None, - stream_slice: 
Optional[StreamSlice] = None, - ) -> Record: - # The `is_member` property indicates whether or not the API Bot is already assigned / joined to the channel. - # https://api.slack.com/types/conversation#booleans - channel_id = record.get('id') - if config.get('join_channels') and not record.get("is_member"): - response = requests.post( - url='https://slack.com/api/conversations.join', - headers={ - 'Content-Type': 'application/json', - 'Authorization': f'Bearer {config["api_token"]}' - }, - params={'channel': channel_id} - ) - print(response.json()) - - # WHAT TO DO IF IT FAILS ???????????????????????? - # self.logger.info(f"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!Successfully joined channel: {channel_id}") - - -@dataclass -class ThreadsPartitionRouter(SubstreamPartitionRouter): - """Overwrite SubstreamPartitionRouter to be able to pass more than one value - from parent stream to stream_slices - """ - def get_request_params( - self, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, - ) -> Mapping[str, Any]: - return { - 'channel': stream_slice['channel_id'], - 'ts': stream_slice['ts'], - } - - def stream_slices(self) -> Iterable[StreamSlice]: - """ - Change behaviour of main stream_slices by adding two values (for channel_id, ts) from parent stream - (previously it was possible to add only one value) - """ - if not self.parent_stream_configs: - yield from [] - else: - for parent_stream_config in self.parent_stream_configs: - parent_stream = parent_stream_config.stream - parent_field = parent_stream_config.parent_key.eval(self.config) - stream_state_field = parent_stream_config.partition_field.eval(self.config) - for parent_stream_slice in parent_stream.stream_slices( - sync_mode=SyncMode.full_refresh, cursor_field=None, stream_state=None - ): - - empty_parent_slice = True - parent_slice = parent_stream_slice - - for parent_record in parent_stream.read_records( - sync_mode=SyncMode.full_refresh, cursor_field=None, stream_slice=parent_stream_slice, stream_state=None - ): - - print(parent_record) - # Skip non-records (eg AirbyteLogMessage) - if isinstance(parent_record, AirbyteMessage): - if parent_record.type == Type.RECORD: - parent_record = parent_record.record.data - else: - continue - elif isinstance(parent_record, Record): - parent_record = parent_record.data - - empty_parent_slice = False - - yield { - 'channel_id': parent_record['channel_id'], - 'ts': parent_record['ts'], - "parent_slice": parent_slice - } - # If the parent slice contains no records, - if empty_parent_slice: - yield from [] diff --git a/airbyte-integrations/connectors/source-slack/source_slack/components/__init__.py b/airbyte-integrations/connectors/source-slack/source_slack/components/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/airbyte-integrations/connectors/source-slack/source_slack/components/channel_members_extractor.py b/airbyte-integrations/connectors/source-slack/source_slack/components/channel_members_extractor.py new file mode 100644 index 0000000000000..9dbb401a07e9d --- /dev/null +++ b/airbyte-integrations/connectors/source-slack/source_slack/components/channel_members_extractor.py @@ -0,0 +1,21 @@ +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+
+from dataclasses import dataclass
+from typing import List
+
+import requests
+from airbyte_cdk.sources.declarative.extractors import DpathExtractor
+from airbyte_cdk.sources.declarative.types import Record
+
+
+@dataclass
+class ChannelMembersExtractor(DpathExtractor):
+    """
+    Transform the response from a list of strings to a list of dicts:
+    from: ['aa', 'bb']
+    to: [{'member_id': 'aa'}, {'member_id': 'bb'}]
+    """
+
+    def extract_records(self, response: requests.Response) -> List[Record]:
+        records = super().extract_records(response)
+        return [{"member_id": record} for record in records]
diff --git a/airbyte-integrations/connectors/source-slack/source_slack/components/join_channels.py b/airbyte-integrations/connectors/source-slack/source_slack/components/join_channels.py
new file mode 100644
index 0000000000000..d4d3e1b310b96
--- /dev/null
+++ b/airbyte-integrations/connectors/source-slack/source_slack/components/join_channels.py
@@ -0,0 +1,93 @@
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+
+import logging
+from dataclasses import dataclass
+from typing import Any, Iterable, List, Mapping, Optional
+
+import requests
+from airbyte_cdk.models import SyncMode
+from airbyte_cdk.sources.declarative.transformations import RecordTransformation
+from airbyte_cdk.sources.declarative.types import Config, Record, StreamSlice, StreamState
+from airbyte_cdk.sources.streams.http import HttpStream
+from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator
+
+LOGGER = logging.getLogger("airbyte_logger")
+
+
+class JoinChannelsStream(HttpStream):
+    """
+    This class is a special stream which joins channels because the Slack API only returns messages from channels this bot is in.
+    Its responses should only be logged for debugging reasons, not read as records.
+    """
+
+    url_base = "https://slack.com/api/"
+    http_method = "POST"
+    primary_key = "id"
+
+    def __init__(self, channel_filter: List[str] = None, **kwargs):
+        self.channel_filter = channel_filter or []
+        super().__init__(**kwargs)
+
+    def path(self, **kwargs) -> str:
+        return "conversations.join"
+
+    def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iterable:
+        """
+        Override to simply indicate that the specific channel was joined successfully.
+        This method should not return any data, but should return an empty iterable.
+        """
+        self.logger.info(f"Successfully joined channel: {stream_slice['channel_name']}")
+        yield response.json()["channel"]
+
+    def request_body_json(self, stream_slice: Mapping = None, **kwargs) -> Optional[Mapping]:
+        return {"channel": stream_slice["channel"]}
+
+    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
+        """
+        The pagination is not applicable to this Service Stream.
+        """
+        return None
+
+
+@dataclass
+class JoinChannels(RecordTransformation):
+    """
+    Make a 'conversations.join' POST request for every found channel id
+    if we are not yet a member of such a channel
+    """
+
+    def should_join_to_channel(self, config: Mapping[str, Any], record: Record) -> bool:
+        """
+        The `is_member` property indicates whether the API Bot is already assigned / joined to the channel.
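+        Joining is attempted only when the `join_channels` option is enabled in the connector config.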
+ https://api.slack.com/types/conversation#booleans + """ + return config["join_channels"] and not record.get("is_member") + + def make_join_channel_slice(self, channel: Mapping[str, Any]) -> Mapping[str, Any]: + channel_id: str = channel.get("id") + channel_name: str = channel.get("name") + LOGGER.info(f"Joining Slack Channel: `{channel_name}`") + return {"channel": channel_id, "channel_name": channel_name} + + def join_channels_stream(self, config) -> JoinChannelsStream: + token = config["credentials"].get("api_token") or config["credentials"].get("access_token") + authenticator = TokenAuthenticator(token) + channel_filter = config["channel_filter"] + return JoinChannelsStream(authenticator=authenticator, channel_filter=channel_filter) + + def transform( + self, + record: Record, + config: Optional[Config] = None, + stream_state: Optional[StreamState] = None, + stream_slice: Optional[StreamSlice] = None, + ) -> Mapping[str, Any]: + if self.should_join_to_channel(config, record): + channel = list( + self.join_channels_stream(config).read_records( + sync_mode=SyncMode.full_refresh, + stream_slice=self.make_join_channel_slice(record), + ) + ) + return channel[0] + return record diff --git a/airbyte-integrations/connectors/source-slack/source_slack/components/threads_partition_router.py b/airbyte-integrations/connectors/source-slack/source_slack/components/threads_partition_router.py new file mode 100644 index 0000000000000..b0581988f3d82 --- /dev/null +++ b/airbyte-integrations/connectors/source-slack/source_slack/components/threads_partition_router.py @@ -0,0 +1,67 @@ +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. + +from dataclasses import dataclass +from typing import Any, Iterable, Mapping, Optional + +import dpath.util +import pendulum +from airbyte_cdk.models import AirbyteMessage, SyncMode, Type +from airbyte_cdk.sources.declarative.partition_routers import SubstreamPartitionRouter +from airbyte_cdk.sources.declarative.types import Config, Record, StreamSlice, StreamState + + +@dataclass +class ThreadsPartitionRouter(SubstreamPartitionRouter): + """Overwrite SubstreamPartitionRouter to be able to pass more than one value + from parent stream to stream_slices + """ + + def stream_slices(self) -> Iterable[StreamSlice]: + """ + Iterate over each parent stream's record and create a StreamSlice for each record. + + For each stream, iterate over its stream_slices. + For each stream slice, iterate over each record. + yield a stream slice for each such records. + + If a parent slice contains no record, emit a slice with parent_record=None. 
+ + The template string can interpolate the following values: + - parent_stream_slice: mapping representing the parent's stream slice + - parent_record: mapping representing the parent record + - parent_stream_name: string representing the parent stream name + """ + if not self.parent_stream_configs: + yield from [] + else: + for parent_stream_config in self.parent_stream_configs: + parent_stream = parent_stream_config.stream + parent_field = parent_stream_config.parent_key.eval(self.config) + stream_state_field = parent_stream_config.partition_field.eval(self.config) + for parent_stream_slice in parent_stream.stream_slices( + sync_mode=SyncMode.full_refresh, cursor_field=None, stream_state=None + ): + empty_parent_slice = True + parent_slice = parent_stream_slice + + for parent_record in parent_stream.read_records( + sync_mode=SyncMode.full_refresh, cursor_field=None, stream_slice=parent_stream_slice, stream_state=None + ): + # Skip non-records (eg AirbyteLogMessage) + if isinstance(parent_record, AirbyteMessage): + if parent_record.type == Type.RECORD: + parent_record = parent_record.record.data + else: + continue + elif isinstance(parent_record, Record): + parent_record = parent_record.data + try: + stream_state_value = dpath.util.get(parent_record, parent_field) + except KeyError: + pass + else: + empty_parent_slice = False + yield {stream_state_field: stream_state_value, "channel": parent_slice["channel"]} + # If the parent slice contains no records, + if empty_parent_slice: + yield from [] From 3d4890587f63f931357174ea9c4e1fc54939dada Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Tue, 19 Mar 2024 17:24:21 +0200 Subject: [PATCH 16/56] added migration for legacy config --- .../source_slack/config_migrations.py | 73 +++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 airbyte-integrations/connectors/source-slack/source_slack/config_migrations.py diff --git a/airbyte-integrations/connectors/source-slack/source_slack/config_migrations.py b/airbyte-integrations/connectors/source-slack/source_slack/config_migrations.py new file mode 100644 index 0000000000000..cc6d9cd036070 --- /dev/null +++ b/airbyte-integrations/connectors/source-slack/source_slack/config_migrations.py @@ -0,0 +1,73 @@ +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
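# A minimal before/after sketch of the migration implemented below (editorial
# illustration, not part of the committed diff). The shapes mirror
# unit_tests/configs/legacy_config.json and actual_config.json added later in this
# series: a legacy config keeps "api_token" at the top level, and the migration nests
# it under a "credentials" object.
legacy_config = {
    "start_date": "2021-07-22T20:00:00Z",
    "join_channels": True,
    "channel_filter": ["airbyte-for-beginners", "good-reads"],
    "api_token": "api-token",
}
migrated_config = {
    "start_date": "2021-07-22T20:00:00Z",
    "join_channels": True,
    "channel_filter": ["airbyte-for-beginners", "good-reads"],
    "credentials": {"api_token": "api-token", "option_title": "API Token Credentials"},
}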
+ +import logging +from typing import Any, List, Mapping + +from airbyte_cdk import AirbyteEntrypoint +from airbyte_cdk.config_observation import create_connector_config_control_message +from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository +from source_slack import SourceSlack + +logger = logging.getLogger("airbyte_logger") + + +class MigrateLegacyConfig: + message_repository: MessageRepository = InMemoryMessageRepository() + + @classmethod + def _should_migrate(cls, config: Mapping[str, Any]) -> bool: + """ + legacy config: + { + "start_date": "2021-07-22T20:00:00Z", + "end_date": "2021-07-23T20:00:00Z", + "lookback_window": 1, + "join_channels": True, + "channel_filter": ["airbyte-for-beginners", "good-reads"], + "api_token": "api-token" + } + api token should be in the credentials object + """ + if config.get("api_token") and not config.get("credentials"): + return True + return False + + @classmethod + def _move_token_to_credentials(cls, config: Mapping[str, Any]) -> Mapping[str, Any]: + api_token = config["api_token"] + config.update({"credentials": {"api_token": api_token, "option_title": "API Token Credentials"}}) + config.pop("api_token") + return config + + @classmethod + def _modify_and_save(cls, config_path: str, source: SourceSlack, config: Mapping[str, Any]) -> Mapping[str, Any]: + migrated_config = cls._move_token_to_credentials(config) + # save the config + source.write_config(migrated_config, config_path) + return migrated_config + + @classmethod + def _emit_control_message(cls, migrated_config: Mapping[str, Any]) -> None: + # add the Airbyte Control Message to message repo + cls.message_repository.emit_message(create_connector_config_control_message(migrated_config)) + # emit the Airbyte Control Message from message queue to stdout + for message in cls.message_repository._message_queue: + print(message.json(exclude_unset=True)) + + @classmethod + def migrate(cls, args: List[str], source: SourceSlack) -> None: + """ + This method checks the input args, should the config be migrated, + transform if necessary and emit the CONTROL message. 
+ """ + # get config path + config_path = AirbyteEntrypoint(source).extract_config(args) + # proceed only if `--config` arg is provided + if config_path: + # read the existing config + config = source.read_config(config_path) + # migration check + if cls._should_migrate(config): + cls._emit_control_message( + cls._modify_and_save(config_path, source, config), + ) From 320a675c71f6f607e58d5f19c58c3ee96c9fb633 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Tue, 19 Mar 2024 17:25:31 +0200 Subject: [PATCH 17/56] updated unittests --- .../unit_tests/configs/actual_config.json | 11 ++ .../unit_tests/configs/legacy_config.json | 8 ++ .../source-slack/unit_tests/conftest.py | 20 ++- .../unit_tests/test_components.py | 113 ++++++++++++++++ .../unit_tests/test_config_migrations.py | 47 +++++++ .../source-slack/unit_tests/test_source.py | 13 +- .../source-slack/unit_tests/test_streams.py | 121 +++++------------- 7 files changed, 230 insertions(+), 103 deletions(-) create mode 100644 airbyte-integrations/connectors/source-slack/unit_tests/configs/actual_config.json create mode 100644 airbyte-integrations/connectors/source-slack/unit_tests/configs/legacy_config.json create mode 100644 airbyte-integrations/connectors/source-slack/unit_tests/test_components.py create mode 100644 airbyte-integrations/connectors/source-slack/unit_tests/test_config_migrations.py diff --git a/airbyte-integrations/connectors/source-slack/unit_tests/configs/actual_config.json b/airbyte-integrations/connectors/source-slack/unit_tests/configs/actual_config.json new file mode 100644 index 0000000000000..065ffde78394f --- /dev/null +++ b/airbyte-integrations/connectors/source-slack/unit_tests/configs/actual_config.json @@ -0,0 +1,11 @@ +{ + "start_date": "2021-07-22T20:00:00Z", + "end_date": "2021-07-23T20:00:00Z", + "lookback_window": 1, + "join_channels": true, + "channel_filter": ["airbyte-for-beginners", "good-reads"], + "credentials": { + "api_token": "api-token", + "option_title": "API Token Credentials" + } +} diff --git a/airbyte-integrations/connectors/source-slack/unit_tests/configs/legacy_config.json b/airbyte-integrations/connectors/source-slack/unit_tests/configs/legacy_config.json new file mode 100644 index 0000000000000..99eda1d750b38 --- /dev/null +++ b/airbyte-integrations/connectors/source-slack/unit_tests/configs/legacy_config.json @@ -0,0 +1,8 @@ +{ + "start_date": "2021-07-22T20:00:00Z", + "end_date": "2021-07-23T20:00:00Z", + "lookback_window": 1, + "join_channels": true, + "channel_filter": ["airbyte-for-beginners", "good-reads"], + "api_token": "api-token" +} diff --git a/airbyte-integrations/connectors/source-slack/unit_tests/conftest.py b/airbyte-integrations/connectors/source-slack/unit_tests/conftest.py index 6d9254730d5f6..52afa40f47852 100644 --- a/airbyte-integrations/connectors/source-slack/unit_tests/conftest.py +++ b/airbyte-integrations/connectors/source-slack/unit_tests/conftest.py @@ -26,12 +26,6 @@ def conversations_list(requests_mock): }, ) - -@pytest.fixture(autouse=True) -def join_channels(requests_mock): - return requests_mock.register_uri("POST", "https://slack.com/api/conversations.join") - - def base_config() -> MutableMapping: return copy.deepcopy( { @@ -100,7 +94,19 @@ def invalid_config() -> MutableMapping: ( (_token_config(), True), (_oauth_config(), True), - (_legacy_token_config(), True), + # (_legacy_token_config(), True), (_invalid_config(), False), ), ) + + +@pytest.fixture +def joined_channel(): + return {"id": "C061EG9SL", "name": "general", "is_channel": True, 
"is_group": False, "is_im": False, + "created": 1449252889, + "creator": "U061F7AUR", "is_archived": False, "is_general": True, "unlinked": 0, "name_normalized": "general", + "is_shared": False, + "is_ext_shared": False, "is_org_shared": False, "pending_shared": [], "is_pending_ext_shared": False, + "is_member": True, "is_private": False, "is_mpim": False, + "topic": {"value": "Which widget do you worry about?", "creator": "", "last_set": 0}, + "purpose": {"value": "For widget discussion", "creator": "", "last_set": 0}, "previous_names": []} diff --git a/airbyte-integrations/connectors/source-slack/unit_tests/test_components.py b/airbyte-integrations/connectors/source-slack/unit_tests/test_components.py new file mode 100644 index 0000000000000..b0fa819a0a6fc --- /dev/null +++ b/airbyte-integrations/connectors/source-slack/unit_tests/test_components.py @@ -0,0 +1,113 @@ +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. + +from unittest.mock import MagicMock + +import pendulum +from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import ParentStreamConfig +from airbyte_cdk.sources.declarative.requesters import RequestOption +from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator +from airbyte_protocol.models import SyncMode +from freezegun import freeze_time +from source_slack import SourceSlack +from source_slack.components.channel_members_extractor import ChannelMembersExtractor +from source_slack.components.join_channels import JoinChannels, JoinChannelsStream +from source_slack.components.threads_partition_router import ThreadsPartitionRouter + + +def get_stream_by_name(stream_name, config): + streams = SourceSlack().streams(config=config) + for stream in streams: + if stream.name == stream_name: + return stream + raise ValueError(f"Stream {stream_name} not found") + + +def test_channel_members_extractor(token_config): + response_mock = MagicMock() + response_mock.json.return_value = {"members": [ + "U023BECGF", + "U061F7AUR", + "W012A3CDE" + ]} + records = ChannelMembersExtractor(config=token_config, parameters={}, field_path=['members']).extract_records(response=response_mock) + assert records == [{'member_id': 'U023BECGF'}, + {'member_id': 'U061F7AUR'}, + {'member_id': 'W012A3CDE'}] + + +@freeze_time("2024-03-10T20:00:00Z", tz_offset=-2) +def test_threads_partition_router(token_config, requests_mock): + requests_mock.get(url="https://slack.com/api/conversations.list?limit=1000", + json={"channels": [ + {"id": "airbyte-for-beginners", "is_member": True}, + {"id": "good-reads", "is_member": True} + ]}) + start_date = "2024-03-01T20:00:00Z" + end_date = pendulum.now() + oldest, latest = int(pendulum.parse(start_date).timestamp()), int(end_date.timestamp()) + token_config["start_date"] = start_date + for channel in token_config["channel_filter"]: + requests_mock.get( + url=f"https://slack.com/api/conversations.history?" 
+ f"inclusive=True&limit=1000&channel={channel}&" + f"oldest={oldest}&latest={latest}", + json={"messages": [{"ts": latest}, {"ts": oldest}]} + ) + + channel_messages_stream = get_stream_by_name("channel_messages", token_config) + router = ThreadsPartitionRouter( + config=token_config, + parameters={}, + parent_stream_configs=[ + ParentStreamConfig( + config=token_config, + stream=channel_messages_stream, + parent_key="ts", + partition_field="ts", + parameters={}, + request_option=RequestOption(field_name="ts", inject_into="request_parameter", parameters={}) + ), ] + ) + slices = router.stream_slices() + expected = [{"channel": "airbyte-for-beginners", "ts": latest}, + {"channel": "airbyte-for-beginners", "ts": oldest}, + {"channel": "good-reads", "ts": latest}, + {"channel": "good-reads", "ts": oldest}] + + assert list(slices) == expected + + +def test_join_channels(token_config, requests_mock, joined_channel): + requests_mock.post( + url="https://slack.com/api/conversations.join", + json={"channel": joined_channel} + ) + token = token_config["credentials"]["api_token"] + authenticator = TokenAuthenticator(token) + channel_filter = token_config["channel_filter"] + stream = JoinChannelsStream(authenticator=authenticator, channel_filter=channel_filter) + records = list(stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice={"channel": "C061EG9SL", "channel_name": "general"})) + assert records[0] == joined_channel + + +def test_join_channels_should_join_to_channel(token_config): + transformation = JoinChannels() + assert transformation.should_join_to_channel(token_config, {"is_member": False}) is True + assert transformation.should_join_to_channel(token_config, {"is_member": True}) is False + + +def test_join_channels_make_join_channel_slice(token_config): + transformation = JoinChannels() + assert transformation.make_join_channel_slice({"id": "C061EG9SL", "name": "general"}) == {"channel": "C061EG9SL", + "channel_name": "general"} + + +def test_join_channel_transformation(requests_mock, token_config, joined_channel): + requests_mock.post( + url="https://slack.com/api/conversations.join", + json={"channel": joined_channel} + ) + + transformation = JoinChannels() + assert transformation.transform(config=token_config, record={"is_member": True}) == {"is_member": True} + assert transformation.transform(config=token_config, record={"is_member": False}) == joined_channel diff --git a/airbyte-integrations/connectors/source-slack/unit_tests/test_config_migrations.py b/airbyte-integrations/connectors/source-slack/unit_tests/test_config_migrations.py new file mode 100644 index 0000000000000..761597a66fc22 --- /dev/null +++ b/airbyte-integrations/connectors/source-slack/unit_tests/test_config_migrations.py @@ -0,0 +1,47 @@ +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
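# A minimal sketch of how the migration is exercised (editorial illustration, not part
# of the committed diff). It follows source_slack/run.py elsewhere in this series:
# MigrateLegacyConfig.migrate receives the connector's CLI-style arguments before the
# source is launched, and rewrites the config file only when a legacy top-level
# "api_token" is present. The config path below is a hypothetical placeholder.
from source_slack import SourceSlack
from source_slack.config_migrations import MigrateLegacyConfig

MigrateLegacyConfig.migrate(["check", "--config", "/path/to/config.json"], SourceSlack())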
+ +import json +import os +from typing import Any, Mapping + +from source_slack import SourceSlack +from source_slack.config_migrations import MigrateLegacyConfig + +CMD = "check" +TEST_CONFIG_LEGACY_PATH = f"{os.path.dirname(__file__)}/configs/legacy_config.json" +TEST_CONFIG_ACTUAL_PATH = f"{os.path.dirname(__file__)}/configs/actual_config.json" + +SOURCE_INPUT_ARGS_LEGACY = [CMD, "--config", TEST_CONFIG_LEGACY_PATH] +SOURCE_INPUT_ARGS_ACTUAL = [CMD, "--config", TEST_CONFIG_ACTUAL_PATH] + + +def revert_config(): + with open(TEST_CONFIG_LEGACY_PATH, "r") as test_config: + config = json.load(test_config) + config.pop("credentials") + config.update({"api_token": "api-token"}) + with open(TEST_CONFIG_LEGACY_PATH, "w") as updated_config: + config = json.dumps(config) + updated_config.write(config) + + +def load_config(config_path: str = TEST_CONFIG_LEGACY_PATH) -> Mapping[str, Any]: + with open(config_path, "r") as config: + return json.load(config) + + +def test_config_migration(): + migration = MigrateLegacyConfig() + migration.migrate(SOURCE_INPUT_ARGS_LEGACY, SourceSlack()) + test_migrated_config = load_config() + assert test_migrated_config["credentials"]["api_token"] == "api-token" + assert test_migrated_config["credentials"]["option_title"] == "API Token Credentials" + revert_config() + + +def test_config_not_migrated(): + config_before_migration = load_config(TEST_CONFIG_ACTUAL_PATH) + migration = MigrateLegacyConfig() + migration.migrate(SOURCE_INPUT_ARGS_ACTUAL, SourceSlack()) + test_migrated_config = load_config(TEST_CONFIG_ACTUAL_PATH) + assert config_before_migration == test_migrated_config diff --git a/airbyte-integrations/connectors/source-slack/unit_tests/test_source.py b/airbyte-integrations/connectors/source-slack/unit_tests/test_source.py index bef3bf26651f9..39e7c8e4c0bf8 100644 --- a/airbyte-integrations/connectors/source-slack/unit_tests/test_source.py +++ b/airbyte-integrations/connectors/source-slack/unit_tests/test_source.py @@ -19,7 +19,7 @@ def test_streams(conversations_list, config, is_valid): else: with pytest.raises(Exception) as exc_info: _ = source.streams(config) - assert "No supported option_title: None specified. See spec.json for references" in repr(exc_info.value) + assert "The path from `authenticator_selection_path` is not found in the config." in repr(exc_info.value) @pytest.mark.parametrize( @@ -30,17 +30,13 @@ def test_streams(conversations_list, config, is_valid): 400, "Bad request", False, - "Got an exception while trying to set up the connection: 400 Client Error: " - "None for url: https://slack.com/api/users.list?limit=1000. Most probably, there are no users in the given Slack instance or " - "your token is incorrect", + "Got an exception while trying to set up the connection. Most probably, there are no users in the given Slack instance or your token is incorrect.", ), ( 403, "Forbidden", False, - "Got an exception while trying to set up the connection: 403 Client Error: " - "None for url: https://slack.com/api/users.list?limit=1000. Most probably, there are no users in the given Slack instance or " - "your token is incorrect", + "Got an exception while trying to set up the connection. 
Most probably, there are no users in the given Slack instance or your token is incorrect.", ), ), ) @@ -49,4 +45,5 @@ def test_check_connection(token_config, requests_mock, status_code, response, is source = SourceSlack() success, error = source.check_connection(logger=logging.getLogger("airbyte"), config=token_config) assert success is is_connection_successful - assert error == error_msg + if not success: + assert error_msg in error diff --git a/airbyte-integrations/connectors/source-slack/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-slack/unit_tests/test_streams.py index d0327093318f8..5edd6d9a35b5a 100644 --- a/airbyte-integrations/connectors/source-slack/unit_tests/test_streams.py +++ b/airbyte-integrations/connectors/source-slack/unit_tests/test_streams.py @@ -2,12 +2,13 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -from unittest.mock import Mock import pendulum import pytest from airbyte_cdk.sources.streams.http.requests_native_auth import TokenAuthenticator -from source_slack.source import Channels, Threads, Users +from airbyte_protocol.models import SyncMode +from freezegun import freeze_time +from source_slack import SourceSlack @pytest.fixture @@ -15,96 +16,40 @@ def authenticator(legacy_token_config): return TokenAuthenticator(legacy_token_config["api_token"]) -@pytest.mark.parametrize( - "start_date, end_date, messages, stream_state, expected_result", - ( - ( - "2020-01-01T00:00:00Z", - "2020-01-02T00:00:00Z", - [{"ts": 1577866844}, {"ts": 1577877406}], - {}, - [ - # two messages per each channel - {"channel": 3, "ts": 1577866844}, - {"channel": 3, "ts": 1577877406}, - {"channel": 4, "ts": 1577866844}, - {"channel": 4, "ts": 1577877406}, - ], - ), - ("2020-01-02T00:00:00Z", "2020-01-01T00:00:00Z", [], {}, [{}]), - ( - "2020-01-01T00:00:00Z", - "2020-01-02T00:00:00Z", - [{"ts": 1577866844}, {"ts": 1577877406}], - {"float_ts": 1577915266}, - [ - # two messages per each channel per datetime slice - {"channel": 3, "ts": 1577866844}, - {"channel": 3, "ts": 1577877406}, - {"channel": 3, "ts": 1577866844}, - {"channel": 3, "ts": 1577877406}, - {"channel": 4, "ts": 1577866844}, - {"channel": 4, "ts": 1577877406}, - {"channel": 4, "ts": 1577866844}, - {"channel": 4, "ts": 1577877406}, - ], - ), - ), -) -def test_threads_stream_slices( - requests_mock, authenticator, legacy_token_config, start_date, end_date, messages, stream_state, expected_result -): - requests_mock.register_uri( - "GET", "https://slack.com/api/conversations.history", [{"json": {"messages": messages}}, {"json": {"messages": messages}}] - ) - start_date = pendulum.parse(start_date) - end_date = end_date and pendulum.parse(end_date) - stream = Threads( - authenticator=authenticator, - default_start_date=start_date, - end_date=end_date, - lookback_window=pendulum.Duration(days=legacy_token_config["lookback_window"]), - channel_filter=legacy_token_config["channel_filter"], - ) - slices = list(stream.stream_slices(stream_state=stream_state)) - assert slices == expected_result +def get_stream_by_name(stream_name, config): + streams = SourceSlack().streams(config=config) + for stream in streams: + if stream.name == stream_name: + return stream + raise ValueError(f"Stream {stream_name} not found") -@pytest.mark.parametrize( - "current_state, latest_record, expected_state", - ( - ({}, {"float_ts": 1507866844}, {"float_ts": 1626984000.0}), - ({}, {"float_ts": 1726984000}, {"float_ts": 1726984000.0}), - ({"float_ts": 1588866844}, {"float_ts": 1577866844}, {"float_ts": 1588866844}), - 
({"float_ts": 1577800844}, {"float_ts": 1577866844}, {"float_ts": 1577866844}), - ), -) -def test_get_updated_state(authenticator, legacy_token_config, current_state, latest_record, expected_state): - stream = Threads( - authenticator=authenticator, - default_start_date=pendulum.parse(legacy_token_config["start_date"]), - lookback_window=legacy_token_config["lookback_window"], - channel_filter=legacy_token_config["channel_filter"], - ) - assert stream.get_updated_state(current_stream_state=current_state, latest_record=latest_record) == expected_state +@freeze_time("2024-03-10T20:00:00Z", tz_offset=-2) +def test_threads_stream_slices(requests_mock, token_config): + requests_mock.get(url="https://slack.com/api/conversations.list?limit=1000", + json={"channels": [{"id": "airbyte-for-beginners", "is_member": True}, + {"id": "good-reads", "is_member": True}]}) + start_date = "2024-03-01T20:00:00Z" + end_date = pendulum.now() + oldest, latest = int(pendulum.parse(start_date).timestamp()), int(end_date.timestamp()) + token_config["start_date"] = start_date -@pytest.mark.parametrize("headers, expected_result", (({}, 5), ({"Retry-After": 15}, 15))) -def test_backoff(authenticator, headers, expected_result): - stream = Users(authenticator=authenticator) - assert stream.backoff_time(Mock(headers=headers)) == expected_result + for channel in token_config["channel_filter"]: + requests_mock.get( + url=f"https://slack.com/api/conversations.history?" + f"inclusive=True&limit=1000&channel={channel}&" + f"oldest={oldest}&latest={latest}", + json={"messages": [{"ts": latest}, {"ts": oldest}]} + ) + threads_stream = get_stream_by_name("threads", token_config) + slices = threads_stream.stream_slices(stream_state=None, sync_mode=SyncMode.full_refresh) -def test_channels_stream_with_autojoin(authenticator) -> None: - """ - The test uses the `conversations_list` fixture(autouse=true) as API mocker. 
- """ expected = [ - {'name': 'advice-data-architecture', 'id': 1, 'is_member': False}, - {'name': 'advice-data-orchestration', 'id': 2, 'is_member': True}, - {'name': 'airbyte-for-beginners', 'id': 3, 'is_member': False}, - {'name': 'good-reads', 'id': 4, 'is_member': True}, - ] - stream = Channels(channel_filter=[], join_channels=True, authenticator=authenticator) - assert list(stream.read_records(None)) == expected - \ No newline at end of file + {'ts': 1710093600, 'channel': 'airbyte-for-beginners', 'start_time': '2024-02-29T20:00:00Z', 'end_time': '2024-03-10T18:00:00Z'}, + {'ts': 1709323200, 'channel': 'airbyte-for-beginners', 'start_time': '2024-02-29T20:00:00Z', 'end_time': '2024-03-10T18:00:00Z'}, + {'ts': 1710093600, 'channel': 'good-reads', 'start_time': '2024-02-29T20:00:00Z', 'end_time': '2024-03-10T18:00:00Z'}, + {'ts': 1709323200, 'channel': 'good-reads', 'start_time': '2024-02-29T20:00:00Z', 'end_time': '2024-03-10T18:00:00Z'}] + + assert list(slices) == expected From 53e93d27246d00e769f5d16ea0afcdc66634b824 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Tue, 19 Mar 2024 17:25:57 +0200 Subject: [PATCH 18/56] added dependencies --- .../connectors/source-slack/poetry.lock | 18 ++++++++++++++++-- .../connectors/source-slack/pyproject.toml | 1 + 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/poetry.lock b/airbyte-integrations/connectors/source-slack/poetry.lock index 608c63e12b1be..d322b42f42a93 100644 --- a/airbyte-integrations/connectors/source-slack/poetry.lock +++ b/airbyte-integrations/connectors/source-slack/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. 
[[package]] name = "airbyte-cdk" @@ -301,6 +301,20 @@ files = [ [package.extras] test = ["pytest (>=6)"] +[[package]] +name = "freezegun" +version = "1.4.0" +description = "Let your Python tests travel through time" +optional = false +python-versions = ">=3.7" +files = [ + {file = "freezegun-1.4.0-py3-none-any.whl", hash = "sha256:55e0fc3c84ebf0a96a5aa23ff8b53d70246479e9a68863f1fcac5a3e52f19dd6"}, + {file = "freezegun-1.4.0.tar.gz", hash = "sha256:10939b0ba0ff5adaecf3b06a5c2f73071d9678e507c5eaedb23c761d56ac774b"}, +] + +[package.dependencies] +python-dateutil = ">=2.7" + [[package]] name = "genson" version = "1.2.2" @@ -1031,4 +1045,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.9,<3.12" -content-hash = "6d6c74c129dad767e4712df44cb1a80c195a3a27c8b39861a86d80acc72a19ed" +content-hash = "a4b30965edb8628fc7d183ca5ee4c41956917426b273f3fc2b61697d97697e86" diff --git a/airbyte-integrations/connectors/source-slack/pyproject.toml b/airbyte-integrations/connectors/source-slack/pyproject.toml index b7b6103bf52d8..0896b798a3214 100644 --- a/airbyte-integrations/connectors/source-slack/pyproject.toml +++ b/airbyte-integrations/connectors/source-slack/pyproject.toml @@ -19,6 +19,7 @@ include = "source_slack" python = "^3.9,<3.12" pendulum = "==2.1.2" airbyte-cdk = "==0.58.7" +freezegun = "^1.4.0" [tool.poetry.scripts] source-slack = "source_slack.run:run" From f622dcbe573614467aef786b3a179e36fe4b7979 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Tue, 19 Mar 2024 17:27:05 +0200 Subject: [PATCH 19/56] delete unused fiels, added .coveragerc --- .../connectors/source-slack/.coveragerc | 3 +++ .../source-slack/source_slack/utils.py | 24 ------------------- 2 files changed, 3 insertions(+), 24 deletions(-) create mode 100644 airbyte-integrations/connectors/source-slack/.coveragerc delete mode 100644 airbyte-integrations/connectors/source-slack/source_slack/utils.py diff --git a/airbyte-integrations/connectors/source-slack/.coveragerc b/airbyte-integrations/connectors/source-slack/.coveragerc new file mode 100644 index 0000000000000..df94ba63f945b --- /dev/null +++ b/airbyte-integrations/connectors/source-slack/.coveragerc @@ -0,0 +1,3 @@ +[run] +omit = + source_slack/run.py \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-slack/source_slack/utils.py b/airbyte-integrations/connectors/source-slack/source_slack/utils.py deleted file mode 100644 index 7507dbab35657..0000000000000 --- a/airbyte-integrations/connectors/source-slack/source_slack/utils.py +++ /dev/null @@ -1,24 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. -# - - -from typing import Iterable, Optional - -import pendulum -from pendulum import DateTime, Period - - -def chunk_date_range(start_date: DateTime, interval=pendulum.duration(days=1), end_date: Optional[DateTime] = None) -> Iterable[Period]: - """ - Yields a list of the beginning and ending timestamps of each day between the start date and now. 
- The return value is a pendulum.period - """ - - end_date = end_date or pendulum.now() - # Each stream_slice contains the beginning and ending timestamp for a 24 hour period - chunk_start_date = start_date - while chunk_start_date < end_date: - chunk_end_date = min(chunk_start_date + interval, end_date) - yield pendulum.period(chunk_start_date, chunk_end_date) - chunk_start_date = chunk_end_date From e874d06994b65723ddcd4d8568e64b60165b034b Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Tue, 19 Mar 2024 17:27:46 +0200 Subject: [PATCH 20/56] updated source.py and run.py --- .../source-slack/source_slack/run.py | 2 + .../source-slack/source_slack/source.py | 406 ------------------ 2 files changed, 2 insertions(+), 406 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/source_slack/run.py b/airbyte-integrations/connectors/source-slack/source_slack/run.py index 14caa9ab08e1e..fd5e385857b95 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/run.py +++ b/airbyte-integrations/connectors/source-slack/source_slack/run.py @@ -7,8 +7,10 @@ from airbyte_cdk.entrypoint import launch from source_slack import SourceSlack +from source_slack.config_migrations import MigrateLegacyConfig def run(): source = SourceSlack() + MigrateLegacyConfig.migrate(sys.argv[1:], source) launch(source, sys.argv[1:]) diff --git a/airbyte-integrations/connectors/source-slack/source_slack/source.py b/airbyte-integrations/connectors/source-slack/source_slack/source.py index de2f8a0705c27..ffc0fb532c4c1 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/source.py +++ b/airbyte-integrations/connectors/source-slack/source_slack/source.py @@ -5,412 +5,6 @@ from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource -from abc import ABC, abstractmethod -from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple - -import pendulum -import requests -from airbyte_cdk import AirbyteLogger -from airbyte_cdk.models import SyncMode -from airbyte_cdk.sources import AbstractSource -from airbyte_cdk.sources.streams import Stream -from airbyte_cdk.sources.streams.http import HttpStream, HttpSubStream -from airbyte_cdk.sources.streams.http.requests_native_auth import TokenAuthenticator -from pendulum import DateTime - -from .utils import chunk_date_range - - -class SlackStream(HttpStream, ABC): - url_base = "https://slack.com/api/" - primary_key = "id" - page_size = 1000 - - @property - def max_retries(self) -> int: - # Slack's rate limiting can be unpredictable so we increase the max number of retries by a lot before failing - return 20 - - def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: - """Slack uses a cursor-based pagination strategy. 
- Extract the cursor from the response if it exists and return it in a format - that can be used to update request parameters""" - - json_response = response.json() - next_cursor = json_response.get("response_metadata", {}).get("next_cursor") - if next_cursor: - return {"cursor": next_cursor} - - def request_params( - self, - stream_state: Mapping[str, Any], - stream_slice: Mapping[str, Any] = None, - next_page_token: Mapping[str, Any] = None, - ) -> MutableMapping[str, Any]: - params = {"limit": self.page_size} - if next_page_token: - params.update(**next_page_token) - return params - - def parse_response( - self, - response: requests.Response, - stream_state: Mapping[str, Any] = None, - stream_slice: Mapping[str, Any] = None, - next_page_token: Mapping[str, Any] = None, - ) -> Iterable[MutableMapping]: - json_response = response.json() - yield from json_response.get(self.data_field, []) - - def backoff_time(self, response: requests.Response) -> Optional[float]: - """This method is called if we run into the rate limit. - Slack puts the retry time in the `Retry-After` response header so we - we return that value. If the response is anything other than a 429 (e.g: 5XX) - fall back on default retry behavior. - Rate Limits Docs: https://api.slack.com/docs/rate-limits#web""" - - if "Retry-After" in response.headers: - return int(response.headers["Retry-After"]) - else: - self.logger.info("Retry-after header not found. Using default backoff value") - return 5 - - @property - @abstractmethod - def data_field(self) -> str: - """The name of the field in the response which contains the data""" - - def should_retry(self, response: requests.Response) -> bool: - return response.status_code == requests.codes.REQUEST_TIMEOUT or super().should_retry(response) - - -class JoinChannelsStream(HttpStream): - """ - This class is a special stream which joins channels because the Slack API only returns messages from channels this bot is in. - Its responses should only be logged for debugging reasons, not read as records. - """ - - url_base = "https://slack.com/api/" - http_method = "POST" - primary_key = "id" - - def __init__(self, channel_filter: List[str] = None, **kwargs): - self.channel_filter = channel_filter or [] - super().__init__(**kwargs) - - def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iterable: - """ - Override to simply indicate that the specific channel was joined successfully. - This method should not return any data, but should return an empty iterable. - """ - self.logger.info(f"Successfully joined channel: {stream_slice['channel_name']}") - return [] - - def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: - """ - The pagination is not applicable to this Service Stream. 
- """ - return None - - def path(self, **kwargs) -> str: - return "conversations.join" - - def request_body_json(self, stream_slice: Mapping = None, **kwargs) -> Optional[Mapping]: - return {"channel": stream_slice["channel"]} - - -class ChanneledStream(SlackStream, ABC): - """Slack stream with channel filter""" - - def __init__(self, channel_filter: List[str] = [], join_channels: bool = False, **kwargs): - self.channel_filter = channel_filter - self.join_channels = join_channels - self.kwargs = kwargs - super().__init__(**kwargs) - - @property - def join_channels_stream(self) -> JoinChannelsStream: - return JoinChannelsStream(authenticator=self.kwargs.get("authenticator"), channel_filter=self.channel_filter) - - def should_join_to_channel(self, channel: Mapping[str, Any]) -> bool: - """ - The `is_member` property indicates whether or not the API Bot is already assigned / joined to the channel. - https://api.slack.com/types/conversation#booleans - """ - return self.join_channels and not channel.get("is_member") - - def make_join_channel_slice(self, channel: Mapping[str, Any]) -> Mapping[str, Any]: - channel_id: str = channel.get("id") - channel_name: str = channel.get("name") - self.logger.info(f"Joining Slack Channel: `{channel_name}`") - return {"channel": channel_id, "channel_name": channel_name} - - -class Channels(ChanneledStream): - data_field = "channels" - - @property - def use_cache(self) -> bool: - return True - - def path(self, **kwargs) -> str: - return "conversations.list" - - def request_params(self, **kwargs) -> MutableMapping[str, Any]: - params = super().request_params(**kwargs) - params["types"] = "public_channel" - return params - - def parse_response(self, response: requests.Response, **kwargs) -> Iterable[MutableMapping]: - json_response = response.json() - channels = json_response.get(self.data_field, []) - if self.channel_filter: - channels = [channel for channel in channels if channel["name"] in self.channel_filter] - yield from channels - - def read_records(self, sync_mode: SyncMode, **kwargs) -> Iterable[Mapping[str, Any]]: - """ - Override the default `read_records` method to provide the `JoinChannelsStream` functionality, - and be able to read all the channels, not just the ones that already has the API Bot joined. 
- """ - for channel in super().read_records(sync_mode=sync_mode): - # check the channel should be joined before reading - if self.should_join_to_channel(channel): - # join the channel before reading it - yield from self.join_channels_stream.read_records( - sync_mode=sync_mode, - stream_slice=self.make_join_channel_slice(channel), - ) - # reading the channel data - self.logger.info(f"Reading the channel: `{channel.get('name')}`") - yield channel - - -class ChannelMembers(ChanneledStream): - data_field = "members" - primary_key = ["member_id", "channel_id"] - - def path(self, **kwargs) -> str: - return "conversations.members" - - def request_params(self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, **kwargs) -> MutableMapping[str, Any]: - params = super().request_params(stream_state=stream_state, stream_slice=stream_slice, **kwargs) - params["channel"] = stream_slice["channel_id"] - return params - - def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iterable[Mapping]: - for member_id in super().parse_response(response, **kwargs): - # Slack just returns raw IDs as a string, so we want to put them in a "join table" format - yield {"member_id": member_id, "channel_id": stream_slice["channel_id"]} - - def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]: - channels_stream = Channels(authenticator=self._session.auth, channel_filter=self.channel_filter) - for channel_record in channels_stream.read_records(sync_mode=SyncMode.full_refresh): - yield {"channel_id": channel_record["id"]} - - -class Users(SlackStream): - data_field = "members" - - def path(self, **kwargs) -> str: - return "users.list" - - -# Incremental Streams -class IncrementalMessageStream(ChanneledStream, ABC): - data_field = "messages" - cursor_field = "float_ts" - primary_key = ["channel_id", "ts"] - - def __init__(self, default_start_date: DateTime, end_date: Optional[DateTime] = None, **kwargs): - self._start_ts = default_start_date.timestamp() - self._end_ts = end_date and end_date.timestamp() - self.set_sub_primary_key() - super().__init__(**kwargs) - - def set_sub_primary_key(self): - if isinstance(self.primary_key, list): - for index, value in enumerate(self.primary_key): - setattr(self, f"sub_primary_key_{index + 1}", value) - else: - self.logger.error("Failed during setting sub primary keys. 
Primary key should be list.") - - def request_params(self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, **kwargs) -> MutableMapping[str, Any]: - params = super().request_params(stream_state=stream_state, stream_slice=stream_slice, **kwargs) - params.update(**stream_slice) - return params - - def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iterable[Mapping]: - for record in super().parse_response(response, **kwargs): - record[self.sub_primary_key_1] = stream_slice.get("channel", "") - record[self.cursor_field] = float(record[self.sub_primary_key_2]) - yield record - - def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]) -> Mapping[str, Any]: - current_stream_state = current_stream_state or {} - current_stream_state[self.cursor_field] = max( - latest_record[self.cursor_field], current_stream_state.get(self.cursor_field, self._start_ts) - ) - - return current_stream_state - - def read_records( - self, - sync_mode: SyncMode, - cursor_field: List[str] = None, - stream_slice: Mapping[str, Any] = None, - stream_state: Mapping[str, Any] = None, - ) -> Iterable[Mapping[str, Any]]: - if not stream_slice: - # return an empty iterator - # this is done to emit at least one state message when no slices are generated - return iter([]) - return super().read_records(sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state) - - -class ChannelMessages(HttpSubStream, IncrementalMessageStream): - def path(self, **kwargs) -> str: - return "conversations.history" - - @property - def use_cache(self) -> bool: - return True - - def stream_slices(self, stream_state: Mapping[str, Any] = None, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]: - stream_state = stream_state or {} - start_date = pendulum.from_timestamp(stream_state.get(self.cursor_field, self._start_ts)) - end_date = self._end_ts and pendulum.from_timestamp(self._end_ts) - slice_yielded = False - for parent_slice in super().stream_slices(sync_mode=SyncMode.full_refresh): - channel = parent_slice["parent"] - for period in chunk_date_range(start_date=start_date, end_date=end_date): - yield {"channel": channel["id"], "oldest": period.start.timestamp(), "latest": period.end.timestamp()} - slice_yielded = True - if not slice_yielded: - # yield an empty slice to checkpoint state later - yield {} - - -class Threads(IncrementalMessageStream): - def __init__(self, lookback_window: Mapping[str, int], **kwargs): - self.messages_lookback_window = lookback_window - super().__init__(**kwargs) - - def path(self, **kwargs) -> str: - return "conversations.replies" - - def stream_slices(self, stream_state: Mapping[str, Any] = None, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]: - """ - The logic for incrementally syncing threads is not very obvious, so buckle up. - - To get all messages in a thread, one must specify the channel and timestamp of the parent (first) message of that thread, - basically its ID. - - One complication is that threads can be updated at Any time in the future. Therefore, if we wanted to comprehensively sync data - i.e: get every single response in a thread, we'd have to read every message in the slack instance every time we ran a sync, - because otherwise there is no way to guarantee that a thread deep in the past didn't receive a new message. 
- - A pragmatic workaround is to say we want threads to be at least N days fresh i.e: look back N days into the past, - get every message since, and read all of the thread responses. This is essentially the approach we're taking here via slicing: - create slices from N days into the past and read all messages in threads since then. We could optionally filter out records we have - already read, but that's omitted to keep the logic simple to reason about. - - Good luck. - """ - - stream_state = stream_state or {} - channels_stream = Channels(authenticator=self._session.auth, channel_filter=self.channel_filter) - - if self.cursor_field in stream_state: - # Since new messages can be posted to threads continuously after the parent message has been posted, - # we get messages from the latest date - # found in the state minus X days to pick up any new messages in threads. - # If there is state always use lookback - messages_start_date = pendulum.from_timestamp(stream_state[self.cursor_field]) - self.messages_lookback_window - else: - # If there is no state i.e: this is the first sync then there is no use for lookback, just get messages - # from the default start date - messages_start_date = pendulum.from_timestamp(self._start_ts) - - messages_stream = ChannelMessages( - parent=channels_stream, - authenticator=self._session.auth, - default_start_date=messages_start_date, - end_date=self._end_ts and pendulum.from_timestamp(self._end_ts), - ) - - slice_yielded = False - for message_chunk in messages_stream.stream_slices(stream_state={self.cursor_field: messages_start_date.timestamp()}): - self.logger.info(f"Syncing replies {message_chunk}") - for message in messages_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=message_chunk): - yield {"channel": message_chunk["channel"], self.sub_primary_key_2: message[self.sub_primary_key_2]} - slice_yielded = True - if not slice_yielded: - # yield an empty slice to checkpoint state later - yield {} - -# -# class SourceSlack(AbstractSource): -# def _get_authenticator(self, config: Mapping[str, Any]): -# # Added to maintain backward compatibility with previous versions -# if "api_token" in config: -# return TokenAuthenticator(config["api_token"]) -# -# credentials = config.get("credentials", {}) -# credentials_title = credentials.get("option_title") -# if credentials_title == "Default OAuth2.0 authorization": -# return TokenAuthenticator(credentials["access_token"]) -# elif credentials_title == "API Token Credentials": -# return TokenAuthenticator(credentials["api_token"]) -# else: -# raise Exception(f"No supported option_title: {credentials_title} specified. See spec.json for references") -# -# def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]: -# try: -# authenticator = self._get_authenticator(config) -# users_stream = Users(authenticator=authenticator) -# next(users_stream.read_records(SyncMode.full_refresh)) -# return True, None -# except Exception as e: -# return ( -# False, -# f"Got an exception while trying to set up the connection: {e}. 
" -# f"Most probably, there are no users in the given Slack instance or your token is incorrect", -# ) -# -# def streams(self, config: Mapping[str, Any]) -> List[Stream]: -# authenticator = self._get_authenticator(config) -# default_start_date = pendulum.parse(config["start_date"]) -# # this field is not exposed to spec, used only for testing purposes -# end_date = config.get("end_date") -# end_date = end_date and pendulum.parse(end_date) -# threads_lookback_window = pendulum.Duration(days=config["lookback_window"]) -# channel_filter = config.get("channel_filter", []) -# should_join_to_channels = config.get("join_channels") -# -# channels = Channels(authenticator=authenticator, join_channels=should_join_to_channels, channel_filter=channel_filter) -# streams = [ -# channels, -# ChannelMembers(authenticator=authenticator, channel_filter=channel_filter), -# ChannelMessages( -# parent=channels, -# authenticator=authenticator, -# default_start_date=default_start_date, -# end_date=end_date, -# channel_filter=channel_filter, -# ), -# Threads( -# authenticator=authenticator, -# default_start_date=default_start_date, -# end_date=end_date, -# lookback_window=threads_lookback_window, -# channel_filter=channel_filter, -# ), -# Users(authenticator=authenticator), -# ] -# -# return streams - class SourceSlack(YamlDeclarativeSource): def __init__(self): super().__init__(**{"path_to_yaml": "manifest.yaml"}) From 5c36ec94fb24eae40e57d39412c4b77ca8aab6db Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Tue, 19 Mar 2024 17:43:48 +0200 Subject: [PATCH 21/56] updated abnormal_state --- .../integration_tests/abnormal_state.json | 90 ++++++++++++++++++- 1 file changed, 86 insertions(+), 4 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-slack/integration_tests/abnormal_state.json index d55652e4e69a1..6cc36537a8fb0 100644 --- a/airbyte-integrations/connectors/source-slack/integration_tests/abnormal_state.json +++ b/airbyte-integrations/connectors/source-slack/integration_tests/abnormal_state.json @@ -2,15 +2,97 @@ { "type": "STREAM", "stream": { - "stream_state": { "float_ts": 7270247822 }, - "stream_descriptor": { "name": "threads" } + "stream_descriptor": { + "name": "threads" + }, + "stream_state": { + "states": [ + { + "partition": { + "channel": "C04KX3KEZ54", + "ts": "1683104542.931169" + }, + "cursor": { + "float_ts": "2534945416" + } + }, + { + "partition": { + "channel": "C04LTCM2Y56", + "ts": "1683104499.808709" + }, + "cursor": { + "float_ts": "2534945416" + } + }, + { + "partition": { + "channel": "C04LTCM2Y56", + "ts": "1695880827.186049" + }, + "cursor": { + "float_ts": "2534945416" + } + }, + { + "partition": { + "channel": "C04LTCM2Y56", + "ts": "1695814864.744249" + }, + "cursor": { + "float_ts": "2534945416" + } + } + ] + } } }, { "type": "STREAM", "stream": { - "stream_state": { "float_ts": 7270247822 }, - "stream_descriptor": { "name": "channel_messages" } + "stream_descriptor": { + "name": "channel_messages" + }, + "stream_state": { + "states": [ + { + "partition": { + "channel_id": "C04LTCM2Y56", + "parent_slice": {} + }, + "cursor": { + "float_ts": "2534945416" + } + }, + { + "partition": { + "channel": "C04KX3KEZ54", + "parent_slice": {} + }, + "cursor": { + "float_ts": "2534945416" + } + }, + { + "partition": { + "channel": "C04L3M4PTJ6", + "parent_slice": {} + }, + "cursor": { + "float_ts": "2534945416" + } + }, + { + "partition": { + "channel": "C04LTCM2Y56", + "parent_slice": {} + 
}, + "cursor": { + "float_ts": "2534945416" + } + } + ] + } } } ] From a5b26ca82e63a62d64117d1da002820f382fcc5f Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Tue, 19 Mar 2024 19:32:51 +0200 Subject: [PATCH 22/56] added request param for channels stream --- .../connectors/source-slack/source_slack/manifest.yaml | 2 ++ .../source-slack/unit_tests/configs/legacy_config.json | 9 +-------- .../connectors/source-slack/unit_tests/conftest.py | 7 ++----- .../source-slack/unit_tests/test_components.py | 5 ----- .../connectors/source-slack/unit_tests/test_streams.py | 4 ---- 5 files changed, 5 insertions(+), 22 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml index b9606ac25097d..9bba30094febd 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml +++ b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml @@ -76,6 +76,8 @@ definitions: type: SimpleRetriever requester: $ref: "#/definitions/requester" + request_parameters: + types: "public_channel" record_selector: $ref: "#/definitions/selector" record_filter: diff --git a/airbyte-integrations/connectors/source-slack/unit_tests/configs/legacy_config.json b/airbyte-integrations/connectors/source-slack/unit_tests/configs/legacy_config.json index 99eda1d750b38..0d60705f99388 100644 --- a/airbyte-integrations/connectors/source-slack/unit_tests/configs/legacy_config.json +++ b/airbyte-integrations/connectors/source-slack/unit_tests/configs/legacy_config.json @@ -1,8 +1 @@ -{ - "start_date": "2021-07-22T20:00:00Z", - "end_date": "2021-07-23T20:00:00Z", - "lookback_window": 1, - "join_channels": true, - "channel_filter": ["airbyte-for-beginners", "good-reads"], - "api_token": "api-token" -} +{"start_date": "2021-07-22T20:00:00Z", "end_date": "2021-07-23T20:00:00Z", "lookback_window": 1, "join_channels": true, "channel_filter": ["airbyte-for-beginners", "good-reads"], "api_token": "api-token"} \ No newline at end of file diff --git a/airbyte-integrations/connectors/source-slack/unit_tests/conftest.py b/airbyte-integrations/connectors/source-slack/unit_tests/conftest.py index 52afa40f47852..94897b9c22f5d 100644 --- a/airbyte-integrations/connectors/source-slack/unit_tests/conftest.py +++ b/airbyte-integrations/connectors/source-slack/unit_tests/conftest.py @@ -18,11 +18,8 @@ def conversations_list(requests_mock): "https://slack.com/api/conversations.list?limit=1000&types=public_channel", json={ "channels": [ - {"name": "advice-data-architecture", "id": 1, "is_member": False}, - {"name": "advice-data-orchestration", "id": 2, "is_member": True}, - {"name": "airbyte-for-beginners", "id": 3, "is_member": False}, - {"name": "good-reads", "id": 4, "is_member": True}, - ] + {"id": "airbyte-for-beginners", "is_member": True}, + {"id": "good-reads", "is_member": True}] }, ) diff --git a/airbyte-integrations/connectors/source-slack/unit_tests/test_components.py b/airbyte-integrations/connectors/source-slack/unit_tests/test_components.py index b0fa819a0a6fc..5ca5b7ab85994 100644 --- a/airbyte-integrations/connectors/source-slack/unit_tests/test_components.py +++ b/airbyte-integrations/connectors/source-slack/unit_tests/test_components.py @@ -37,11 +37,6 @@ def test_channel_members_extractor(token_config): @freeze_time("2024-03-10T20:00:00Z", tz_offset=-2) def test_threads_partition_router(token_config, requests_mock): - requests_mock.get(url="https://slack.com/api/conversations.list?limit=1000", 
- json={"channels": [ - {"id": "airbyte-for-beginners", "is_member": True}, - {"id": "good-reads", "is_member": True} - ]}) start_date = "2024-03-01T20:00:00Z" end_date = pendulum.now() oldest, latest = int(pendulum.parse(start_date).timestamp()), int(end_date.timestamp()) diff --git a/airbyte-integrations/connectors/source-slack/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-slack/unit_tests/test_streams.py index 5edd6d9a35b5a..afc46f42d663a 100644 --- a/airbyte-integrations/connectors/source-slack/unit_tests/test_streams.py +++ b/airbyte-integrations/connectors/source-slack/unit_tests/test_streams.py @@ -26,10 +26,6 @@ def get_stream_by_name(stream_name, config): @freeze_time("2024-03-10T20:00:00Z", tz_offset=-2) def test_threads_stream_slices(requests_mock, token_config): - requests_mock.get(url="https://slack.com/api/conversations.list?limit=1000", - json={"channels": [{"id": "airbyte-for-beginners", "is_member": True}, - {"id": "good-reads", "is_member": True}]}) - start_date = "2024-03-01T20:00:00Z" end_date = pendulum.now() oldest, latest = int(pendulum.parse(start_date).timestamp()), int(end_date.timestamp()) From 42a499fe4fa9262e24ed102bef12015d1c51b4e0 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Tue, 19 Mar 2024 19:33:22 +0200 Subject: [PATCH 23/56] updated dependencies --- .../connectors/source-slack/poetry.lock | 54 +++++++++---------- .../connectors/source-slack/pyproject.toml | 2 +- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/poetry.lock b/airbyte-integrations/connectors/source-slack/poetry.lock index d322b42f42a93..281d41b733434 100644 --- a/airbyte-integrations/connectors/source-slack/poetry.lock +++ b/airbyte-integrations/connectors/source-slack/poetry.lock @@ -104,13 +104,13 @@ files = [ [[package]] name = "cachetools" -version = "5.3.2" +version = "5.3.3" description = "Extensible memoizing collections and decorators" optional = false python-versions = ">=3.7" files = [ - {file = "cachetools-5.3.2-py3-none-any.whl", hash = "sha256:861f35a13a451f94e301ce2bec7cac63e881232ccce7ed67fab9b5df4d3beaa1"}, - {file = "cachetools-5.3.2.tar.gz", hash = "sha256:086ee420196f7b2ab9ca2db2520aca326318b68fe5ba8bc4d49cca91add450f2"}, + {file = "cachetools-5.3.3-py3-none-any.whl", hash = "sha256:0abad1021d3f8325b2fc1d2e9c8b9c9d57b04c3932657a72465447332c24d945"}, + {file = "cachetools-5.3.3.tar.gz", hash = "sha256:ba29e2dfa0b8b556606f097407ed1aa62080ee108ab0dc5ec9d6a723a007d105"}, ] [[package]] @@ -481,13 +481,13 @@ files = [ [[package]] name = "packaging" -version = "23.2" +version = "24.0" description = "Core utilities for Python packages" optional = false python-versions = ">=3.7" files = [ - {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, - {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, + {file = "packaging-24.0-py3-none-any.whl", hash = "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5"}, + {file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"}, ] [[package]] @@ -716,13 +716,13 @@ dev = ["pre-commit", "pytest-asyncio", "tox"] [[package]] name = "python-dateutil" -version = "2.8.2" +version = "2.9.0.post0" description = "Extensions to the standard Python datetime module" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ - {file = 
"python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, - {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, + {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, + {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, ] [package.dependencies] @@ -822,13 +822,13 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] [[package]] name = "requests-cache" -version = "1.1.1" +version = "1.2.0" description = "A persistent cache for python requests" optional = false -python-versions = ">=3.7,<4.0" +python-versions = ">=3.8" files = [ - {file = "requests_cache-1.1.1-py3-none-any.whl", hash = "sha256:c8420cf096f3aafde13c374979c21844752e2694ffd8710e6764685bb577ac90"}, - {file = "requests_cache-1.1.1.tar.gz", hash = "sha256:764f93d3fa860be72125a568c2cc8eafb151cf29b4dc2515433a56ee657e1c60"}, + {file = "requests_cache-1.2.0-py3-none-any.whl", hash = "sha256:490324301bf0cb924ff4e6324bd2613453e7e1f847353928b08adb0fdfb7f722"}, + {file = "requests_cache-1.2.0.tar.gz", hash = "sha256:db1c709ca343cc1cd5b6c8b1a5387298eceed02306a6040760db538c885e3838"}, ] [package.dependencies] @@ -840,15 +840,15 @@ url-normalize = ">=1.4" urllib3 = ">=1.25.5" [package.extras] -all = ["boto3 (>=1.15)", "botocore (>=1.18)", "itsdangerous (>=2.0)", "pymongo (>=3)", "pyyaml (>=5.4)", "redis (>=3)", "ujson (>=5.4)"] +all = ["boto3 (>=1.15)", "botocore (>=1.18)", "itsdangerous (>=2.0)", "pymongo (>=3)", "pyyaml (>=6.0.1)", "redis (>=3)", "ujson (>=5.4)"] bson = ["bson (>=0.5)"] -docs = ["furo (>=2023.3,<2024.0)", "linkify-it-py (>=2.0,<3.0)", "myst-parser (>=1.0,<2.0)", "sphinx (>=5.0.2,<6.0.0)", "sphinx-autodoc-typehints (>=1.19)", "sphinx-automodapi (>=0.14)", "sphinx-copybutton (>=0.5)", "sphinx-design (>=0.2)", "sphinx-notfound-page (>=0.8)", "sphinxcontrib-apidoc (>=0.3)", "sphinxext-opengraph (>=0.6)"] +docs = ["furo (>=2023.3,<2024.0)", "linkify-it-py (>=2.0,<3.0)", "myst-parser (>=1.0,<2.0)", "sphinx (>=5.0.2,<6.0.0)", "sphinx-autodoc-typehints (>=1.19)", "sphinx-automodapi (>=0.14)", "sphinx-copybutton (>=0.5)", "sphinx-design (>=0.2)", "sphinx-notfound-page (>=0.8)", "sphinxcontrib-apidoc (>=0.3)", "sphinxext-opengraph (>=0.9)"] dynamodb = ["boto3 (>=1.15)", "botocore (>=1.18)"] json = ["ujson (>=5.4)"] mongodb = ["pymongo (>=3)"] redis = ["redis (>=3)"] security = ["itsdangerous (>=2.0)"] -yaml = ["pyyaml (>=5.4)"] +yaml = ["pyyaml (>=6.0.1)"] [[package]] name = "requests-mock" @@ -871,19 +871,19 @@ test = ["fixtures", "mock", "purl", "pytest", "requests-futures", "sphinx", "tes [[package]] name = "setuptools" -version = "69.1.0" +version = "69.2.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "setuptools-69.1.0-py3-none-any.whl", hash = "sha256:c054629b81b946d63a9c6e732bc8b2513a7c3ea645f11d0139a2191d735c60c6"}, - {file = "setuptools-69.1.0.tar.gz", hash = "sha256:850894c4195f09c4ed30dba56213bf7c3f21d86ed6bdaafb5df5972593bfc401"}, + {file = "setuptools-69.2.0-py3-none-any.whl", hash = "sha256:c21c49fb1042386df081cb5d86759792ab89efca84cf114889191cd09aacc80c"}, + {file = "setuptools-69.2.0.tar.gz", hash = "sha256:0ff4183f8f42cd8fa3acea16c45205521a4ef28f73c6391d8a25e92893134f2e"}, ] [package.extras] docs = ["furo", 
"jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] -testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] -testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.1)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] +testing = ["build[virtualenv]", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.2)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] [[package]] name = "six" @@ -909,13 +909,13 @@ files = [ [[package]] name = "typing-extensions" -version = "4.9.0" +version = "4.10.0" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.9.0-py3-none-any.whl", hash = "sha256:af72aea155e91adfc61c3ae9e0e342dbc0cba726d6cba4b6c72c1f34e47291cd"}, - {file = "typing_extensions-4.9.0.tar.gz", hash = "sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783"}, + {file = "typing_extensions-4.10.0-py3-none-any.whl", hash = "sha256:69b1a937c3a517342112fb4c6df7e72fc39a38e7891a5730ed4985b5214b5475"}, + {file = "typing_extensions-4.10.0.tar.gz", hash = "sha256:b0abd7c89e8fb96f98db18d86106ff1d90ab692004eb746cf6eda2682f91b3cb"}, ] [[package]] @@ -934,13 +934,13 @@ six = "*" [[package]] name = "urllib3" -version = "2.2.0" +version = "2.2.1" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.8" files = [ - {file = "urllib3-2.2.0-py3-none-any.whl", hash = "sha256:ce3711610ddce217e6d113a2732fafad960a03fd0318c91faa79481e35c11224"}, - {file = "urllib3-2.2.0.tar.gz", hash = "sha256:051d961ad0c62a94e50ecf1af379c3aba230c66c710493493560c0c223c49f20"}, + {file = "urllib3-2.2.1-py3-none-any.whl", hash = "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d"}, + {file = "urllib3-2.2.1.tar.gz", hash = "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"}, ] [package.extras] diff --git a/airbyte-integrations/connectors/source-slack/pyproject.toml b/airbyte-integrations/connectors/source-slack/pyproject.toml index 0896b798a3214..b5c3f0a2f3ec2 100644 --- a/airbyte-integrations/connectors/source-slack/pyproject.toml +++ b/airbyte-integrations/connectors/source-slack/pyproject.toml @@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",] build-backend = "poetry.core.masonry.api" [tool.poetry] -version = "0.3.9" +version = "1.0.0" name = "source-slack" description = "Source implementation for Slack." authors = [ "Airbyte ",] From b5675086a2540c074958fc38952c6e088b81d479 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Tue, 19 Mar 2024 19:33:50 +0200 Subject: [PATCH 24/56] bump version, added migration docs --- .../connectors/source-slack/metadata.yaml | 10 +++++++++- docs/integrations/sources/slack-migrations.md | 16 ++++++++++++++++ docs/integrations/sources/slack.md | 4 ++-- 3 files changed, 27 insertions(+), 3 deletions(-) create mode 100644 docs/integrations/sources/slack-migrations.md diff --git a/airbyte-integrations/connectors/source-slack/metadata.yaml b/airbyte-integrations/connectors/source-slack/metadata.yaml index 9a5ec30bad5e0..4e9ae423a5e40 100644 --- a/airbyte-integrations/connectors/source-slack/metadata.yaml +++ b/airbyte-integrations/connectors/source-slack/metadata.yaml @@ -10,7 +10,7 @@ data: connectorSubtype: api connectorType: source definitionId: c2281cee-86f9-4a86-bb48-d23286b4c7bd - dockerImageTag: 0.3.10 + dockerImageTag: 1.0.0 dockerRepository: airbyte/source-slack documentationUrl: https://docs.airbyte.com/integrations/sources/slack githubIssueLabel: source-slack @@ -27,6 +27,14 @@ data: oss: enabled: true releaseStage: generally_available + releases: + breakingChanges: + 1.0.0: + message: + The source slack connector is being migrated from the Python CDK to our declarative low-code CDK. + Due to changes in the handling of state format for incremental substreams, this migration constitutes a breaking change. + After updating, please reset your source before resuming syncs. For more information, see our migration documentation for source . + upgradeDeadline: "2024-04-19" # TODO: update this date before merge suggestedStreams: streams: - users diff --git a/docs/integrations/sources/slack-migrations.md b/docs/integrations/sources/slack-migrations.md new file mode 100644 index 0000000000000..dcd73293228cd --- /dev/null +++ b/docs/integrations/sources/slack-migrations.md @@ -0,0 +1,16 @@ +## Upgrading to 1.0.0 + +We're continuously striving to enhance the quality and reliability of our connectors at Airbyte. +As part of our commitment to delivering exceptional service, we are transitioning source slack from the +Python Connector Development Kit (CDK) to our innovative low-code framework. 
+This is part of a strategic move to streamline many processes across connectors, bolstering maintainability and +freeing us to focus more of our efforts on improving the performance and features of our evolving platform and growing catalog. +However, due to differences between the Python and low-code CDKs, this migration constitutes a breaking change. + +We’ve evolved and standardized how state is managed for incremental streams that are nested within a parent stream. +This change impacts how individual states are tracked and stored for each partition, using a more structured approach +to ensure the most granular and flexible state management. +This change will affect the `Threads` and `Channel Messages` streams. + +## Migration Steps +* The `reset` for`Threads` and `Channel Messages` streams is required after upgrading to this version. diff --git a/docs/integrations/sources/slack.md b/docs/integrations/sources/slack.md index 86ba2b2ac1d7b..54d2f7a4c15b1 100644 --- a/docs/integrations/sources/slack.md +++ b/docs/integrations/sources/slack.md @@ -163,10 +163,10 @@ Slack has [rate limit restrictions](https://api.slack.com/docs/rate-limits). | Version | Date | Pull Request | Subject | |:--------|:-----------|:---------------------------------------------------------|:-------------------------------------------------------------------------------------| -| 0.3.10 | 2024-02-27 | [35477](https://github.com/airbytehq/airbyte/pull/35477) | Migration to low code | +| 1.0.0 | 2024-03-19 | [35477](https://github.com/airbytehq/airbyte/pull/35477) | Migration to low code | | 0.3.9 | 2024-02-12 | [35157](https://github.com/airbytehq/airbyte/pull/35157) | Manage dependencies with Poetry. | | 0.3.8 | 2024-02-09 | [35131](https://github.com/airbytehq/airbyte/pull/35131) | Fixed the issue when `schema discovery` fails with `502` due to the platform timeout | -| 0.3.7 | 2024-01-10 | [1234](https://github.com/airbytehq/airbyte/pull/1234) | prepare for airbyte-lib | +| 0.3.7 | 2024-01-10 | [1234](https://github.com/airbytehq/airbyte/pull/1234) | prepare for airbyte-lib | | 0.3.6 | 2023-11-21 | [32707](https://github.com/airbytehq/airbyte/pull/32707) | Threads: do not use client-side record filtering | | 0.3.5 | 2023-10-19 | [31599](https://github.com/airbytehq/airbyte/pull/31599) | Base image migration: remove Dockerfile and use the python-connector-base image | | 0.3.4 | 2023-10-06 | [31134](https://github.com/airbytehq/airbyte/pull/31134) | Update CDK and remove non iterable return from records | From af8d4e43c08291b32c1cebb46aea5072469b6974 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Tue, 19 Mar 2024 19:35:29 +0200 Subject: [PATCH 25/56] format fix --- .../connectors/source-slack/metadata.yaml | 4 ++-- .../source-slack/unit_tests/configs/legacy_config.json | 9 ++++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/metadata.yaml b/airbyte-integrations/connectors/source-slack/metadata.yaml index 4e9ae423a5e40..d6a2e4352e512 100644 --- a/airbyte-integrations/connectors/source-slack/metadata.yaml +++ b/airbyte-integrations/connectors/source-slack/metadata.yaml @@ -32,9 +32,9 @@ data: 1.0.0: message: The source slack connector is being migrated from the Python CDK to our declarative low-code CDK. - Due to changes in the handling of state format for incremental substreams, this migration constitutes a breaking change. + Due to changes in the handling of state format for incremental substreams, this migration constitutes a breaking change. 
After updating, please reset your source before resuming syncs. For more information, see our migration documentation for source . - upgradeDeadline: "2024-04-19" # TODO: update this date before merge + upgradeDeadline: "2024-04-19" # TODO: update this date before merge suggestedStreams: streams: - users diff --git a/airbyte-integrations/connectors/source-slack/unit_tests/configs/legacy_config.json b/airbyte-integrations/connectors/source-slack/unit_tests/configs/legacy_config.json index 0d60705f99388..99eda1d750b38 100644 --- a/airbyte-integrations/connectors/source-slack/unit_tests/configs/legacy_config.json +++ b/airbyte-integrations/connectors/source-slack/unit_tests/configs/legacy_config.json @@ -1 +1,8 @@ -{"start_date": "2021-07-22T20:00:00Z", "end_date": "2021-07-23T20:00:00Z", "lookback_window": 1, "join_channels": true, "channel_filter": ["airbyte-for-beginners", "good-reads"], "api_token": "api-token"} \ No newline at end of file +{ + "start_date": "2021-07-22T20:00:00Z", + "end_date": "2021-07-23T20:00:00Z", + "lookback_window": 1, + "join_channels": true, + "channel_filter": ["airbyte-for-beginners", "good-reads"], + "api_token": "api-token" +} From 3d571a3cd778b60e6365cb3e515b158b26a8e887 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Wed, 20 Mar 2024 13:50:59 +0200 Subject: [PATCH 26/56] updated to latesl cdk --- .../connectors/source-slack/poetry.lock | 12 ++++++------ .../connectors/source-slack/pyproject.toml | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/poetry.lock b/airbyte-integrations/connectors/source-slack/poetry.lock index 281d41b733434..b04e98c831528 100644 --- a/airbyte-integrations/connectors/source-slack/poetry.lock +++ b/airbyte-integrations/connectors/source-slack/poetry.lock @@ -2,13 +2,13 @@ [[package]] name = "airbyte-cdk" -version = "0.58.7" +version = "0.72.1" description = "A framework for writing Airbyte Connectors." 
optional = false python-versions = ">=3.8" files = [ - {file = "airbyte-cdk-0.58.7.tar.gz", hash = "sha256:00e379e2379b38683992027114a2190f49befec8cbac67d0a2c907786111e77b"}, - {file = "airbyte_cdk-0.58.7-py3-none-any.whl", hash = "sha256:09b31d32899cc6dc91e39716e8d1601503a7884d837752e683d1e3ef7dfe73be"}, + {file = "airbyte-cdk-0.72.1.tar.gz", hash = "sha256:1dbd0a11f3784cfdd5afa9f40315c9a6123e803be91f9f861642a78e7ee14cd9"}, + {file = "airbyte_cdk-0.72.1-py3-none-any.whl", hash = "sha256:849077805442286de99f589ecba4be82491a3d9d3f516ce1a8b0cbaf303db9a4"}, ] [package.dependencies] @@ -32,8 +32,8 @@ requests-cache = "*" wcmatch = "8.4" [package.extras] -dev = ["avro (>=1.11.2,<1.12.0)", "cohere (==4.21)", "fastavro (>=1.8.0,<1.9.0)", "freezegun", "langchain (==0.0.271)", "markdown", "mypy", "openai[embeddings] (==0.27.9)", "pandas (==2.0.3)", "pdf2image (==1.16.3)", "pdfminer.six (==20221105)", "pyarrow (==12.0.1)", "pytesseract (==0.3.10)", "pytest", "pytest-cov", "pytest-httpserver", "pytest-mock", "requests-mock", "tiktoken (==0.4.0)", "unstructured (==0.10.27)", "unstructured.pytesseract (>=0.3.12)", "unstructured[docx,pptx] (==0.10.27)"] -file-based = ["avro (>=1.11.2,<1.12.0)", "fastavro (>=1.8.0,<1.9.0)", "markdown", "pdf2image (==1.16.3)", "pdfminer.six (==20221105)", "pyarrow (==12.0.1)", "pytesseract (==0.3.10)", "unstructured (==0.10.27)", "unstructured.pytesseract (>=0.3.12)", "unstructured[docx,pptx] (==0.10.27)"] +dev = ["avro (>=1.11.2,<1.12.0)", "cohere (==4.21)", "fastavro (>=1.8.0,<1.9.0)", "freezegun", "langchain (==0.0.271)", "markdown", "mypy", "openai[embeddings] (==0.27.9)", "pandas (==2.0.3)", "pdf2image (==1.16.3)", "pdfminer.six (==20221105)", "pyarrow (>=15.0.0,<15.1.0)", "pytesseract (==0.3.10)", "pytest", "pytest-cov", "pytest-httpserver", "pytest-mock", "requests-mock", "tiktoken (==0.4.0)", "unstructured (==0.10.27)", "unstructured.pytesseract (>=0.3.12)", "unstructured[docx,pptx] (==0.10.27)"] +file-based = ["avro (>=1.11.2,<1.12.0)", "fastavro (>=1.8.0,<1.9.0)", "markdown", "pdf2image (==1.16.3)", "pdfminer.six (==20221105)", "pyarrow (>=15.0.0,<15.1.0)", "pytesseract (==0.3.10)", "unstructured (==0.10.27)", "unstructured.pytesseract (>=0.3.12)", "unstructured[docx,pptx] (==0.10.27)"] sphinx-docs = ["Sphinx (>=4.2,<5.0)", "sphinx-rtd-theme (>=1.0,<2.0)"] vector-db-based = ["cohere (==4.21)", "langchain (==0.0.271)", "openai[embeddings] (==0.27.9)", "tiktoken (==0.4.0)"] @@ -1045,4 +1045,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.9,<3.12" -content-hash = "a4b30965edb8628fc7d183ca5ee4c41956917426b273f3fc2b61697d97697e86" +content-hash = "59138844bec5f4f46b8a260d963d206e9881f8580ecdbeb4329d266ec0071a75" diff --git a/airbyte-integrations/connectors/source-slack/pyproject.toml b/airbyte-integrations/connectors/source-slack/pyproject.toml index b5c3f0a2f3ec2..aca63d06159f2 100644 --- a/airbyte-integrations/connectors/source-slack/pyproject.toml +++ b/airbyte-integrations/connectors/source-slack/pyproject.toml @@ -18,7 +18,7 @@ include = "source_slack" [tool.poetry.dependencies] python = "^3.9,<3.12" pendulum = "==2.1.2" -airbyte-cdk = "==0.58.7" +airbyte-cdk = "^0" freezegun = "^1.4.0" [tool.poetry.scripts] From 5cebd2dc6e86cd694aa212467699da0110cad110 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Wed, 20 Mar 2024 13:52:04 +0200 Subject: [PATCH 27/56] updated stream slices in custom partition router --- .../components/threads_partition_router.py | 31 ++++++------------- .../source-slack/source_slack/manifest.yaml | 4 +-- 
.../source-slack/unit_tests/test_streams.py | 9 +++--- 3 files changed, 15 insertions(+), 29 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/source_slack/components/threads_partition_router.py b/airbyte-integrations/connectors/source-slack/source_slack/components/threads_partition_router.py index b0581988f3d82..cc80c42ecff49 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/components/threads_partition_router.py +++ b/airbyte-integrations/connectors/source-slack/source_slack/components/threads_partition_router.py @@ -1,13 +1,12 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. from dataclasses import dataclass -from typing import Any, Iterable, Mapping, Optional +from typing import Iterable import dpath.util -import pendulum from airbyte_cdk.models import AirbyteMessage, SyncMode, Type from airbyte_cdk.sources.declarative.partition_routers import SubstreamPartitionRouter -from airbyte_cdk.sources.declarative.types import Config, Record, StreamSlice, StreamState +from airbyte_cdk.sources.declarative.types import Record, StreamSlice @dataclass @@ -17,32 +16,18 @@ class ThreadsPartitionRouter(SubstreamPartitionRouter): """ def stream_slices(self) -> Iterable[StreamSlice]: - """ - Iterate over each parent stream's record and create a StreamSlice for each record. - - For each stream, iterate over its stream_slices. - For each stream slice, iterate over each record. - yield a stream slice for each such records. - - If a parent slice contains no record, emit a slice with parent_record=None. - - The template string can interpolate the following values: - - parent_stream_slice: mapping representing the parent's stream slice - - parent_record: mapping representing the parent record - - parent_stream_name: string representing the parent stream name - """ if not self.parent_stream_configs: yield from [] else: for parent_stream_config in self.parent_stream_configs: parent_stream = parent_stream_config.stream - parent_field = parent_stream_config.parent_key.eval(self.config) - stream_state_field = parent_stream_config.partition_field.eval(self.config) + parent_field = parent_stream_config.parent_key.eval(self.config) # type: ignore # parent_key is always casted to an interpolated string + partition_field = parent_stream_config.partition_field.eval(self.config) # type: ignore # partition_field is always casted to an interpolated string for parent_stream_slice in parent_stream.stream_slices( sync_mode=SyncMode.full_refresh, cursor_field=None, stream_state=None ): empty_parent_slice = True - parent_slice = parent_stream_slice + parent_partition = parent_stream_slice.partition if parent_stream_slice else {} for parent_record in parent_stream.read_records( sync_mode=SyncMode.full_refresh, cursor_field=None, stream_slice=parent_stream_slice, stream_state=None @@ -56,12 +41,14 @@ def stream_slices(self) -> Iterable[StreamSlice]: elif isinstance(parent_record, Record): parent_record = parent_record.data try: - stream_state_value = dpath.util.get(parent_record, parent_field) + partition_value = dpath.util.get(parent_record, parent_field) except KeyError: pass else: empty_parent_slice = False - yield {stream_state_field: stream_state_value, "channel": parent_slice["channel"]} + yield StreamSlice( + partition={partition_field: partition_value, "channel": parent_partition["channel"]}, cursor_slice={} + ) # If the parent slice contains no records, if empty_parent_slice: yield from [] diff --git 
a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml index 9bba30094febd..b4d5e9cfdcc01 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml +++ b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml @@ -279,7 +279,7 @@ definitions: cursor_field: "float_ts" cursor_datetime_formats: - "%s" - datetime_format: "%Y-%m-%dT%H:%M:%SZ" + datetime_format: "%s" start_datetime: type: MinMaxDatetime datetime: "{{ config['start_date'] }}" @@ -289,7 +289,7 @@ definitions: fields: - path: - float_ts - value: "{{ record['ts'] }}" + value: "{{ record['ts']|int }}" - path: - channel_id value: "{{ stream_slice['channel'] }}" diff --git a/airbyte-integrations/connectors/source-slack/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-slack/unit_tests/test_streams.py index afc46f42d663a..7210c2dfb8c52 100644 --- a/airbyte-integrations/connectors/source-slack/unit_tests/test_streams.py +++ b/airbyte-integrations/connectors/source-slack/unit_tests/test_streams.py @@ -42,10 +42,9 @@ def test_threads_stream_slices(requests_mock, token_config): threads_stream = get_stream_by_name("threads", token_config) slices = threads_stream.stream_slices(stream_state=None, sync_mode=SyncMode.full_refresh) - expected = [ - {'ts': 1710093600, 'channel': 'airbyte-for-beginners', 'start_time': '2024-02-29T20:00:00Z', 'end_time': '2024-03-10T18:00:00Z'}, - {'ts': 1709323200, 'channel': 'airbyte-for-beginners', 'start_time': '2024-02-29T20:00:00Z', 'end_time': '2024-03-10T18:00:00Z'}, - {'ts': 1710093600, 'channel': 'good-reads', 'start_time': '2024-02-29T20:00:00Z', 'end_time': '2024-03-10T18:00:00Z'}, - {'ts': 1709323200, 'channel': 'good-reads', 'start_time': '2024-02-29T20:00:00Z', 'end_time': '2024-03-10T18:00:00Z'}] + expected = [{"ts": 1710093600, "channel": "airbyte-for-beginners", "start_time": "1709236800", "end_time": "1710093600"}, + {"ts": 1709323200, "channel": "airbyte-for-beginners", "start_time": "1709236800", "end_time": "1710093600"}, + {"ts": 1710093600, "channel": "good-reads", "start_time": "1709236800", "end_time": "1710093600"}, + {"ts": 1709323200, "channel": "good-reads", "start_time": "1709236800", "end_time": "1710093600"}] assert list(slices) == expected From a898a701c3401149074b98512fed5ab0e10dffc0 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Wed, 20 Mar 2024 14:16:56 +0200 Subject: [PATCH 28/56] added header for migration guide --- docs/integrations/sources/slack-migrations.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/integrations/sources/slack-migrations.md b/docs/integrations/sources/slack-migrations.md index dcd73293228cd..45907d148bd44 100644 --- a/docs/integrations/sources/slack-migrations.md +++ b/docs/integrations/sources/slack-migrations.md @@ -1,3 +1,5 @@ +# Slack Migration Guide + ## Upgrading to 1.0.0 We're continuously striving to enhance the quality and reliability of our connectors at Airbyte. 
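The per-partition state that the migration guide above describes for the `Threads` and `Channel Messages` streams follows the low-code CDK's `states` / `partition` / `cursor` layout, the same shape the custom partition router in this series reads back via `state["partition"]["channel"]` and `state["cursor"]["float_ts"]`. A rough sketch of one such state entry after upgrading to 1.0.0 (the channel ID and cursor value are illustrative only):

```json
{
  "states": [
    {
      "partition": { "channel": "C04KX3KEZ54" },
      "cursor": { "float_ts": "1683104542.931169" }
    }
  ]
}
```
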
From 0c24c5fa521d8befcea5d5bdc0338802b0fce0d8 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Wed, 20 Mar 2024 14:18:12 +0200 Subject: [PATCH 29/56] updated tags --- airbyte-integrations/connectors/source-slack/metadata.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-slack/metadata.yaml b/airbyte-integrations/connectors/source-slack/metadata.yaml index edcf1e1f58e92..cce96feb08546 100644 --- a/airbyte-integrations/connectors/source-slack/metadata.yaml +++ b/airbyte-integrations/connectors/source-slack/metadata.yaml @@ -45,5 +45,5 @@ data: supportLevel: certified tags: - language:python - - cdk:python + - cdk:low-code metadataSpecVersion: "1.0" From dfd3eacdf57ca31345a4e5ae8b52691384c71b20 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Thu, 21 Mar 2024 13:25:33 +0200 Subject: [PATCH 30/56] moved join channels logic to custom retriever for channels stream --- .../source_slack/components/join_channels.py | 79 ++++++++++++------ .../source-slack/source_slack/manifest.yaml | 40 +++++----- .../unit_tests/test_components.py | 80 +++++++++++++------ 3 files changed, 130 insertions(+), 69 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/source_slack/components/join_channels.py b/airbyte-integrations/connectors/source-slack/source_slack/components/join_channels.py index d4d3e1b310b96..e9243f58b4025 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/components/join_channels.py +++ b/airbyte-integrations/connectors/source-slack/source_slack/components/join_channels.py @@ -1,13 +1,15 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. import logging -from dataclasses import dataclass +from functools import partial from typing import Any, Iterable, List, Mapping, Optional import requests from airbyte_cdk.models import SyncMode -from airbyte_cdk.sources.declarative.transformations import RecordTransformation -from airbyte_cdk.sources.declarative.types import Config, Record, StreamSlice, StreamState +from airbyte_cdk.sources.declarative.partition_routers import SinglePartitionRouter +from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever +from airbyte_cdk.sources.declarative.types import Record, StreamSlice +from airbyte_cdk.sources.streams.core import StreamData from airbyte_cdk.sources.streams.http import HttpStream from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator @@ -36,8 +38,12 @@ def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Override to simply indicate that the specific channel was joined successfully. This method should not return any data, but should return an empty iterable. """ - self.logger.info(f"Successfully joined channel: {stream_slice['channel_name']}") - yield response.json()["channel"] + is_ok = response.json()["ok"] + if is_ok: + self.logger.info(f"Successfully joined channel: {stream_slice['channel_name']}") + else: + self.logger.info(f"Unable to joined channel: {stream_slice['channel_name']}. 
Reason: {response.json()}") + return [] def request_body_json(self, stream_slice: Mapping = None, **kwargs) -> Optional[Mapping]: return {"channel": stream_slice["channel"]} @@ -49,12 +55,11 @@ def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, return None -@dataclass -class JoinChannels(RecordTransformation): - """ - Make 'conversations.join' POST request for every found channel id - if we are not still a member of such channel - """ +class ChannelsRetriever(SimpleRetriever): + def __post_init__(self, parameters: Mapping[str, Any]): + super().__post_init__(parameters) + self.stream_slicer = SinglePartitionRouter(parameters={}) + self.record_selector.transformations = [] def should_join_to_channel(self, config: Mapping[str, Any], record: Record) -> bool: """ @@ -75,19 +80,43 @@ def join_channels_stream(self, config) -> JoinChannelsStream: channel_filter = config["channel_filter"] return JoinChannelsStream(authenticator=authenticator, channel_filter=channel_filter) - def transform( + def join_channel(self, config: Mapping[str, Any], record: Mapping[str, Any]): + list( + self.join_channels_stream(config).read_records( + sync_mode=SyncMode.full_refresh, + stream_slice=self.make_join_channel_slice(record), + ) + ) + + def read_records( self, - record: Record, - config: Optional[Config] = None, - stream_state: Optional[StreamState] = None, + records_schema: Mapping[str, Any], stream_slice: Optional[StreamSlice] = None, - ) -> Mapping[str, Any]: - if self.should_join_to_channel(config, record): - channel = list( - self.join_channels_stream(config).read_records( - sync_mode=SyncMode.full_refresh, - stream_slice=self.make_join_channel_slice(record), - ) - ) - return channel[0] - return record + ) -> Iterable[StreamData]: + _slice = stream_slice or StreamSlice(partition={}, cursor_slice={}) # None-check + + self._paginator.reset() + + most_recent_record_from_slice = None + record_generator = partial( + self._parse_records, + stream_state=self.state or {}, + stream_slice=_slice, + records_schema=records_schema, + ) + + for stream_data in self._read_pages(record_generator, self.state, _slice): + current_record = self._extract_record(stream_data, _slice) + if self.cursor and current_record: + self.cursor.observe(_slice, current_record) + + most_recent_record_from_slice = self._get_most_recent_record(most_recent_record_from_slice, current_record, _slice) + # joining channel logic + if self.should_join_to_channel(self.config, stream_data): + self.join_channel(self.config, stream_data) + + yield stream_data + + if self.cursor: + self.cursor.close_slice(_slice, most_recent_record_from_slice) + return diff --git a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml index b4d5e9cfdcc01..ee0c8a707f827 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml +++ b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml @@ -72,21 +72,6 @@ definitions: $ref: "#/definitions/default_paginator" partition_router: [] - retriever_filter: - type: SimpleRetriever - requester: - $ref: "#/definitions/requester" - request_parameters: - types: "public_channel" - record_selector: - $ref: "#/definitions/selector" - record_filter: - type: RecordFilter - condition: "{{ record.id in config.channel_filter or not config.channel_filter }}" - paginator: - $ref: "#/definitions/default_paginator" - partition_router: [] - stream_base: primary_key: "id" retriever: @@ 
-125,16 +110,31 @@ definitions: field_path: members channels_stream: - $ref: "#/definitions/stream_base" + primary_key: "id" $parameters: name: channels path: conversations.list field_path: channels + schema_loader: + $ref: "#/definitions/schema_loader" retriever: - $ref: "#/definitions/retriever_filter" - transformations: - - type: CustomTransformation - class_name: "source_slack.components.join_channels.JoinChannels" + class_name: "source_slack.components.join_channels.ChannelsRetriever" + requester: + $ref: "#/definitions/requester" + request_parameters: + types: "public_channel" + record_selector: + $ref: "#/definitions/selector" + record_filter: + type: RecordFilter + condition: "{{ record.id in config.channel_filter or not config.channel_filter }}" + $parameters: + transformations: [[]] + paginator: + $ref: "#/definitions/default_paginator" + $parameters: + url_base: https://slack.com/api/ + partition_router: [] channels_partition_router: type: SubstreamPartitionRouter diff --git a/airbyte-integrations/connectors/source-slack/unit_tests/test_components.py b/airbyte-integrations/connectors/source-slack/unit_tests/test_components.py index 5ca5b7ab85994..f376fc3a2498f 100644 --- a/airbyte-integrations/connectors/source-slack/unit_tests/test_components.py +++ b/airbyte-integrations/connectors/source-slack/unit_tests/test_components.py @@ -3,14 +3,16 @@ from unittest.mock import MagicMock import pendulum +import pytest +from airbyte_cdk.sources.declarative.extractors import DpathExtractor, RecordSelector from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import ParentStreamConfig -from airbyte_cdk.sources.declarative.requesters import RequestOption +from airbyte_cdk.sources.declarative.requesters import HttpRequester, RequestOption from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator from airbyte_protocol.models import SyncMode from freezegun import freeze_time from source_slack import SourceSlack from source_slack.components.channel_members_extractor import ChannelMembersExtractor -from source_slack.components.join_channels import JoinChannels, JoinChannelsStream +from source_slack.components.join_channels import ChannelsRetriever, JoinChannelsStream from source_slack.components.threads_partition_router import ThreadsPartitionRouter @@ -29,10 +31,10 @@ def test_channel_members_extractor(token_config): "U061F7AUR", "W012A3CDE" ]} - records = ChannelMembersExtractor(config=token_config, parameters={}, field_path=['members']).extract_records(response=response_mock) - assert records == [{'member_id': 'U023BECGF'}, - {'member_id': 'U061F7AUR'}, - {'member_id': 'W012A3CDE'}] + records = ChannelMembersExtractor(config=token_config, parameters={}, field_path=["members"]).extract_records(response=response_mock) + assert records == [{"member_id": "U023BECGF"}, + {"member_id": "U061F7AUR"}, + {"member_id": "W012A3CDE"}] @freeze_time("2024-03-10T20:00:00Z", tz_offset=-2) @@ -73,36 +75,66 @@ def test_threads_partition_router(token_config, requests_mock): def test_join_channels(token_config, requests_mock, joined_channel): - requests_mock.post( + mocked_request = requests_mock.post( url="https://slack.com/api/conversations.join", - json={"channel": joined_channel} + json={"ok": True, "channel": joined_channel} ) token = token_config["credentials"]["api_token"] authenticator = TokenAuthenticator(token) channel_filter = token_config["channel_filter"] stream = JoinChannelsStream(authenticator=authenticator, channel_filter=channel_filter) - records = 
list(stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice={"channel": "C061EG9SL", "channel_name": "general"})) - assert records[0] == joined_channel + records = stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice={"channel": "C061EG9SL", "channel_name": "general"}) + assert not list(records) + assert mocked_request.called -def test_join_channels_should_join_to_channel(token_config): - transformation = JoinChannels() - assert transformation.should_join_to_channel(token_config, {"is_member": False}) is True - assert transformation.should_join_to_channel(token_config, {"is_member": True}) is False +def get_channels_retriever_instance(token_config): + return ChannelsRetriever( + config=token_config, + requester=HttpRequester(name="channels", path="conversations.list", url_base="https://slack.com/api/", config=token_config, + parameters={}), + record_selector=RecordSelector( + extractor=DpathExtractor(field_path=["channels"], config=token_config, parameters={}), + config=token_config, parameters={}, + schema_normalization=None), + parameters={} + ) -def test_join_channels_make_join_channel_slice(token_config): - transformation = JoinChannels() - assert transformation.make_join_channel_slice({"id": "C061EG9SL", "name": "general"}) == {"channel": "C061EG9SL", - "channel_name": "general"} +def test_join_channels_should_join_to_channel(token_config): + retriever = get_channels_retriever_instance(token_config) + assert retriever.should_join_to_channel(token_config, {"is_member": False}) is True + assert retriever.should_join_to_channel(token_config, {"is_member": True}) is False -def test_join_channel_transformation(requests_mock, token_config, joined_channel): - requests_mock.post( +def test_join_channels_make_join_channel_slice(token_config): + retriever = get_channels_retriever_instance(token_config) + expected_slice = {"channel": "C061EG9SL", "channel_name": "general"} + assert retriever.make_join_channel_slice({"id": "C061EG9SL", "name": "general"}) == expected_slice + + +@pytest.mark.parametrize( + "join_response, log_message", + ( + ({"ok": True, "channel": {"is_member": True, "id": "channel 2", "name": "test channel"}}, "Successfully joined channel: test channel"), + ({"ok": False, "error": "missing_scope", "needed": "channels:write"}, + "Unable to joined channel: test channel. 
Reason: {'ok': False, 'error': " "'missing_scope', 'needed': 'channels:write'}"), + ), + ids=["successful_join_to_channel", "failed_join_to_channel"] +) +def test_join_channel_read(requests_mock, token_config, joined_channel, caplog, join_response, log_message): + mocked_request = requests_mock.post( url="https://slack.com/api/conversations.join", - json={"channel": joined_channel} + json=join_response ) + requests_mock.get( + url="https://slack.com/api/conversations.list", + json={"channels": [{"is_member": True, "id": "channel 1"}, {"is_member": False, "id": "channel 2", "name": "test channel"}]} + ) + + retriever = get_channels_retriever_instance(token_config) + assert len(list(retriever.read_records(records_schema={}))) == 2 + assert mocked_request.called + assert mocked_request.last_request._request.body == b'{"channel": "channel 2"}' + assert log_message in caplog.text - transformation = JoinChannels() - assert transformation.transform(config=token_config, record={"is_member": True}) == {"is_member": True} - assert transformation.transform(config=token_config, record={"is_member": False}) == joined_channel From 42f8309a17035c23da4c323e3da4e61f0cf636a1 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Thu, 21 Mar 2024 18:09:33 +0200 Subject: [PATCH 31/56] refactor code --- .../source_slack/components/join_channels.py | 15 ++-- .../components/threads_partition_router.py | 74 +++++++++++++------ .../source-slack/source_slack/manifest.yaml | 10 +-- 3 files changed, 65 insertions(+), 34 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/source_slack/components/join_channels.py b/airbyte-integrations/connectors/source-slack/source_slack/components/join_channels.py index e9243f58b4025..e7f33851784c2 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/components/join_channels.py +++ b/airbyte-integrations/connectors/source-slack/source_slack/components/join_channels.py @@ -38,7 +38,7 @@ def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Override to simply indicate that the specific channel was joined successfully. This method should not return any data, but should return an empty iterable. 
""" - is_ok = response.json()["ok"] + is_ok = response.json().get("ok", False) if is_ok: self.logger.info(f"Successfully joined channel: {stream_slice['channel_name']}") else: @@ -46,7 +46,8 @@ def parse_response(self, response: requests.Response, stream_slice: Mapping[str, return [] def request_body_json(self, stream_slice: Mapping = None, **kwargs) -> Optional[Mapping]: - return {"channel": stream_slice["channel"]} + if stream_slice: + return {"channel": stream_slice.get("channel")} def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: """ @@ -106,17 +107,17 @@ def read_records( ) for stream_data in self._read_pages(record_generator, self.state, _slice): + # joining channel logic + if self.should_join_to_channel(self.config, stream_data): + self.join_channel(self.config, stream_data) + current_record = self._extract_record(stream_data, _slice) if self.cursor and current_record: self.cursor.observe(_slice, current_record) most_recent_record_from_slice = self._get_most_recent_record(most_recent_record_from_slice, current_record, _slice) - # joining channel logic - if self.should_join_to_channel(self.config, stream_data): - self.join_channel(self.config, stream_data) - yield stream_data if self.cursor: - self.cursor.close_slice(_slice, most_recent_record_from_slice) + self.cursor.observe(_slice, most_recent_record_from_slice) return diff --git a/airbyte-integrations/connectors/source-slack/source_slack/components/threads_partition_router.py b/airbyte-integrations/connectors/source-slack/source_slack/components/threads_partition_router.py index cc80c42ecff49..d187578ced242 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/components/threads_partition_router.py +++ b/airbyte-integrations/connectors/source-slack/source_slack/components/threads_partition_router.py @@ -1,10 +1,11 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
from dataclasses import dataclass -from typing import Iterable +from typing import Any, Iterable, Mapping, Union import dpath.util from airbyte_cdk.models import AirbyteMessage, SyncMode, Type +from airbyte_cdk.sources.declarative.models import ParentStreamConfig from airbyte_cdk.sources.declarative.partition_routers import SubstreamPartitionRouter from airbyte_cdk.sources.declarative.types import Record, StreamSlice @@ -15,40 +16,69 @@ class ThreadsPartitionRouter(SubstreamPartitionRouter): from parent stream to stream_slices """ + def _get_parent_field(self, parent_stream_config: ParentStreamConfig) -> str: + parent_field = parent_stream_config.parent_key.eval(self.config) # type: ignore # parent_key is always casted to an interpolated string + return parent_field + + def _get_partition_field(self, parent_stream_config: ParentStreamConfig) -> str: + partition_field = parent_stream_config.partition_field.eval(self.config) # type: ignore # partition_field is always casted to an interpolated string + return partition_field + + @staticmethod + def _parse_read_output(parent_record: Union[AirbyteMessage, Record]) -> Union[dict[str, Any], Mapping[str, Any], None]: + # Skip non-records (eg AirbyteLogMessage) + if isinstance(parent_record, AirbyteMessage): + if parent_record.type == Type.RECORD: + return parent_record.record.data + else: + return + if isinstance(parent_record, Record): + return parent_record.data + + @staticmethod + def _get_partition_value(parent_record: Union[dict[str, Any], Mapping[str, Any]], parent_field: str) -> str: + partition_value = None + try: + partition_value = dpath.util.get(parent_record, parent_field) + except KeyError: + pass + + return partition_value + + @staticmethod + def _create_stream_slice(partition_field: str, partition_value: str, parent_partition: dict[str, Any]) -> StreamSlice: + return StreamSlice(partition={partition_field: partition_value, "channel": parent_partition["channel"]}, cursor_slice={}) + def stream_slices(self) -> Iterable[StreamSlice]: if not self.parent_stream_configs: - yield from [] + yield StreamSlice(partition={}, cursor_slice={}) else: for parent_stream_config in self.parent_stream_configs: + parent_stream = parent_stream_config.stream - parent_field = parent_stream_config.parent_key.eval(self.config) # type: ignore # parent_key is always casted to an interpolated string - partition_field = parent_stream_config.partition_field.eval(self.config) # type: ignore # partition_field is always casted to an interpolated string + parent_field = self._get_parent_field(parent_stream_config) + partition_field = self._get_partition_field(parent_stream_config) + for parent_stream_slice in parent_stream.stream_slices( sync_mode=SyncMode.full_refresh, cursor_field=None, stream_state=None ): + empty_parent_slice = True parent_partition = parent_stream_slice.partition if parent_stream_slice else {} for parent_record in parent_stream.read_records( sync_mode=SyncMode.full_refresh, cursor_field=None, stream_slice=parent_stream_slice, stream_state=None ): - # Skip non-records (eg AirbyteLogMessage) - if isinstance(parent_record, AirbyteMessage): - if parent_record.type == Type.RECORD: - parent_record = parent_record.record.data - else: - continue - elif isinstance(parent_record, Record): - parent_record = parent_record.data - try: - partition_value = dpath.util.get(parent_record, parent_field) - except KeyError: - pass - else: - empty_parent_slice = False - yield StreamSlice( - partition={partition_field: partition_value, "channel": 
parent_partition["channel"]}, cursor_slice={} - ) + parent_record = self._parse_read_output(parent_record) + if not parent_record: + continue + + partition_value = self._get_partition_value(parent_record, parent_field) + empty_parent_slice = False if partition_value else True + + if not empty_parent_slice: + yield self._create_stream_slice(partition_field, partition_value, parent_partition) + # If the parent slice contains no records, if empty_parent_slice: - yield from [] + yield StreamSlice(partition={}, cursor_slice={}) diff --git a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml index ee0c8a707f827..0037db7e9bf62 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml +++ b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml @@ -1,4 +1,4 @@ -version: 0.58.7 +version: 0.73.0 type: DeclarativeSource definitions: @@ -171,7 +171,7 @@ definitions: fields: - path: - channel_id - value: "{{ stream_partition['channel_id'] }}" + value: "{{ stream_partition.get('channel_id') }}" channel_messages_stream: $ref: "#/definitions/stream_base" @@ -240,7 +240,7 @@ definitions: value: "{{ record.ts|int }}" - path: - channel_id - value: "{{ stream_partition['channel_id'] }}" + value: "{{ stream_partition.get('channel_id') }}" threads_stream: $ref: "#/definitions/stream_base" @@ -256,7 +256,7 @@ definitions: requester: $ref: "#/definitions/requester" request_parameters: - channel: "{{ stream_slice['channel'] }}" + channel: "{{ stream_slice.get('channel') }}" record_selector: $ref: "#/definitions/selector" paginator: @@ -292,7 +292,7 @@ definitions: value: "{{ record['ts']|int }}" - path: - channel_id - value: "{{ stream_slice['channel'] }}" + value: "{{ stream_slice.get('channel') }}" streams: - "#/definitions/users_stream" From bf4cb5d8436d2e0ae77f9c0e4532eebead85b864 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Thu, 21 Mar 2024 18:46:01 +0200 Subject: [PATCH 32/56] fix channel messages transformation --- .../connectors/source-slack/source_slack/manifest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml index 0037db7e9bf62..150a2e456cb10 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml +++ b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml @@ -240,7 +240,7 @@ definitions: value: "{{ record.ts|int }}" - path: - channel_id - value: "{{ stream_partition.get('channel_id') }}" + value: "{{ stream_partition.get('channel') }}" threads_stream: $ref: "#/definitions/stream_base" From b95bbd85b15b13415d95a75c61eff811341a0204 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Fri, 22 Mar 2024 14:26:41 +0200 Subject: [PATCH 33/56] updated expected records --- .../source-slack/integration_tests/expected_records.jsonl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/integration_tests/expected_records.jsonl b/airbyte-integrations/connectors/source-slack/integration_tests/expected_records.jsonl index d966942ca743b..1888ecc643f0e 100644 --- a/airbyte-integrations/connectors/source-slack/integration_tests/expected_records.jsonl +++ b/airbyte-integrations/connectors/source-slack/integration_tests/expected_records.jsonl @@ -4,10 +4,10 @@ {"stream": "channel_members", "data": 
{"member_id": "U04L65GPMKN", "channel_id": "C04KX3KEZ54"}, "emitted_at": 1707568736171} {"stream": "channel_members", "data": {"member_id": "U04LY6NARHU", "channel_id": "C04KX3KEZ54"}, "emitted_at": 1707568736172} {"stream": "channel_members", "data": {"member_id": "U04M23SBJGM", "channel_id": "C04KX3KEZ54"}, "emitted_at": 1707568736172} -{"stream": "channel_messages", "data": {"user": "U04L65GPMKN", "type": "message", "ts": "1683104542.931169", "client_msg_id": "3ae60d35-58b8-441c-923a-75de35a4ed8a", "text": "Test Thread 2", "team": "T04KX3KDDU6", "thread_ts": "1683104542.931169", "reply_count": 2, "reply_users_count": 1, "latest_reply": "1683104568.059569", "reply_users": ["U04L65GPMKN"], "is_locked": false, "subscribed": true, "last_read": "1683104568.059569", "blocks": [{"type": "rich_text", "block_id": "WLB", "elements": [{"type": "rich_text_section", "elements": [{"type": "text", "text": "Test Thread 2"}]}]}], "float_ts": 1683104542, "channel_id": ""}, "emitted_at": 1710778019252} -{"stream": "channel_messages", "data": {"user": "U04L65GPMKN", "type": "message", "ts": "1683104499.808709", "client_msg_id": "e27672c0-451e-42a6-8eff-a14d2db8ac1e", "text": "Test Thread 1", "team": "T04KX3KDDU6", "thread_ts": "1683104499.808709", "reply_count": 2, "reply_users_count": 1, "latest_reply": "1683104528.084359", "reply_users": ["U04L65GPMKN"], "is_locked": false, "subscribed": true, "last_read": "1683104528.084359", "blocks": [{"type": "rich_text", "block_id": "0j7", "elements": [{"type": "rich_text_section", "elements": [{"type": "text", "text": "Test Thread 1"}]}]}], "float_ts": 1683104499, "channel_id": ""}, "emitted_at": 1710778021028} -{"stream": "channel_messages", "data": {"user": "USLACKBOT", "type": "message", "ts": "1695880827.186049", "bot_id": "B01", "text": "Reminder: test reminder.", "team": "T04KX3KDDU6", "blocks": [{"type": "rich_text", "block_id": "BGzX", "elements": [{"type": "rich_text_section", "elements": [{"type": "text", "text": "Reminder: test reminder."}]}]}], "float_ts": 1695880827, "channel_id": ""}, "emitted_at": 1710778021270} -{"stream": "channel_messages", "data": {"subtype": "reminder_add", "user": "U04L65GPMKN", "type": "message", "ts": "1695814864.744249", "text": " set up a reminder \u201ctest reminder\u201d in this channel at 9AM tomorrow, Eastern European Summer Time.", "float_ts": 1695814864, "channel_id": ""}, "emitted_at": 1710778021275} +{"stream": "channel_messages", "data": {"user": "U04L65GPMKN", "type": "message", "ts": "1683104542.931169", "client_msg_id": "3ae60d35-58b8-441c-923a-75de35a4ed8a", "text": "Test Thread 2", "team": "T04KX3KDDU6", "thread_ts": "1683104542.931169", "reply_count": 2, "reply_users_count": 1, "latest_reply": "1683104568.059569", "reply_users": ["U04L65GPMKN"], "is_locked": false, "subscribed": true, "last_read": "1683104568.059569", "blocks": [{"type": "rich_text", "block_id": "WLB", "elements": [{"type": "rich_text_section", "elements": [{"type": "text", "text": "Test Thread 2"}]}]}], "float_ts": 1683104542, "channel_id": "C04KX3KEZ54"}, "emitted_at": 1711041520506} +{"stream": "channel_messages", "data": {"user": "U04L65GPMKN", "type": "message", "ts": "1683104499.808709", "client_msg_id": "e27672c0-451e-42a6-8eff-a14d2db8ac1e", "text": "Test Thread 1", "team": "T04KX3KDDU6", "thread_ts": "1683104499.808709", "reply_count": 2, "reply_users_count": 1, "latest_reply": "1683104528.084359", "reply_users": ["U04L65GPMKN"], "is_locked": false, "subscribed": true, "last_read": "1683104528.084359", "blocks": [{"type": 
"rich_text", "block_id": "0j7", "elements": [{"type": "rich_text_section", "elements": [{"type": "text", "text": "Test Thread 1"}]}]}], "float_ts": 1683104499, "channel_id": "C04LTCM2Y56"}, "emitted_at": 1711041522765} +{"stream": "channel_messages", "data": {"user": "USLACKBOT", "type": "message", "ts": "1695880827.186049", "bot_id": "B01", "text": "Reminder: test reminder.", "team": "T04KX3KDDU6", "blocks": [{"type": "rich_text", "block_id": "BGzX", "elements": [{"type": "rich_text_section", "elements": [{"type": "text", "text": "Reminder: test reminder."}]}]}], "float_ts": 1695880827, "channel_id": "C04LTCM2Y56"}, "emitted_at": 1711041523073} +{"stream": "channel_messages", "data": {"subtype": "reminder_add", "user": "U04L65GPMKN", "type": "message", "ts": "1695814864.744249", "text": " set up a reminder \u201ctest reminder\u201d in this channel at 9AM tomorrow, Eastern European Summer Time.", "float_ts": 1695814864, "channel_id": "C04LTCM2Y56"}, "emitted_at": 1711041523080} {"stream": "threads", "data": {"user": "U04L65GPMKN", "type": "message", "ts": "1683104528.084359", "client_msg_id": "ffccbb24-8dd6-476d-87bf-65e5fa033cb9", "text": "<@U04M23SBJGM> test test test", "team": "T04KX3KDDU6", "thread_ts": "1683104499.808709", "parent_user_id": "U04L65GPMKN", "blocks": [{"type": "rich_text", "block_id": "Lvl", "elements": [{"type": "rich_text_section", "elements": [{"type": "user", "user_id": "U04M23SBJGM"}, {"type": "text", "text": " test test test"}]}]}], "float_ts": 1683104528.084359, "channel_id": "C04LTCM2Y56"}, "emitted_at": 1710778305077} {"stream": "threads", "data": {"user": "USLACKBOT", "type": "message", "ts": "1695880827.186049", "bot_id": "B01", "text": "Reminder: test reminder.", "team": "T04KX3KDDU6", "blocks": [{"type": "rich_text", "block_id": "BGzX", "elements": [{"type": "rich_text_section", "elements": [{"type": "text", "text": "Reminder: test reminder."}]}]}], "float_ts": 1695880827.186049, "channel_id": "C04LTCM2Y56"}, "emitted_at": 1710778305751} {"stream": "threads", "data": {"subtype": "reminder_add", "user": "U04L65GPMKN", "type": "message", "ts": "1695814864.744249", "text": " set up a reminder \u201ctest reminder\u201d in this channel at 9AM tomorrow, Eastern European Summer Time.", "float_ts": 1695814864.744249, "channel_id": "C04LTCM2Y56"}, "emitted_at": 1710778305981} From 2c94e4e176749ae66a0d49d43a77f12af17fe701 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Mon, 25 Mar 2024 13:52:39 +0200 Subject: [PATCH 34/56] updated cases for check test --- .../connectors/source-slack/unit_tests/test_source.py | 1 + 1 file changed, 1 insertion(+) diff --git a/airbyte-integrations/connectors/source-slack/unit_tests/test_source.py b/airbyte-integrations/connectors/source-slack/unit_tests/test_source.py index 39e7c8e4c0bf8..dee2b6c556603 100644 --- a/airbyte-integrations/connectors/source-slack/unit_tests/test_source.py +++ b/airbyte-integrations/connectors/source-slack/unit_tests/test_source.py @@ -26,6 +26,7 @@ def test_streams(conversations_list, config, is_valid): "status_code, response, is_connection_successful, error_msg", ( (200, {"members": [{"id": 1, "name": "Abraham"}]}, True, None), + (200, {"ok": False, "error": "invalid_auth"}, False, "Authentication has failed, please update your credentials."), ( 400, "Bad request", From b89e7f45846ea33f6456d2cdd3ada00212827f68 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Mon, 25 Mar 2024 13:53:49 +0200 Subject: [PATCH 35/56] updated channel messages and threads streams with correct float_ts value --- 
.../connectors/source-slack/source_slack/manifest.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml index 150a2e456cb10..26c8340e93353 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml +++ b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml @@ -237,7 +237,7 @@ definitions: fields: - path: - float_ts - value: "{{ record.ts|int }}" + value: "{{ record.ts|float }}" - path: - channel_id value: "{{ stream_partition.get('channel') }}" @@ -268,7 +268,7 @@ definitions: stream: $ref: "#/definitions/channel_messages_stream" parent_key: ts - partition_field: ts + partition_field: float_ts request_option: type: RequestOption field_name: "ts" @@ -289,7 +289,7 @@ definitions: fields: - path: - float_ts - value: "{{ record['ts']|int }}" + value: "{{ record['ts']|float }}" - path: - channel_id value: "{{ stream_slice.get('channel') }}" From 53c3207c08076a27d6eb65e71c4e813bc456f32a Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Mon, 25 Mar 2024 13:54:29 +0200 Subject: [PATCH 36/56] added validation for request params in threads stream --- .../components/threads_partition_router.py | 35 +++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/source_slack/components/threads_partition_router.py b/airbyte-integrations/connectors/source-slack/source_slack/components/threads_partition_router.py index d187578ced242..896b92f3eaf9f 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/components/threads_partition_router.py +++ b/airbyte-integrations/connectors/source-slack/source_slack/components/threads_partition_router.py @@ -1,13 +1,14 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. from dataclasses import dataclass -from typing import Any, Iterable, Mapping, Union +from typing import Any, Iterable, Mapping, Optional, Union import dpath.util from airbyte_cdk.models import AirbyteMessage, SyncMode, Type from airbyte_cdk.sources.declarative.models import ParentStreamConfig from airbyte_cdk.sources.declarative.partition_routers import SubstreamPartitionRouter -from airbyte_cdk.sources.declarative.types import Record, StreamSlice +from airbyte_cdk.sources.declarative.requesters.request_option import RequestOptionType +from airbyte_cdk.sources.declarative.types import Record, StreamSlice, StreamState @dataclass @@ -16,6 +17,36 @@ class ThreadsPartitionRouter(SubstreamPartitionRouter): from parent stream to stream_slices """ + def _get_threads_request_params( + self, channel: str, stream_slice: Optional[StreamSlice] = None, stream_state: Optional[StreamState] = None + ) -> Mapping[str, Any]: + """ + Validates that the request params are >= than current state values for incremental syncs. + Threads request should be performed only for float_ts from slice >= current float ts from state. 
+ """ + if stream_state: + for state in stream_state["states"]: + if state["partition"]["channel"] == channel: + float_ts = state["cursor"]["float_ts"] + if float(stream_slice.partition["float_ts"]) >= float(float_ts): + return self._get_request_option(RequestOptionType.request_parameter, stream_slice) + else: + return {} + + return self._get_request_option(RequestOptionType.request_parameter, stream_slice) + + def get_request_params( + self, + stream_state: Optional[StreamState] = None, + stream_slice: Optional[StreamSlice] = None, + next_page_token: Optional[Mapping[str, Any]] = None, + ) -> Mapping[str, Any]: + channel = stream_slice.partition.get("channel") if stream_slice else None + if channel: + return self._get_threads_request_params(channel, stream_slice, stream_state) + + return self._get_request_option(RequestOptionType.request_parameter, stream_slice) + def _get_parent_field(self, parent_stream_config: ParentStreamConfig) -> str: parent_field = parent_stream_config.parent_key.eval(self.config) # type: ignore # parent_key is always casted to an interpolated string return parent_field From 935e818ef62bbc3b01b3c95a147be0ff49f34a47 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Mon, 25 Mar 2024 13:54:58 +0200 Subject: [PATCH 37/56] updated tests --- .../connectors/source-slack/poetry.lock | 14 ++-- .../unit_tests/test_components.py | 69 ++++++++++++++----- .../source-slack/unit_tests/test_streams.py | 8 +-- 3 files changed, 61 insertions(+), 30 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/poetry.lock b/airbyte-integrations/connectors/source-slack/poetry.lock index e196c2270f7a2..144216d14dcff 100644 --- a/airbyte-integrations/connectors/source-slack/poetry.lock +++ b/airbyte-integrations/connectors/source-slack/poetry.lock @@ -2,13 +2,13 @@ [[package]] name = "airbyte-cdk" -version = "0.73.0" +version = "0.74.0" description = "A framework for writing Airbyte Connectors." 
optional = false python-versions = ">=3.9" files = [ - {file = "airbyte-cdk-0.73.0.tar.gz", hash = "sha256:a03e0265a8a4afb1378d285993624659d9f481404aaf69cf7c0a5ddad3568ea2"}, - {file = "airbyte_cdk-0.73.0-py3-none-any.whl", hash = "sha256:339e42a7602461073a69bf0c4e11be26a7eea3157def43ffecdf9d0d73f32c6f"}, + {file = "airbyte-cdk-0.74.0.tar.gz", hash = "sha256:74241a055c205403a951383f43801067b7f451370e14d553d13d0cc476cbfff7"}, + {file = "airbyte_cdk-0.74.0-py3-none-any.whl", hash = "sha256:7e5b201d69ec0e7daab7e627dbc6add4dbba4a2f779132e86aaf6713650ff4d5"}, ] [package.dependencies] @@ -699,17 +699,17 @@ testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xm [[package]] name = "pytest-mock" -version = "3.12.0" +version = "3.14.0" description = "Thin-wrapper around the mock package for easier use with pytest" optional = false python-versions = ">=3.8" files = [ - {file = "pytest-mock-3.12.0.tar.gz", hash = "sha256:31a40f038c22cad32287bb43932054451ff5583ff094bca6f675df2f8bc1a6e9"}, - {file = "pytest_mock-3.12.0-py3-none-any.whl", hash = "sha256:0972719a7263072da3a21c7f4773069bcc7486027d7e8e1f81d98a47e701bc4f"}, + {file = "pytest-mock-3.14.0.tar.gz", hash = "sha256:2719255a1efeceadbc056d6bf3df3d1c5015530fb40cf347c0f9afac88410bd0"}, + {file = "pytest_mock-3.14.0-py3-none-any.whl", hash = "sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f"}, ] [package.dependencies] -pytest = ">=5.0" +pytest = ">=6.2.5" [package.extras] dev = ["pre-commit", "pytest-asyncio", "tox"] diff --git a/airbyte-integrations/connectors/source-slack/unit_tests/test_components.py b/airbyte-integrations/connectors/source-slack/unit_tests/test_components.py index f376fc3a2498f..b7857670c3d8c 100644 --- a/airbyte-integrations/connectors/source-slack/unit_tests/test_components.py +++ b/airbyte-integrations/connectors/source-slack/unit_tests/test_components.py @@ -7,6 +7,8 @@ from airbyte_cdk.sources.declarative.extractors import DpathExtractor, RecordSelector from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import ParentStreamConfig from airbyte_cdk.sources.declarative.requesters import HttpRequester, RequestOption +from airbyte_cdk.sources.declarative.requesters.request_option import RequestOptionType +from airbyte_cdk.sources.declarative.types import StreamSlice from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator from airbyte_protocol.models import SyncMode from freezegun import freeze_time @@ -37,6 +39,23 @@ def test_channel_members_extractor(token_config): {"member_id": "W012A3CDE"}] +def get_threads_partition_router(config): + channel_messages_stream = get_stream_by_name("channel_messages", config) + return ThreadsPartitionRouter( + config=config, + parameters={}, + parent_stream_configs=[ + ParentStreamConfig( + config=config, + stream=channel_messages_stream, + parent_key="ts", + partition_field="float_ts", + parameters={}, + request_option=RequestOption(field_name="ts", inject_into=RequestOptionType.request_parameter, parameters={}) + ), ] + ) + + @freeze_time("2024-03-10T20:00:00Z", tz_offset=-2) def test_threads_partition_router(token_config, requests_mock): start_date = "2024-03-01T20:00:00Z" @@ -51,29 +70,42 @@ def test_threads_partition_router(token_config, requests_mock): json={"messages": [{"ts": latest}, {"ts": oldest}]} ) - channel_messages_stream = get_stream_by_name("channel_messages", token_config) - router = ThreadsPartitionRouter( - config=token_config, - parameters={}, - parent_stream_configs=[ - 
ParentStreamConfig( - config=token_config, - stream=channel_messages_stream, - parent_key="ts", - partition_field="ts", - parameters={}, - request_option=RequestOption(field_name="ts", inject_into="request_parameter", parameters={}) - ), ] - ) + router = get_threads_partition_router(token_config) slices = router.stream_slices() - expected = [{"channel": "airbyte-for-beginners", "ts": latest}, - {"channel": "airbyte-for-beginners", "ts": oldest}, - {"channel": "good-reads", "ts": latest}, - {"channel": "good-reads", "ts": oldest}] + expected = [{"channel": "airbyte-for-beginners", "float_ts": latest}, + {"channel": "airbyte-for-beginners", "float_ts": oldest}, + {"channel": "good-reads", "float_ts": latest}, + {"channel": "good-reads", "float_ts": oldest}] assert list(slices) == expected +@pytest.mark.parametrize( + "stream_slice, stream_state, expected", + ( + ({}, {}, {}), + ( + {'float_ts': '1683104542.931169', 'channel': 'C04KX3KEZ54'}, + {'states': [{'partition': {'channel': 'C04KX3KEZ54', 'float_ts': '1683104542.931169'}, 'cursor': {'float_ts': 1683104568}}]}, + {} + ), + ( + {'float_ts': '1783104542.931169', 'channel': 'C04KX3KEZ54'}, + {'states': [{'partition': {'channel': 'C04KX3KEZ54', 'float_ts': '1683104542.931169'}, 'cursor': {'float_ts': 1683104568}}]}, + {'ts': '1783104542.931169'} + ), + ), + ids=[ + "empty_params_without_slice_and_state", + "empty_params_cursor_grater_then_slice_value", + "params_slice_value_greater_then_cursor_value"] +) +def test_threads_request_params(token_config, stream_slice, stream_state, expected): + router = get_threads_partition_router(token_config) + _slice = StreamSlice(partition=stream_slice, cursor_slice={}) + assert router.get_request_params(stream_slice=_slice, stream_state=stream_state) == expected + + def test_join_channels(token_config, requests_mock, joined_channel): mocked_request = requests_mock.post( url="https://slack.com/api/conversations.join", @@ -137,4 +169,3 @@ def test_join_channel_read(requests_mock, token_config, joined_channel, caplog, assert mocked_request.called assert mocked_request.last_request._request.body == b'{"channel": "channel 2"}' assert log_message in caplog.text - diff --git a/airbyte-integrations/connectors/source-slack/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-slack/unit_tests/test_streams.py index 7210c2dfb8c52..8b504aaf84ba0 100644 --- a/airbyte-integrations/connectors/source-slack/unit_tests/test_streams.py +++ b/airbyte-integrations/connectors/source-slack/unit_tests/test_streams.py @@ -42,9 +42,9 @@ def test_threads_stream_slices(requests_mock, token_config): threads_stream = get_stream_by_name("threads", token_config) slices = threads_stream.stream_slices(stream_state=None, sync_mode=SyncMode.full_refresh) - expected = [{"ts": 1710093600, "channel": "airbyte-for-beginners", "start_time": "1709236800", "end_time": "1710093600"}, - {"ts": 1709323200, "channel": "airbyte-for-beginners", "start_time": "1709236800", "end_time": "1710093600"}, - {"ts": 1710093600, "channel": "good-reads", "start_time": "1709236800", "end_time": "1710093600"}, - {"ts": 1709323200, "channel": "good-reads", "start_time": "1709236800", "end_time": "1710093600"}] + expected = [{"float_ts": 1710093600, "channel": "airbyte-for-beginners", "start_time": "1709236800", "end_time": "1710093600"}, + {"float_ts": 1709323200, "channel": "airbyte-for-beginners", "start_time": "1709236800", "end_time": "1710093600"}, + {"float_ts": 1710093600, "channel": "good-reads", "start_time": "1709236800", "end_time": 
"1710093600"}, + {"float_ts": 1709323200, "channel": "good-reads", "start_time": "1709236800", "end_time": "1710093600"}] assert list(slices) == expected From a1ae4f4f62d0821be75ff5d3271a87c7fe9fbbc0 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Mon, 1 Apr 2024 17:12:06 +0300 Subject: [PATCH 38/56] fix channels filter --- .../connectors/source-slack/source_slack/manifest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml index 26c8340e93353..5d67e7400b448 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml +++ b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml @@ -127,7 +127,7 @@ definitions: $ref: "#/definitions/selector" record_filter: type: RecordFilter - condition: "{{ record.id in config.channel_filter or not config.channel_filter }}" + condition: "{{ record.name in config.channel_filter or not config.channel_filter }}" $parameters: transformations: [[]] paginator: From 87b154499b5991dc7a1dbfa3bf29eb2022f45919 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Mon, 1 Apr 2024 17:17:13 +0300 Subject: [PATCH 39/56] poetry update --- .../connectors/source-slack/poetry.lock | 43 +++++++++---------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/poetry.lock b/airbyte-integrations/connectors/source-slack/poetry.lock index 144216d14dcff..7118def6ca97e 100644 --- a/airbyte-integrations/connectors/source-slack/poetry.lock +++ b/airbyte-integrations/connectors/source-slack/poetry.lock @@ -2,39 +2,38 @@ [[package]] name = "airbyte-cdk" -version = "0.74.0" +version = "0.78.1" description = "A framework for writing Airbyte Connectors." 
optional = false -python-versions = ">=3.9" +python-versions = "<4.0,>=3.9" files = [ - {file = "airbyte-cdk-0.74.0.tar.gz", hash = "sha256:74241a055c205403a951383f43801067b7f451370e14d553d13d0cc476cbfff7"}, - {file = "airbyte_cdk-0.74.0-py3-none-any.whl", hash = "sha256:7e5b201d69ec0e7daab7e627dbc6add4dbba4a2f779132e86aaf6713650ff4d5"}, + {file = "airbyte_cdk-0.78.1-py3-none-any.whl", hash = "sha256:73dfc03e55a7107bf28b5bbc4e43572d448c60e9b34368d22cf48b6536aa2263"}, + {file = "airbyte_cdk-0.78.1.tar.gz", hash = "sha256:700e5526ae29db1e453b3def8682726f7d8aa653ee2f3056488d0a484f055133"}, ] [package.dependencies] airbyte-protocol-models = "0.5.1" backoff = "*" cachetools = "*" -Deprecated = ">=1.2,<2.0" +Deprecated = ">=1.2,<1.3" dpath = ">=2.0.1,<2.1.0" genson = "1.2.2" isodate = ">=0.6.1,<0.7.0" Jinja2 = ">=3.1.2,<3.2.0" -jsonref = ">=0.2,<1.0" +jsonref = ">=0.2,<0.3" jsonschema = ">=3.2.0,<3.3.0" pendulum = "<3.0.0" pydantic = ">=1.10.8,<2.0.0" pyrate-limiter = ">=3.1.0,<3.2.0" python-dateutil = "*" -PyYAML = ">=6.0.1" +PyYAML = ">=6.0.1,<7.0.0" requests = "*" -requests-cache = "*" +requests_cache = "*" wcmatch = "8.4" [package.extras] -dev = ["avro (>=1.11.2,<1.12.0)", "cohere (==4.21)", "fastavro (>=1.8.0,<1.9.0)", "freezegun", "langchain (==0.0.271)", "markdown", "mypy", "openai[embeddings] (==0.27.9)", "pandas (==2.0.3)", "pdf2image (==1.16.3)", "pdfminer.six (==20221105)", "pyarrow (>=15.0.0,<15.1.0)", "pytesseract (==0.3.10)", "pytest", "pytest-cov", "pytest-httpserver", "pytest-mock", "requests-mock", "tiktoken (==0.4.0)", "unstructured (==0.10.27)", "unstructured.pytesseract (>=0.3.12)", "unstructured[docx,pptx] (==0.10.27)"] -file-based = ["avro (>=1.11.2,<1.12.0)", "fastavro (>=1.8.0,<1.9.0)", "markdown", "pdf2image (==1.16.3)", "pdfminer.six (==20221105)", "pyarrow (>=15.0.0,<15.1.0)", "pytesseract (==0.3.10)", "unstructured (==0.10.27)", "unstructured.pytesseract (>=0.3.12)", "unstructured[docx,pptx] (==0.10.27)"] -sphinx-docs = ["Sphinx (>=4.2,<5.0)", "sphinx-rtd-theme (>=1.0,<2.0)"] +file-based = ["avro (>=1.11.2,<1.12.0)", "fastavro (>=1.8.0,<1.9.0)", "markdown", "pyarrow (>=15.0.0,<15.1.0)", "pytesseract (==0.3.10)", "unstructured.pytesseract (>=0.3.12)", "unstructured[docx,pptx] (==0.10.27)"] +sphinx-docs = ["Sphinx (>=4.2,<4.3)", "sphinx-rtd-theme (>=1.0,<1.1)"] vector-db-based = ["cohere (==4.21)", "langchain (==0.0.271)", "openai[embeddings] (==0.27.9)", "tiktoken (==0.4.0)"] [[package]] @@ -380,13 +379,13 @@ i18n = ["Babel (>=2.7)"] [[package]] name = "jsonref" -version = "0.3.0" -description = "jsonref is a library for automatic dereferencing of JSON Reference objects for Python." 
+version = "0.2" +description = "An implementation of JSON Reference for Python" optional = false -python-versions = ">=3.3,<4.0" +python-versions = "*" files = [ - {file = "jsonref-0.3.0-py3-none-any.whl", hash = "sha256:9480ad1b500f7e795daeb0ef29f9c55ae3a9ab38fb8d6659b6f4868acb5a5bc8"}, - {file = "jsonref-0.3.0.tar.gz", hash = "sha256:68b330c6815dc0d490dbb3d65ccda265ddde9f7856fd2f3322f971d456ea7549"}, + {file = "jsonref-0.2-py3-none-any.whl", hash = "sha256:b1e82fa0b62e2c2796a13e5401fe51790b248f6d9bf9d7212a3e31a3501b291f"}, + {file = "jsonref-0.2.tar.gz", hash = "sha256:f3c45b121cf6257eafabdc3a8008763aed1cd7da06dbabc59a9e4d2a5e4e6697"}, ] [[package]] @@ -852,22 +851,20 @@ yaml = ["pyyaml (>=6.0.1)"] [[package]] name = "requests-mock" -version = "1.11.0" +version = "1.12.1" description = "Mock out responses from the requests package" optional = false -python-versions = "*" +python-versions = ">=3.5" files = [ - {file = "requests-mock-1.11.0.tar.gz", hash = "sha256:ef10b572b489a5f28e09b708697208c4a3b2b89ef80a9f01584340ea357ec3c4"}, - {file = "requests_mock-1.11.0-py2.py3-none-any.whl", hash = "sha256:f7fae383f228633f6bececebdab236c478ace2284d6292c6e7e2867b9ab74d15"}, + {file = "requests-mock-1.12.1.tar.gz", hash = "sha256:e9e12e333b525156e82a3c852f22016b9158220d2f47454de9cae8a77d371401"}, + {file = "requests_mock-1.12.1-py2.py3-none-any.whl", hash = "sha256:b1e37054004cdd5e56c84454cc7df12b25f90f382159087f4b6915aaeef39563"}, ] [package.dependencies] -requests = ">=2.3,<3" -six = "*" +requests = ">=2.22,<3" [package.extras] fixture = ["fixtures"] -test = ["fixtures", "mock", "purl", "pytest", "requests-futures", "sphinx", "testtools"] [[package]] name = "setuptools" From 92c5f05f81f7092d067e0da3941203f3b3b2c5ac Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Mon, 1 Apr 2024 19:50:18 +0300 Subject: [PATCH 40/56] fix requests in threads stream --- .../integration_tests/abnormal_state.json | 120 ++++++++++-------- .../source-slack/source_slack/manifest.yaml | 6 +- 2 files changed, 71 insertions(+), 55 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-slack/integration_tests/abnormal_state.json index 6cc36537a8fb0..b1e705f3a3551 100644 --- a/airbyte-integrations/connectors/source-slack/integration_tests/abnormal_state.json +++ b/airbyte-integrations/connectors/source-slack/integration_tests/abnormal_state.json @@ -1,94 +1,110 @@ [ { - "type": "STREAM", - "stream": { - "stream_descriptor": { - "name": "threads" + "type" : "STREAM", + "stream" : { + "stream_descriptor" : { + "name" : "threads", + "namespace" : null }, - "stream_state": { - "states": [ + "stream_state" : { + "states" : [ { - "partition": { - "channel": "C04KX3KEZ54", - "ts": "1683104542.931169" + "partition" : { + "float_ts" : "2534945416.931169", + "parent_slice" : { + "channel" : "C04KX3KEZ54", + "parent_slice" : {} + } }, - "cursor": { - "float_ts": "2534945416" + "cursor" : { + "float_ts" : 2534945416.059569 } }, { - "partition": { - "channel": "C04LTCM2Y56", - "ts": "1683104499.808709" + "partition" : { + "float_ts" : "2534945416.808709", + "parent_slice" : { + "channel" : "C04LTCM2Y56", + "parent_slice" : {} + } }, - "cursor": { - "float_ts": "2534945416" + "cursor" : { + "float_ts" : 2534945416.084359 } }, { - "partition": { - "channel": "C04LTCM2Y56", - "ts": "1695880827.186049" + "partition" : { + "float_ts" : "2534945416.186049", + "parent_slice" : { + "channel" : "C04LTCM2Y56", + "parent_slice" : {} + } }, - "cursor": 
{ - "float_ts": "2534945416" + "cursor" : { + "float_ts" : 2534945416.186049 } }, { - "partition": { - "channel": "C04LTCM2Y56", - "ts": "1695814864.744249" + "partition" : { + "float_ts" : "2534945416.744249", + "parent_slice" : { + "channel" : "C04LTCM2Y56", + "parent_slice" : {} + } }, - "cursor": { - "float_ts": "2534945416" + "cursor" : { + "float_ts" : 2534945416.744249 } } ] } + }, + "sourceStats" : { + "recordCount" : 1 } }, { - "type": "STREAM", - "stream": { - "stream_descriptor": { - "name": "channel_messages" + "type" : "STREAM", + "stream" : { + "stream_descriptor" : { + "name" : "channel_messages" }, - "stream_state": { - "states": [ + "stream_state" : { + "states" : [ { - "partition": { - "channel_id": "C04LTCM2Y56", - "parent_slice": {} + "partition" : { + "channel_id" : "C04LTCM2Y56", + "parent_slice" : {} }, - "cursor": { - "float_ts": "2534945416" + "cursor" : { + "float_ts" : "2534945416" } }, { - "partition": { - "channel": "C04KX3KEZ54", - "parent_slice": {} + "partition" : { + "channel" : "C04KX3KEZ54", + "parent_slice" : {} }, - "cursor": { - "float_ts": "2534945416" + "cursor" : { + "float_ts" : "2534945416" } }, { - "partition": { - "channel": "C04L3M4PTJ6", - "parent_slice": {} + "partition" : { + "channel" : "C04L3M4PTJ6", + "parent_slice" : {} }, - "cursor": { - "float_ts": "2534945416" + "cursor" : { + "float_ts" : "2534945416" } }, { - "partition": { - "channel": "C04LTCM2Y56", - "parent_slice": {} + "partition" : { + "channel" : "C04LTCM2Y56", + "parent_slice" : {} }, - "cursor": { - "float_ts": "2534945416" + "cursor" : { + "float_ts" : "2534945416" } } ] diff --git a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml index 5d67e7400b448..1d1a0f3476fda 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml +++ b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml @@ -256,13 +256,13 @@ definitions: requester: $ref: "#/definitions/requester" request_parameters: - channel: "{{ stream_slice.get('channel') }}" + channel: "{{ stream_slice['parent_slice']['channel'] }}" record_selector: $ref: "#/definitions/selector" paginator: $ref: "#/definitions/default_paginator" partition_router: - class_name: "source_slack.components.threads_partition_router.ThreadsPartitionRouter" + type: SubstreamPartitionRouter parent_stream_configs: - type: ParentStreamConfig stream: @@ -292,7 +292,7 @@ definitions: value: "{{ record['ts']|float }}" - path: - channel_id - value: "{{ stream_slice.get('channel') }}" + value: "{{ stream_slice['parent_slice']['channel'] }}" streams: - "#/definitions/users_stream" From 82e288fe71081af13cb04d886307dcd1a15b70cd Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Mon, 1 Apr 2024 19:52:05 +0300 Subject: [PATCH 41/56] format fix --- .../integration_tests/abnormal_state.json | 126 +++++++++--------- 1 file changed, 63 insertions(+), 63 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-slack/integration_tests/abnormal_state.json index b1e705f3a3551..18cbb98009984 100644 --- a/airbyte-integrations/connectors/source-slack/integration_tests/abnormal_state.json +++ b/airbyte-integrations/connectors/source-slack/integration_tests/abnormal_state.json @@ -1,110 +1,110 @@ [ { - "type" : "STREAM", - "stream" : { - "stream_descriptor" : { - "name" : "threads", - "namespace" : null + "type": "STREAM", + "stream": { + 
"stream_descriptor": { + "name": "threads", + "namespace": null }, - "stream_state" : { - "states" : [ + "stream_state": { + "states": [ { - "partition" : { - "float_ts" : "2534945416.931169", - "parent_slice" : { - "channel" : "C04KX3KEZ54", - "parent_slice" : {} + "partition": { + "float_ts": "2534945416.931169", + "parent_slice": { + "channel": "C04KX3KEZ54", + "parent_slice": {} } }, - "cursor" : { - "float_ts" : 2534945416.059569 + "cursor": { + "float_ts": 2534945416.059569 } }, { - "partition" : { - "float_ts" : "2534945416.808709", - "parent_slice" : { - "channel" : "C04LTCM2Y56", - "parent_slice" : {} + "partition": { + "float_ts": "2534945416.808709", + "parent_slice": { + "channel": "C04LTCM2Y56", + "parent_slice": {} } }, - "cursor" : { - "float_ts" : 2534945416.084359 + "cursor": { + "float_ts": 2534945416.084359 } }, { - "partition" : { - "float_ts" : "2534945416.186049", - "parent_slice" : { - "channel" : "C04LTCM2Y56", - "parent_slice" : {} + "partition": { + "float_ts": "2534945416.186049", + "parent_slice": { + "channel": "C04LTCM2Y56", + "parent_slice": {} } }, - "cursor" : { - "float_ts" : 2534945416.186049 + "cursor": { + "float_ts": 2534945416.186049 } }, { - "partition" : { - "float_ts" : "2534945416.744249", - "parent_slice" : { - "channel" : "C04LTCM2Y56", - "parent_slice" : {} + "partition": { + "float_ts": "2534945416.744249", + "parent_slice": { + "channel": "C04LTCM2Y56", + "parent_slice": {} } }, - "cursor" : { - "float_ts" : 2534945416.744249 + "cursor": { + "float_ts": 2534945416.744249 } } ] } }, - "sourceStats" : { - "recordCount" : 1 + "sourceStats": { + "recordCount": 1 } }, { - "type" : "STREAM", - "stream" : { - "stream_descriptor" : { - "name" : "channel_messages" + "type": "STREAM", + "stream": { + "stream_descriptor": { + "name": "channel_messages" }, - "stream_state" : { - "states" : [ + "stream_state": { + "states": [ { - "partition" : { - "channel_id" : "C04LTCM2Y56", - "parent_slice" : {} + "partition": { + "channel_id": "C04LTCM2Y56", + "parent_slice": {} }, - "cursor" : { - "float_ts" : "2534945416" + "cursor": { + "float_ts": "2534945416" } }, { - "partition" : { - "channel" : "C04KX3KEZ54", - "parent_slice" : {} + "partition": { + "channel": "C04KX3KEZ54", + "parent_slice": {} }, - "cursor" : { - "float_ts" : "2534945416" + "cursor": { + "float_ts": "2534945416" } }, { - "partition" : { - "channel" : "C04L3M4PTJ6", - "parent_slice" : {} + "partition": { + "channel": "C04L3M4PTJ6", + "parent_slice": {} }, - "cursor" : { - "float_ts" : "2534945416" + "cursor": { + "float_ts": "2534945416" } }, { - "partition" : { - "channel" : "C04LTCM2Y56", - "parent_slice" : {} + "partition": { + "channel": "C04LTCM2Y56", + "parent_slice": {} }, - "cursor" : { - "float_ts" : "2534945416" + "cursor": { + "float_ts": "2534945416" } } ] From a36d6cdb0e4c2511c430f64ddecaf7f5c800ab7c Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Tue, 2 Apr 2024 14:01:24 +0300 Subject: [PATCH 42/56] reverted threads stream to python implementation --- .../integration_tests/abnormal_state.json | 61 +----- .../components/threads_partition_router.py | 115 ------------ .../source-slack/source_slack/manifest.yaml | 53 ------ .../source-slack/source_slack/source.py | 46 +++++ .../source-slack/source_slack/streams.py | 176 ++++++++++++++++++ 5 files changed, 225 insertions(+), 226 deletions(-) delete mode 100644 airbyte-integrations/connectors/source-slack/source_slack/components/threads_partition_router.py create mode 100644 
airbyte-integrations/connectors/source-slack/source_slack/streams.py diff --git a/airbyte-integrations/connectors/source-slack/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-slack/integration_tests/abnormal_state.json index 18cbb98009984..65b0e9d4d90f8 100644 --- a/airbyte-integrations/connectors/source-slack/integration_tests/abnormal_state.json +++ b/airbyte-integrations/connectors/source-slack/integration_tests/abnormal_state.json @@ -2,65 +2,10 @@ { "type": "STREAM", "stream": { - "stream_descriptor": { - "name": "threads", - "namespace": null - }, "stream_state": { - "states": [ - { - "partition": { - "float_ts": "2534945416.931169", - "parent_slice": { - "channel": "C04KX3KEZ54", - "parent_slice": {} - } - }, - "cursor": { - "float_ts": 2534945416.059569 - } - }, - { - "partition": { - "float_ts": "2534945416.808709", - "parent_slice": { - "channel": "C04LTCM2Y56", - "parent_slice": {} - } - }, - "cursor": { - "float_ts": 2534945416.084359 - } - }, - { - "partition": { - "float_ts": "2534945416.186049", - "parent_slice": { - "channel": "C04LTCM2Y56", - "parent_slice": {} - } - }, - "cursor": { - "float_ts": 2534945416.186049 - } - }, - { - "partition": { - "float_ts": "2534945416.744249", - "parent_slice": { - "channel": "C04LTCM2Y56", - "parent_slice": {} - } - }, - "cursor": { - "float_ts": 2534945416.744249 - } - } - ] - } - }, - "sourceStats": { - "recordCount": 1 + "float_ts": 7270247822 + }, + "stream_descriptor": { "name": "threads" } } }, { diff --git a/airbyte-integrations/connectors/source-slack/source_slack/components/threads_partition_router.py b/airbyte-integrations/connectors/source-slack/source_slack/components/threads_partition_router.py deleted file mode 100644 index 896b92f3eaf9f..0000000000000 --- a/airbyte-integrations/connectors/source-slack/source_slack/components/threads_partition_router.py +++ /dev/null @@ -1,115 +0,0 @@ -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. - -from dataclasses import dataclass -from typing import Any, Iterable, Mapping, Optional, Union - -import dpath.util -from airbyte_cdk.models import AirbyteMessage, SyncMode, Type -from airbyte_cdk.sources.declarative.models import ParentStreamConfig -from airbyte_cdk.sources.declarative.partition_routers import SubstreamPartitionRouter -from airbyte_cdk.sources.declarative.requesters.request_option import RequestOptionType -from airbyte_cdk.sources.declarative.types import Record, StreamSlice, StreamState - - -@dataclass -class ThreadsPartitionRouter(SubstreamPartitionRouter): - """Overwrite SubstreamPartitionRouter to be able to pass more than one value - from parent stream to stream_slices - """ - - def _get_threads_request_params( - self, channel: str, stream_slice: Optional[StreamSlice] = None, stream_state: Optional[StreamState] = None - ) -> Mapping[str, Any]: - """ - Validates that the request params are >= than current state values for incremental syncs. - Threads request should be performed only for float_ts from slice >= current float ts from state. 
- """ - if stream_state: - for state in stream_state["states"]: - if state["partition"]["channel"] == channel: - float_ts = state["cursor"]["float_ts"] - if float(stream_slice.partition["float_ts"]) >= float(float_ts): - return self._get_request_option(RequestOptionType.request_parameter, stream_slice) - else: - return {} - - return self._get_request_option(RequestOptionType.request_parameter, stream_slice) - - def get_request_params( - self, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, - ) -> Mapping[str, Any]: - channel = stream_slice.partition.get("channel") if stream_slice else None - if channel: - return self._get_threads_request_params(channel, stream_slice, stream_state) - - return self._get_request_option(RequestOptionType.request_parameter, stream_slice) - - def _get_parent_field(self, parent_stream_config: ParentStreamConfig) -> str: - parent_field = parent_stream_config.parent_key.eval(self.config) # type: ignore # parent_key is always casted to an interpolated string - return parent_field - - def _get_partition_field(self, parent_stream_config: ParentStreamConfig) -> str: - partition_field = parent_stream_config.partition_field.eval(self.config) # type: ignore # partition_field is always casted to an interpolated string - return partition_field - - @staticmethod - def _parse_read_output(parent_record: Union[AirbyteMessage, Record]) -> Union[dict[str, Any], Mapping[str, Any], None]: - # Skip non-records (eg AirbyteLogMessage) - if isinstance(parent_record, AirbyteMessage): - if parent_record.type == Type.RECORD: - return parent_record.record.data - else: - return - if isinstance(parent_record, Record): - return parent_record.data - - @staticmethod - def _get_partition_value(parent_record: Union[dict[str, Any], Mapping[str, Any]], parent_field: str) -> str: - partition_value = None - try: - partition_value = dpath.util.get(parent_record, parent_field) - except KeyError: - pass - - return partition_value - - @staticmethod - def _create_stream_slice(partition_field: str, partition_value: str, parent_partition: dict[str, Any]) -> StreamSlice: - return StreamSlice(partition={partition_field: partition_value, "channel": parent_partition["channel"]}, cursor_slice={}) - - def stream_slices(self) -> Iterable[StreamSlice]: - if not self.parent_stream_configs: - yield StreamSlice(partition={}, cursor_slice={}) - else: - for parent_stream_config in self.parent_stream_configs: - - parent_stream = parent_stream_config.stream - parent_field = self._get_parent_field(parent_stream_config) - partition_field = self._get_partition_field(parent_stream_config) - - for parent_stream_slice in parent_stream.stream_slices( - sync_mode=SyncMode.full_refresh, cursor_field=None, stream_state=None - ): - - empty_parent_slice = True - parent_partition = parent_stream_slice.partition if parent_stream_slice else {} - - for parent_record in parent_stream.read_records( - sync_mode=SyncMode.full_refresh, cursor_field=None, stream_slice=parent_stream_slice, stream_state=None - ): - parent_record = self._parse_read_output(parent_record) - if not parent_record: - continue - - partition_value = self._get_partition_value(parent_record, parent_field) - empty_parent_slice = False if partition_value else True - - if not empty_parent_slice: - yield self._create_stream_slice(partition_field, partition_value, parent_partition) - - # If the parent slice contains no records, - if empty_parent_slice: - yield 
StreamSlice(partition={}, cursor_slice={}) diff --git a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml index 1d1a0f3476fda..f90b9e8ed3d3e 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml +++ b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml @@ -242,64 +242,11 @@ definitions: - channel_id value: "{{ stream_partition.get('channel') }}" - threads_stream: - $ref: "#/definitions/stream_base" - $parameters: - name: threads - path: conversations.replies - field_path: messages - primary_key: - - channel_id - - ts - retriever: - type: SimpleRetriever - requester: - $ref: "#/definitions/requester" - request_parameters: - channel: "{{ stream_slice['parent_slice']['channel'] }}" - record_selector: - $ref: "#/definitions/selector" - paginator: - $ref: "#/definitions/default_paginator" - partition_router: - type: SubstreamPartitionRouter - parent_stream_configs: - - type: ParentStreamConfig - stream: - $ref: "#/definitions/channel_messages_stream" - parent_key: ts - partition_field: float_ts - request_option: - type: RequestOption - field_name: "ts" - inject_into: "request_parameter" - incremental_sync: - type: DatetimeBasedCursor - lookback_window: "P{{ config.get('lookback_window', 0) }}D" - cursor_field: "float_ts" - cursor_datetime_formats: - - "%s" - datetime_format: "%s" - start_datetime: - type: MinMaxDatetime - datetime: "{{ config['start_date'] }}" - datetime_format: "%Y-%m-%dT%H:%M:%SZ" - transformations: - - type: AddFields - fields: - - path: - - float_ts - value: "{{ record['ts']|float }}" - - path: - - channel_id - value: "{{ stream_slice['parent_slice']['channel'] }}" - streams: - "#/definitions/users_stream" - "#/definitions/channels_stream" - "#/definitions/channel_members_stream" - "#/definitions/channel_messages_stream" - - "#/definitions/threads_stream" check: type: CheckStream diff --git a/airbyte-integrations/connectors/source-slack/source_slack/source.py b/airbyte-integrations/connectors/source-slack/source_slack/source.py index ffc0fb532c4c1..b2c78a18eef69 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/source.py +++ b/airbyte-integrations/connectors/source-slack/source_slack/source.py @@ -2,9 +2,55 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +from typing import Any, List, Mapping + +import pendulum from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource +from airbyte_cdk.sources.streams import Stream +from airbyte_cdk.sources.streams.http import HttpStream +from airbyte_cdk.sources.streams.http.requests_native_auth import TokenAuthenticator +from source_slack.streams import Threads class SourceSlack(YamlDeclarativeSource): def __init__(self): super().__init__(**{"path_to_yaml": "manifest.yaml"}) + + def _threads_authenticator(self, config: Mapping[str, Any]): + # Added to maintain backward compatibility with previous versions + if "api_token" in config: + return TokenAuthenticator(config["api_token"]) + + credentials = config.get("credentials", {}) + credentials_title = credentials.get("option_title") + if credentials_title == "Default OAuth2.0 authorization": + return TokenAuthenticator(credentials["access_token"]) + elif credentials_title == "API Token Credentials": + return TokenAuthenticator(credentials["api_token"]) + else: + raise Exception(f"No supported option_title: {credentials_title} specified. 
See spec.json for references") + + def get_threads_stream(self, config: Mapping[str, Any], channel_messages: Stream) -> HttpStream: + authenticator = self._threads_authenticator(config) + default_start_date = pendulum.parse(config["start_date"]) + # this field is not exposed to spec, used only for testing purposes + end_date = config.get("end_date") + end_date = end_date and pendulum.parse(end_date) + threads_lookback_window = pendulum.Duration(days=config["lookback_window"]) + threads = Threads( + authenticator=authenticator, + default_start_date=default_start_date, + end_date=end_date, + lookback_window=threads_lookback_window, + parent_stream=channel_messages, + ) + return threads + + def streams(self, config: Mapping[str, Any]) -> List[Stream]: + declarative_streams = super().streams(config) + + channel_messages = [stream for stream in declarative_streams if stream.name == "channel_messages"][0] + threads_stream = self.get_threads_stream(config, channel_messages) + declarative_streams.append(threads_stream) + + return declarative_streams diff --git a/airbyte-integrations/connectors/source-slack/source_slack/streams.py b/airbyte-integrations/connectors/source-slack/source_slack/streams.py new file mode 100644 index 0000000000000..a8eb9de546bb8 --- /dev/null +++ b/airbyte-integrations/connectors/source-slack/source_slack/streams.py @@ -0,0 +1,176 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# +from abc import ABC, abstractmethod +from typing import Any, Iterable, Mapping, MutableMapping, Optional + +import pendulum +import requests +from airbyte_cdk.models import SyncMode +from airbyte_cdk.sources.streams.http import HttpStream +from pendulum import DateTime + + +class SlackStream(HttpStream, ABC): + url_base = "https://slack.com/api/" + primary_key = "id" + page_size = 1000 + + @property + def max_retries(self) -> int: + # Slack's rate limiting can be unpredictable so we increase the max number of retries by a lot before failing + return 20 + + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + """Slack uses a cursor-based pagination strategy. + Extract the cursor from the response if it exists and return it in a format + that can be used to update request parameters""" + + json_response = response.json() + next_cursor = json_response.get("response_metadata", {}).get("next_cursor") + if next_cursor: + return {"cursor": next_cursor} + + def request_params( + self, + stream_state: Mapping[str, Any], + stream_slice: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None, + ) -> MutableMapping[str, Any]: + params = {"limit": self.page_size} + if next_page_token: + params.update(**next_page_token) + return params + + def parse_response( + self, + response: requests.Response, + stream_state: Mapping[str, Any] = None, + stream_slice: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None, + ) -> Iterable[MutableMapping]: + json_response = response.json() + yield from json_response.get(self.data_field, []) + + def backoff_time(self, response: requests.Response) -> Optional[float]: + """This method is called if we run into the rate limit. + Slack puts the retry time in the `Retry-After` response header so we + we return that value. If the response is anything other than a 429 (e.g: 5XX) + fall back on default retry behavior. 
+ Rate Limits Docs: https://api.slack.com/docs/rate-limits#web""" + + if "Retry-After" in response.headers: + return int(response.headers["Retry-After"]) + else: + self.logger.info("Retry-after header not found. Using default backoff value") + return 5 + + @property + @abstractmethod + def data_field(self) -> str: + """The name of the field in the response which contains the data""" + + def should_retry(self, response: requests.Response) -> bool: + return response.status_code == requests.codes.REQUEST_TIMEOUT or super().should_retry(response) + + +class IncrementalMessageStream(SlackStream, ABC): + data_field = "messages" + cursor_field = "float_ts" + primary_key = ["channel_id", "ts"] + + def __init__(self, default_start_date: DateTime, end_date: Optional[DateTime] = None, **kwargs): + self._start_ts = default_start_date.timestamp() + self._end_ts = end_date and end_date.timestamp() + self.set_sub_primary_key() + super().__init__(**kwargs) + + def set_sub_primary_key(self): + if isinstance(self.primary_key, list): + for index, value in enumerate(self.primary_key): + setattr(self, f"sub_primary_key_{index + 1}", value) + else: + self.logger.error("Failed during setting sub primary keys. Primary key should be list.") + + def request_params(self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, **kwargs) -> MutableMapping[str, Any]: + params = super().request_params(stream_state=stream_state, stream_slice=stream_slice, **kwargs) + params.update(**stream_slice) + return params + + def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iterable[Mapping]: + for record in super().parse_response(response, **kwargs): + record[self.sub_primary_key_1] = stream_slice.get("channel", "") + record[self.cursor_field] = float(record[self.sub_primary_key_2]) + yield record + + def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]) -> Mapping[str, Any]: + current_stream_state = current_stream_state or {} + current_stream_state[self.cursor_field] = max( + latest_record[self.cursor_field], current_stream_state.get(self.cursor_field, self._start_ts) + ) + + return current_stream_state + + +class Threads(IncrementalMessageStream): + def __init__(self, lookback_window: Mapping[str, int], parent_stream, **kwargs): + self.messages_lookback_window = lookback_window + self.parent_stream = parent_stream + super().__init__(**kwargs) + + def path(self, **kwargs) -> str: + return "conversations.replies" + + @property + def state(self) -> MutableMapping[str, Any]: + return self._state + + @state.setter + def state(self, value: MutableMapping[str, Any]) -> None: + self._state = value + + def stream_slices(self, stream_state: Mapping[str, Any] = None, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]: + """ + The logic for incrementally syncing threads is not very obvious, so buckle up. + + To get all messages in a thread, one must specify the channel and timestamp of the parent (first) message of that thread, + basically its ID. + + One complication is that threads can be updated at Any time in the future. Therefore, if we wanted to comprehensively sync data + i.e: get every single response in a thread, we'd have to read every message in the slack instance every time we ran a sync, + because otherwise there is no way to guarantee that a thread deep in the past didn't receive a new message. 
+ + A pragmatic workaround is to say we want threads to be at least N days fresh i.e: look back N days into the past, + get every message since, and read all of the thread responses. This is essentially the approach we're taking here via slicing: + create slices from N days into the past and read all messages in threads since then. We could optionally filter out records we have + already read, but that's omitted to keep the logic simple to reason about. + + Good luck. + """ + + stream_state = stream_state or {} + # channels_stream = Channels(authenticator=self._session.auth, channel_filter=self.channel_filter) + + if self.cursor_field in stream_state: + # Since new messages can be posted to threads continuously after the parent message has been posted, + # we get messages from the latest date + # found in the state minus X days to pick up any new messages in threads. + # If there is state always use lookback + messages_start_date = pendulum.from_timestamp(stream_state[self.cursor_field]) - self.messages_lookback_window + else: + # If there is no state i.e: this is the first sync then there is no use for lookback, just get messages + # from the default start date + messages_start_date = pendulum.from_timestamp(self._start_ts) + + slice_yielded = False + + for message_chunk in self.parent_stream.stream_slices(sync_mode=SyncMode.full_refresh): + self.logger.info(f"Syncing replies {message_chunk}") + for message in self.parent_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=message_chunk): + # check if parent stream record timestamp >= state + if pendulum.from_timestamp(float(message[self.sub_primary_key_2])) >= messages_start_date: + yield {"channel": message_chunk["channel"], self.sub_primary_key_2: message[self.sub_primary_key_2]} + slice_yielded = True + if not slice_yielded: + # yield an empty slice to checkpoint state later + yield {} From 2537e03a2e81be252a54e67c638bdaa412bed1cf Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Tue, 2 Apr 2024 14:01:39 +0300 Subject: [PATCH 43/56] updated unit test --- .../unit_tests/test_components.py | 74 +-------- .../source-slack/unit_tests/test_source.py | 23 +++ .../source-slack/unit_tests/test_streams.py | 147 +++++++++++++++--- 3 files changed, 147 insertions(+), 97 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/unit_tests/test_components.py b/airbyte-integrations/connectors/source-slack/unit_tests/test_components.py index b7857670c3d8c..e40e700fe97b8 100644 --- a/airbyte-integrations/connectors/source-slack/unit_tests/test_components.py +++ b/airbyte-integrations/connectors/source-slack/unit_tests/test_components.py @@ -5,17 +5,12 @@ import pendulum import pytest from airbyte_cdk.sources.declarative.extractors import DpathExtractor, RecordSelector -from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import ParentStreamConfig -from airbyte_cdk.sources.declarative.requesters import HttpRequester, RequestOption -from airbyte_cdk.sources.declarative.requesters.request_option import RequestOptionType -from airbyte_cdk.sources.declarative.types import StreamSlice +from airbyte_cdk.sources.declarative.requesters import HttpRequester from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator from airbyte_protocol.models import SyncMode -from freezegun import freeze_time from source_slack import SourceSlack from source_slack.components.channel_members_extractor import ChannelMembersExtractor from source_slack.components.join_channels import ChannelsRetriever, 
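A worked example of the lookback slicing implemented by Threads.stream_slices in [PATCH 42/56] above, covering only the branch where a cursor is already saved in the state; the timestamps and the one-day window are illustrative:

# With a saved cursor and a lookback window of one day, only parent messages newer
# than (cursor - lookback) produce thread slices.
import pendulum

state = {"float_ts": 1683104568.0}            # cursor left by a previous sync
lookback = pendulum.Duration(days=1)          # e.g. config["lookback_window"] = 1
threshold = pendulum.from_timestamp(state["float_ts"]) - lookback

parent_messages = [
    {"channel": "C04LTCM2Y56", "ts": "1683104499.808709"},  # inside the window -> slice
    {"channel": "C04LTCM2Y56", "ts": "1577866844.000000"},  # far older         -> skipped
]

slices = [
    {"channel": m["channel"], "ts": m["ts"]}
    for m in parent_messages
    if pendulum.from_timestamp(float(m["ts"])) >= threshold
]
print(slices)  # [{'channel': 'C04LTCM2Y56', 'ts': '1683104499.808709'}]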
JoinChannelsStream -from source_slack.components.threads_partition_router import ThreadsPartitionRouter def get_stream_by_name(stream_name, config): @@ -39,73 +34,6 @@ def test_channel_members_extractor(token_config): {"member_id": "W012A3CDE"}] -def get_threads_partition_router(config): - channel_messages_stream = get_stream_by_name("channel_messages", config) - return ThreadsPartitionRouter( - config=config, - parameters={}, - parent_stream_configs=[ - ParentStreamConfig( - config=config, - stream=channel_messages_stream, - parent_key="ts", - partition_field="float_ts", - parameters={}, - request_option=RequestOption(field_name="ts", inject_into=RequestOptionType.request_parameter, parameters={}) - ), ] - ) - - -@freeze_time("2024-03-10T20:00:00Z", tz_offset=-2) -def test_threads_partition_router(token_config, requests_mock): - start_date = "2024-03-01T20:00:00Z" - end_date = pendulum.now() - oldest, latest = int(pendulum.parse(start_date).timestamp()), int(end_date.timestamp()) - token_config["start_date"] = start_date - for channel in token_config["channel_filter"]: - requests_mock.get( - url=f"https://slack.com/api/conversations.history?" - f"inclusive=True&limit=1000&channel={channel}&" - f"oldest={oldest}&latest={latest}", - json={"messages": [{"ts": latest}, {"ts": oldest}]} - ) - - router = get_threads_partition_router(token_config) - slices = router.stream_slices() - expected = [{"channel": "airbyte-for-beginners", "float_ts": latest}, - {"channel": "airbyte-for-beginners", "float_ts": oldest}, - {"channel": "good-reads", "float_ts": latest}, - {"channel": "good-reads", "float_ts": oldest}] - - assert list(slices) == expected - - -@pytest.mark.parametrize( - "stream_slice, stream_state, expected", - ( - ({}, {}, {}), - ( - {'float_ts': '1683104542.931169', 'channel': 'C04KX3KEZ54'}, - {'states': [{'partition': {'channel': 'C04KX3KEZ54', 'float_ts': '1683104542.931169'}, 'cursor': {'float_ts': 1683104568}}]}, - {} - ), - ( - {'float_ts': '1783104542.931169', 'channel': 'C04KX3KEZ54'}, - {'states': [{'partition': {'channel': 'C04KX3KEZ54', 'float_ts': '1683104542.931169'}, 'cursor': {'float_ts': 1683104568}}]}, - {'ts': '1783104542.931169'} - ), - ), - ids=[ - "empty_params_without_slice_and_state", - "empty_params_cursor_grater_then_slice_value", - "params_slice_value_greater_then_cursor_value"] -) -def test_threads_request_params(token_config, stream_slice, stream_state, expected): - router = get_threads_partition_router(token_config) - _slice = StreamSlice(partition=stream_slice, cursor_slice={}) - assert router.get_request_params(stream_slice=_slice, stream_state=stream_state) == expected - - def test_join_channels(token_config, requests_mock, joined_channel): mocked_request = requests_mock.post( url="https://slack.com/api/conversations.join", diff --git a/airbyte-integrations/connectors/source-slack/unit_tests/test_source.py b/airbyte-integrations/connectors/source-slack/unit_tests/test_source.py index dee2b6c556603..4e644c166c6cd 100644 --- a/airbyte-integrations/connectors/source-slack/unit_tests/test_source.py +++ b/airbyte-integrations/connectors/source-slack/unit_tests/test_source.py @@ -10,6 +10,13 @@ from .conftest import parametrized_configs +def get_stream_by_name(stream_name, config): + streams = SourceSlack().streams(config=config) + for stream in streams: + if stream.name == stream_name: + return stream + raise ValueError(f"Stream {stream_name} not found") + @parametrized_configs def test_streams(conversations_list, config, is_valid): source = SourceSlack() @@ 
-48,3 +55,19 @@ def test_check_connection(token_config, requests_mock, status_code, response, is assert success is is_connection_successful if not success: assert error_msg in error + + +def test_threads_auth(token_config, oauth_config): + source = SourceSlack() + auth = source._threads_authenticator(token_config) + assert auth.token == "Bearer api-token" + source = SourceSlack() + auth = source._threads_authenticator(oauth_config) + assert auth.token == "Bearer access-token" + + +def test_get_threads_stream(token_config): + source = SourceSlack() + channel_messages = get_stream_by_name("channel_messages", token_config) + threads_stream = source.get_threads_stream(token_config, channel_messages) + assert threads_stream diff --git a/airbyte-integrations/connectors/source-slack/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-slack/unit_tests/test_streams.py index 8b504aaf84ba0..d679215363503 100644 --- a/airbyte-integrations/connectors/source-slack/unit_tests/test_streams.py +++ b/airbyte-integrations/connectors/source-slack/unit_tests/test_streams.py @@ -6,14 +6,13 @@ import pendulum import pytest from airbyte_cdk.sources.streams.http.requests_native_auth import TokenAuthenticator -from airbyte_protocol.models import SyncMode -from freezegun import freeze_time from source_slack import SourceSlack +from source_slack.streams import Threads @pytest.fixture -def authenticator(legacy_token_config): - return TokenAuthenticator(legacy_token_config["api_token"]) +def authenticator(token_config): + return TokenAuthenticator(token_config["credentials"]["api_token"]) def get_stream_by_name(stream_name, config): @@ -24,27 +23,127 @@ def get_stream_by_name(stream_name, config): raise ValueError(f"Stream {stream_name} not found") -@freeze_time("2024-03-10T20:00:00Z", tz_offset=-2) -def test_threads_stream_slices(requests_mock, token_config): - start_date = "2024-03-01T20:00:00Z" - end_date = pendulum.now() - oldest, latest = int(pendulum.parse(start_date).timestamp()), int(end_date.timestamp()) - token_config["start_date"] = start_date +@pytest.mark.parametrize( + "start_date, end_date, messages, stream_state, expected_result", + ( + ( + "2020-01-01T00:00:00Z", + "2020-01-02T00:00:00Z", + [{"ts": 1577866844}, {"ts": 1577877406}], + {}, + [ + # two messages per each channel + {'channel': 'airbyte-for-beginners', 'ts': 1577866844}, + {'channel': 'airbyte-for-beginners', 'ts': 1577877406}, + {'channel': 'good-reads', 'ts': 1577866844}, + {'channel': 'good-reads', 'ts': 1577877406}, + ], + ), + ("2020-01-02T00:00:00Z", "2020-01-01T00:00:00Z", [], {}, [{}]), + ( + "2020-01-01T00:00:00Z", + "2020-01-02T00:00:00Z", + [{"ts": 1577866844}, {"ts": 1577877406}], + {"float_ts": 2577866844}, + [ + # no slice when state greater than ts + {}, + ], + ), + ), +) +def test_threads_stream_slices( + requests_mock, authenticator, token_config, start_date, end_date, messages, stream_state, expected_result +): + token_config["channel_filter"] = [] - for channel in token_config["channel_filter"]: - requests_mock.get( - url=f"https://slack.com/api/conversations.history?" 
- f"inclusive=True&limit=1000&channel={channel}&" - f"oldest={oldest}&latest={latest}", - json={"messages": [{"ts": latest}, {"ts": oldest}]} - ) + requests_mock.register_uri( + "GET", "https://slack.com/api/conversations.history?inclusive=True&limit=1000&channel=airbyte-for-beginners", + [{"json": {"messages": messages}}, {"json": {"messages": []}}] + ) + requests_mock.register_uri( + "GET", "https://slack.com/api/conversations.history?inclusive=True&limit=1000&channel=good-reads", + [{"json": {"messages": messages}}, {"json": {"messages": []}}] + ) - threads_stream = get_stream_by_name("threads", token_config) - slices = threads_stream.stream_slices(stream_state=None, sync_mode=SyncMode.full_refresh) + start_date = pendulum.parse(start_date) + end_date = end_date and pendulum.parse(end_date) + + channel_messages_stream = get_stream_by_name("channel_messages", token_config) + + stream = Threads( + authenticator=authenticator, + default_start_date=start_date, + end_date=end_date, + lookback_window=pendulum.Duration(days=token_config["lookback_window"]), + parent_stream=channel_messages_stream + ) + slices = list(stream.stream_slices(stream_state=stream_state)) + assert slices == expected_result + + +@pytest.mark.parametrize( + "current_state, latest_record, expected_state", + ( + ({}, {"float_ts": 1507866844}, {"float_ts": 1626984000.0}), + ({}, {"float_ts": 1726984000}, {"float_ts": 1726984000.0}), + ({"float_ts": 1588866844}, {"float_ts": 1577866844}, {"float_ts": 1588866844}), + ({"float_ts": 1577800844}, {"float_ts": 1577866844}, {"float_ts": 1577866844}), + ), +) +def test_get_updated_state(authenticator, token_config, current_state, latest_record, expected_state): + channel_messages_stream = get_stream_by_name("channel_messages", token_config) + stream = Threads( + authenticator=authenticator, + default_start_date=pendulum.parse(token_config["start_date"]), + lookback_window=token_config["lookback_window"], + parent_stream=channel_messages_stream + ) + assert stream.get_updated_state(current_stream_state=current_state, latest_record=latest_record) == expected_state + + +def test_threads_request_params(authenticator, token_config): + channel_messages_stream = get_stream_by_name("channel_messages", token_config) + stream = Threads( + authenticator=authenticator, + default_start_date=pendulum.parse(token_config["start_date"]), + lookback_window=token_config["lookback_window"], + parent_stream=channel_messages_stream + ) + threads_slice = {'channel': 'airbyte-for-beginners', 'ts': 1577866844} + expected = {'channel': 'airbyte-for-beginners', 'limit': 1000, 'ts': 1577866844} + assert stream.request_params(stream_slice=threads_slice, stream_state={}) == expected + + +def test_threads_parse_response(mocker, authenticator, token_config): + channel_messages_stream = get_stream_by_name("channel_messages", token_config) + stream = Threads( + authenticator=authenticator, + default_start_date=pendulum.parse(token_config["start_date"]), + lookback_window=token_config["lookback_window"], + parent_stream=channel_messages_stream + ) + resp = { + "messages": [ + { + "type": "message", + "user": "U061F7AUR", + "text": "island", + "thread_ts": "1482960137.003543", + "reply_count": 3, + "subscribed": True, + "last_read": "1484678597.521003", + "unread_count": 0, + "ts": "1482960137.003543" + } + ] + } + resp_mock = mocker.Mock() + resp_mock.json.return_value = resp + threads_slice = {'channel': 'airbyte-for-beginners', 'ts': 1577866844} + actual_response = 
list(stream.parse_response(response=resp_mock,stream_slice=threads_slice)) + assert len(actual_response) == 1 + assert actual_response[0]["float_ts"] == 1482960137.003543 + assert actual_response[0]["channel_id"] == "airbyte-for-beginners" - expected = [{"float_ts": 1710093600, "channel": "airbyte-for-beginners", "start_time": "1709236800", "end_time": "1710093600"}, - {"float_ts": 1709323200, "channel": "airbyte-for-beginners", "start_time": "1709236800", "end_time": "1710093600"}, - {"float_ts": 1710093600, "channel": "good-reads", "start_time": "1709236800", "end_time": "1710093600"}, - {"float_ts": 1709323200, "channel": "good-reads", "start_time": "1709236800", "end_time": "1710093600"}] - assert list(slices) == expected From f23f55e4255f6a79f572050d5c64ea71d9e8f8c5 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Tue, 2 Apr 2024 14:05:39 +0300 Subject: [PATCH 44/56] update poetry.lock --- airbyte-integrations/connectors/source-slack/poetry.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/poetry.lock b/airbyte-integrations/connectors/source-slack/poetry.lock index 7118def6ca97e..e0eaa095b7819 100644 --- a/airbyte-integrations/connectors/source-slack/poetry.lock +++ b/airbyte-integrations/connectors/source-slack/poetry.lock @@ -2,13 +2,13 @@ [[package]] name = "airbyte-cdk" -version = "0.78.1" +version = "0.78.3" description = "A framework for writing Airbyte Connectors." optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "airbyte_cdk-0.78.1-py3-none-any.whl", hash = "sha256:73dfc03e55a7107bf28b5bbc4e43572d448c60e9b34368d22cf48b6536aa2263"}, - {file = "airbyte_cdk-0.78.1.tar.gz", hash = "sha256:700e5526ae29db1e453b3def8682726f7d8aa653ee2f3056488d0a484f055133"}, + {file = "airbyte_cdk-0.78.3-py3-none-any.whl", hash = "sha256:699d61ace9f8ca4477e06af3ff1bc56856e955a444081a1701c41d94629dcd74"}, + {file = "airbyte_cdk-0.78.3.tar.gz", hash = "sha256:192c2594d0e93140a7ec635fea3d4644318faada6aa986805752adf4caf9b126"}, ] [package.dependencies] @@ -32,7 +32,7 @@ requests_cache = "*" wcmatch = "8.4" [package.extras] -file-based = ["avro (>=1.11.2,<1.12.0)", "fastavro (>=1.8.0,<1.9.0)", "markdown", "pyarrow (>=15.0.0,<15.1.0)", "pytesseract (==0.3.10)", "unstructured.pytesseract (>=0.3.12)", "unstructured[docx,pptx] (==0.10.27)"] +file-based = ["avro (>=1.11.2,<1.12.0)", "fastavro (>=1.8.0,<1.9.0)", "markdown", "pdf2image (==1.16.3)", "pdfminer.six (==20221105)", "pyarrow (>=15.0.0,<15.1.0)", "pytesseract (==0.3.10)", "unstructured.pytesseract (>=0.3.12)", "unstructured[docx,pptx] (==0.10.27)"] sphinx-docs = ["Sphinx (>=4.2,<4.3)", "sphinx-rtd-theme (>=1.0,<1.1)"] vector-db-based = ["cohere (==4.21)", "langchain (==0.0.271)", "openai[embeddings] (==0.27.9)", "tiktoken (==0.4.0)"] From 37de0b855d3d60dfdffdeda74e897f139071db63 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Tue, 2 Apr 2024 14:53:22 +0300 Subject: [PATCH 45/56] refactor code+updated expected records --- .../source-slack/integration_tests/expected_records.jsonl | 7 ++++--- .../connectors/source-slack/unit_tests/conftest.py | 1 - .../connectors/source-slack/unit_tests/test_streams.py | 2 -- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/integration_tests/expected_records.jsonl b/airbyte-integrations/connectors/source-slack/integration_tests/expected_records.jsonl index 1888ecc643f0e..6ed8e6208f04e 100644 --- 
a/airbyte-integrations/connectors/source-slack/integration_tests/expected_records.jsonl +++ b/airbyte-integrations/connectors/source-slack/integration_tests/expected_records.jsonl @@ -8,9 +8,10 @@ {"stream": "channel_messages", "data": {"user": "U04L65GPMKN", "type": "message", "ts": "1683104499.808709", "client_msg_id": "e27672c0-451e-42a6-8eff-a14d2db8ac1e", "text": "Test Thread 1", "team": "T04KX3KDDU6", "thread_ts": "1683104499.808709", "reply_count": 2, "reply_users_count": 1, "latest_reply": "1683104528.084359", "reply_users": ["U04L65GPMKN"], "is_locked": false, "subscribed": true, "last_read": "1683104528.084359", "blocks": [{"type": "rich_text", "block_id": "0j7", "elements": [{"type": "rich_text_section", "elements": [{"type": "text", "text": "Test Thread 1"}]}]}], "float_ts": 1683104499, "channel_id": "C04LTCM2Y56"}, "emitted_at": 1711041522765} {"stream": "channel_messages", "data": {"user": "USLACKBOT", "type": "message", "ts": "1695880827.186049", "bot_id": "B01", "text": "Reminder: test reminder.", "team": "T04KX3KDDU6", "blocks": [{"type": "rich_text", "block_id": "BGzX", "elements": [{"type": "rich_text_section", "elements": [{"type": "text", "text": "Reminder: test reminder."}]}]}], "float_ts": 1695880827, "channel_id": "C04LTCM2Y56"}, "emitted_at": 1711041523073} {"stream": "channel_messages", "data": {"subtype": "reminder_add", "user": "U04L65GPMKN", "type": "message", "ts": "1695814864.744249", "text": " set up a reminder \u201ctest reminder\u201d in this channel at 9AM tomorrow, Eastern European Summer Time.", "float_ts": 1695814864, "channel_id": "C04LTCM2Y56"}, "emitted_at": 1711041523080} -{"stream": "threads", "data": {"user": "U04L65GPMKN", "type": "message", "ts": "1683104528.084359", "client_msg_id": "ffccbb24-8dd6-476d-87bf-65e5fa033cb9", "text": "<@U04M23SBJGM> test test test", "team": "T04KX3KDDU6", "thread_ts": "1683104499.808709", "parent_user_id": "U04L65GPMKN", "blocks": [{"type": "rich_text", "block_id": "Lvl", "elements": [{"type": "rich_text_section", "elements": [{"type": "user", "user_id": "U04M23SBJGM"}, {"type": "text", "text": " test test test"}]}]}], "float_ts": 1683104528.084359, "channel_id": "C04LTCM2Y56"}, "emitted_at": 1710778305077} -{"stream": "threads", "data": {"user": "USLACKBOT", "type": "message", "ts": "1695880827.186049", "bot_id": "B01", "text": "Reminder: test reminder.", "team": "T04KX3KDDU6", "blocks": [{"type": "rich_text", "block_id": "BGzX", "elements": [{"type": "rich_text_section", "elements": [{"type": "text", "text": "Reminder: test reminder."}]}]}], "float_ts": 1695880827.186049, "channel_id": "C04LTCM2Y56"}, "emitted_at": 1710778305751} -{"stream": "threads", "data": {"subtype": "reminder_add", "user": "U04L65GPMKN", "type": "message", "ts": "1695814864.744249", "text": " set up a reminder \u201ctest reminder\u201d in this channel at 9AM tomorrow, Eastern European Summer Time.", "float_ts": 1695814864.744249, "channel_id": "C04LTCM2Y56"}, "emitted_at": 1710778305981} +{"stream": "threads", "data": {"user": "U04L65GPMKN", "type": "message", "ts": "1683104542.931169", "client_msg_id": "3ae60d35-58b8-441c-923a-75de35a4ed8a", "text": "Test Thread 2", "team": "T04KX3KDDU6", "thread_ts": "1683104542.931169", "reply_count": 2, "reply_users_count": 1, "latest_reply": "1683104568.059569", "reply_users": ["U04L65GPMKN"], "is_locked": false, "subscribed": true, "last_read": "1683104568.059569", "blocks": [{"type": "rich_text", "block_id": "WLB", "elements": [{"type": "rich_text_section", "elements": [{"type": "text", "text": 
"Test Thread 2"}]}]}], "channel_id": "C04KX3KEZ54", "float_ts": 1683104542.931169}, "emitted_at": 1712056304168} +{"stream": "threads", "data": {"user": "U04L65GPMKN", "type": "message", "ts": "1683104559.922849", "client_msg_id": "3e96d351-270c-493f-a1a0-fdc3c4c0e11f", "text": "<@U04M23SBJGM> test test test", "team": "T04KX3KDDU6", "thread_ts": "1683104542.931169", "parent_user_id": "U04L65GPMKN", "blocks": [{"type": "rich_text", "block_id": "tX6vr", "elements": [{"type": "rich_text_section", "elements": [{"type": "user", "user_id": "U04M23SBJGM"}, {"type": "text", "text": " test test test"}]}]}], "channel_id": "C04KX3KEZ54", "float_ts": 1683104559.922849}, "emitted_at": 1712056304169} +{"stream": "threads", "data": {"user": "U04L65GPMKN", "type": "message", "ts": "1683104568.059569", "client_msg_id": "08023e44-9d18-41ed-81dd-5f04ed699656", "text": "<@U04LY6NARHU> test test", "team": "T04KX3KDDU6", "thread_ts": "1683104542.931169", "parent_user_id": "U04L65GPMKN", "blocks": [{"type": "rich_text", "block_id": "IyUF", "elements": [{"type": "rich_text_section", "elements": [{"type": "user", "user_id": "U04LY6NARHU"}, {"type": "text", "text": " test test"}]}]}], "channel_id": "C04KX3KEZ54", "float_ts": 1683104568.059569}, "emitted_at": 1712056304169} +{"stream": "threads", "data": {"user": "USLACKBOT", "type": "message", "ts": "1695880827.186049", "bot_id": "B01", "text": "Reminder: test reminder.", "team": "T04KX3KDDU6", "blocks": [{"type": "rich_text", "block_id": "BGzX", "elements": [{"type": "rich_text_section", "elements": [{"type": "text", "text": "Reminder: test reminder."}]}]}], "channel_id": "C04LTCM2Y56", "float_ts": 1695880827.186049}, "emitted_at": 1712056304703} {"stream": "users", "data": {"id": "USLACKBOT", "team_id": "T04KX3KDDU6", "name": "slackbot", "deleted": false, "color": "757575", "real_name": "Slackbot", "tz": "America/Los_Angeles", "tz_label": "Pacific Daylight Time", "tz_offset": -25200, "profile": {"title": "", "phone": "", "skype": "", "real_name": "Slackbot", "real_name_normalized": "Slackbot", "display_name": "Slackbot", "display_name_normalized": "Slackbot", "fields": {}, "status_text": "", "status_emoji": "", "status_emoji_display_info": [], "status_expiration": 0, "avatar_hash": "sv41d8cd98f0", "always_active": true, "first_name": "slackbot", "last_name": "", "image_24": "https://a.slack-edge.com/80588/img/slackbot_24.png", "image_32": "https://a.slack-edge.com/80588/img/slackbot_32.png", "image_48": "https://a.slack-edge.com/80588/img/slackbot_48.png", "image_72": "https://a.slack-edge.com/80588/img/slackbot_72.png", "image_192": "https://a.slack-edge.com/80588/marketing/img/avatars/slackbot/avatar-slackbot.png", "image_512": "https://a.slack-edge.com/80588/img/slackbot_512.png", "status_text_canonical": "", "team": "T04KX3KDDU6"}, "is_admin": false, "is_owner": false, "is_primary_owner": false, "is_restricted": false, "is_ultra_restricted": false, "is_bot": false, "is_app_user": false, "updated": 0, "is_email_confirmed": false, "who_can_share_contact_card": "EVERYONE"}, "emitted_at": 1710501138877} {"stream": "users", "data": {"id": "U04KUMXNYMV", "team_id": "T04KX3KDDU6", "name": "deactivateduser693438", "deleted": true, "profile": {"title": "", "phone": "", "skype": "", "real_name": "Deactivated User", "real_name_normalized": "Deactivated User", "display_name": "deactivateduser", "display_name_normalized": "deactivateduser", "fields": null, "status_text": "", "status_emoji": "", "status_emoji_display_info": [], "status_expiration": 0, "avatar_hash": 
"g849cc56ed76", "huddle_state": "default_unset", "first_name": "Deactivated", "last_name": "User", "image_24": "https://secure.gravatar.com/avatar/d5320ceddda202563fd9e6222c07c00a.jpg?s=24&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0011-24.png", "image_32": "https://secure.gravatar.com/avatar/d5320ceddda202563fd9e6222c07c00a.jpg?s=32&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0011-32.png", "image_48": "https://secure.gravatar.com/avatar/d5320ceddda202563fd9e6222c07c00a.jpg?s=48&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0011-48.png", "image_72": "https://secure.gravatar.com/avatar/d5320ceddda202563fd9e6222c07c00a.jpg?s=72&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0011-72.png", "image_192": "https://secure.gravatar.com/avatar/d5320ceddda202563fd9e6222c07c00a.jpg?s=192&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0011-192.png", "image_512": "https://secure.gravatar.com/avatar/d5320ceddda202563fd9e6222c07c00a.jpg?s=512&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0011-512.png", "status_text_canonical": "", "team": "T04KX3KDDU6"}, "is_bot": false, "is_app_user": false, "updated": 1675090804, "is_forgotten": true, "is_invited_user": true}, "emitted_at": 1710501138879} {"stream": "users", "data": {"id": "U04L2KY5CES", "team_id": "T04KX3KDDU6", "name": "deactivateduser686066", "deleted": true, "profile": {"title": "", "phone": "", "skype": "", "real_name": "Deactivated User", "real_name_normalized": "Deactivated User", "display_name": "deactivateduser", "display_name_normalized": "deactivateduser", "fields": null, "status_text": "", "status_emoji": "", "status_emoji_display_info": [], "status_expiration": 0, "avatar_hash": "g849cc56ed76", "huddle_state": "default_unset", "first_name": "Deactivated", "last_name": "User", "image_24": "https://secure.gravatar.com/avatar/cacb225265b3b19c4e72029a62cf1ef1.jpg?s=24&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0009-24.png", "image_32": "https://secure.gravatar.com/avatar/cacb225265b3b19c4e72029a62cf1ef1.jpg?s=32&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0009-32.png", "image_48": "https://secure.gravatar.com/avatar/cacb225265b3b19c4e72029a62cf1ef1.jpg?s=48&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0009-48.png", "image_72": "https://secure.gravatar.com/avatar/cacb225265b3b19c4e72029a62cf1ef1.jpg?s=72&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0009-72.png", "image_192": "https://secure.gravatar.com/avatar/cacb225265b3b19c4e72029a62cf1ef1.jpg?s=192&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0009-192.png", "image_512": "https://secure.gravatar.com/avatar/cacb225265b3b19c4e72029a62cf1ef1.jpg?s=512&d=https%3A%2F%2Fa.slack-edge.com%2Fdf10d%2Fimg%2Favatars%2Fava_0009-512.png", "status_text_canonical": "", "team": "T04KX3KDDU6"}, "is_bot": false, "is_app_user": false, "updated": 1675090785, "is_forgotten": true, "is_invited_user": true}, "emitted_at": 1710501138881} diff --git a/airbyte-integrations/connectors/source-slack/unit_tests/conftest.py b/airbyte-integrations/connectors/source-slack/unit_tests/conftest.py index 94897b9c22f5d..002a9ec96779d 100644 --- a/airbyte-integrations/connectors/source-slack/unit_tests/conftest.py +++ b/airbyte-integrations/connectors/source-slack/unit_tests/conftest.py @@ -91,7 +91,6 @@ def invalid_config() -> MutableMapping: ( (_token_config(), True), (_oauth_config(), True), - # (_legacy_token_config(), True), 
(_invalid_config(), False), ), ) diff --git a/airbyte-integrations/connectors/source-slack/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-slack/unit_tests/test_streams.py index d679215363503..24b2b50a6e661 100644 --- a/airbyte-integrations/connectors/source-slack/unit_tests/test_streams.py +++ b/airbyte-integrations/connectors/source-slack/unit_tests/test_streams.py @@ -145,5 +145,3 @@ def test_threads_parse_response(mocker, authenticator, token_config): assert len(actual_response) == 1 assert actual_response[0]["float_ts"] == 1482960137.003543 assert actual_response[0]["channel_id"] == "airbyte-for-beginners" - - From 8f967acd9937b2921d7e3e5bb889ad9e65070f25 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Thu, 4 Apr 2024 19:54:22 +0300 Subject: [PATCH 46/56] format fix --- airbyte-integrations/connectors/source-slack/.coveragerc | 2 +- .../source-slack/integration_tests/abnormal_state.json | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/.coveragerc b/airbyte-integrations/connectors/source-slack/.coveragerc index df94ba63f945b..ce32301144712 100644 --- a/airbyte-integrations/connectors/source-slack/.coveragerc +++ b/airbyte-integrations/connectors/source-slack/.coveragerc @@ -1,3 +1,3 @@ [run] omit = - source_slack/run.py \ No newline at end of file + source_slack/run.py diff --git a/airbyte-integrations/connectors/source-slack/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-slack/integration_tests/abnormal_state.json index 65b0e9d4d90f8..104b5856e0748 100644 --- a/airbyte-integrations/connectors/source-slack/integration_tests/abnormal_state.json +++ b/airbyte-integrations/connectors/source-slack/integration_tests/abnormal_state.json @@ -2,9 +2,7 @@ { "type": "STREAM", "stream": { - "stream_state": { - "float_ts": 7270247822 - }, + "stream_state": { "float_ts": 7270247822 }, "stream_descriptor": { "name": "threads" } } }, From 9b485892c7de90bd5315ee102c0e619c1982a425 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Thu, 4 Apr 2024 19:54:45 +0300 Subject: [PATCH 47/56] updated migration guide --- .../connectors/source-slack/metadata.yaml | 2 +- .../connectors/source-slack/poetry.lock | 80 +++++++++---------- docs/integrations/sources/slack-migrations.md | 4 +- 3 files changed, 43 insertions(+), 43 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/metadata.yaml b/airbyte-integrations/connectors/source-slack/metadata.yaml index cce96feb08546..51cbe9235053e 100644 --- a/airbyte-integrations/connectors/source-slack/metadata.yaml +++ b/airbyte-integrations/connectors/source-slack/metadata.yaml @@ -33,7 +33,7 @@ data: message: The source slack connector is being migrated from the Python CDK to our declarative low-code CDK. Due to changes in the handling of state format for incremental substreams, this migration constitutes a breaking change. - After updating, please reset your source before resuming syncs. For more information, see our migration documentation for source . + After updating, please reset your source before resuming syncs. For more information, see our migration documentation for Source Slack. 
upgradeDeadline: "2024-04-19" # TODO: update this date before merge suggestedStreams: streams: diff --git a/airbyte-integrations/connectors/source-slack/poetry.lock b/airbyte-integrations/connectors/source-slack/poetry.lock index e0eaa095b7819..d2cbc846a2369 100644 --- a/airbyte-integrations/connectors/source-slack/poetry.lock +++ b/airbyte-integrations/connectors/source-slack/poetry.lock @@ -2,13 +2,13 @@ [[package]] name = "airbyte-cdk" -version = "0.78.3" +version = "0.78.6" description = "A framework for writing Airbyte Connectors." optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "airbyte_cdk-0.78.3-py3-none-any.whl", hash = "sha256:699d61ace9f8ca4477e06af3ff1bc56856e955a444081a1701c41d94629dcd74"}, - {file = "airbyte_cdk-0.78.3.tar.gz", hash = "sha256:192c2594d0e93140a7ec635fea3d4644318faada6aa986805752adf4caf9b126"}, + {file = "airbyte_cdk-0.78.6-py3-none-any.whl", hash = "sha256:e5f44c6da6d5b5d6f3f6a7f41a3f4a5e2dfc6fefb4c6823af6302c34c6fb4a87"}, + {file = "airbyte_cdk-0.78.6.tar.gz", hash = "sha256:0178f3cefa705f600d51f09e1313024a89cd1c99f2f1f796e8e0181d8e02ad2f"}, ] [package.dependencies] @@ -566,47 +566,47 @@ files = [ [[package]] name = "pydantic" -version = "1.10.14" +version = "1.10.15" description = "Data validation and settings management using python type hints" optional = false python-versions = ">=3.7" files = [ - {file = "pydantic-1.10.14-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7f4fcec873f90537c382840f330b90f4715eebc2bc9925f04cb92de593eae054"}, - {file = "pydantic-1.10.14-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e3a76f571970fcd3c43ad982daf936ae39b3e90b8a2e96c04113a369869dc87"}, - {file = "pydantic-1.10.14-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82d886bd3c3fbeaa963692ef6b643159ccb4b4cefaf7ff1617720cbead04fd1d"}, - {file = "pydantic-1.10.14-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:798a3d05ee3b71967844a1164fd5bdb8c22c6d674f26274e78b9f29d81770c4e"}, - {file = "pydantic-1.10.14-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:23d47a4b57a38e8652bcab15a658fdb13c785b9ce217cc3a729504ab4e1d6bc9"}, - {file = "pydantic-1.10.14-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f9f674b5c3bebc2eba401de64f29948ae1e646ba2735f884d1594c5f675d6f2a"}, - {file = "pydantic-1.10.14-cp310-cp310-win_amd64.whl", hash = "sha256:24a7679fab2e0eeedb5a8924fc4a694b3bcaac7d305aeeac72dd7d4e05ecbebf"}, - {file = "pydantic-1.10.14-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9d578ac4bf7fdf10ce14caba6f734c178379bd35c486c6deb6f49006e1ba78a7"}, - {file = "pydantic-1.10.14-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fa7790e94c60f809c95602a26d906eba01a0abee9cc24150e4ce2189352deb1b"}, - {file = "pydantic-1.10.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aad4e10efa5474ed1a611b6d7f0d130f4aafadceb73c11d9e72823e8f508e663"}, - {file = "pydantic-1.10.14-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1245f4f61f467cb3dfeced2b119afef3db386aec3d24a22a1de08c65038b255f"}, - {file = "pydantic-1.10.14-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:21efacc678a11114c765eb52ec0db62edffa89e9a562a94cbf8fa10b5db5c046"}, - {file = "pydantic-1.10.14-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:412ab4a3f6dbd2bf18aefa9f79c7cca23744846b31f1d6555c2ee2b05a2e14ca"}, - {file = "pydantic-1.10.14-cp311-cp311-win_amd64.whl", hash = 
"sha256:e897c9f35281f7889873a3e6d6b69aa1447ceb024e8495a5f0d02ecd17742a7f"}, - {file = "pydantic-1.10.14-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:d604be0f0b44d473e54fdcb12302495fe0467c56509a2f80483476f3ba92b33c"}, - {file = "pydantic-1.10.14-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a42c7d17706911199798d4c464b352e640cab4351efe69c2267823d619a937e5"}, - {file = "pydantic-1.10.14-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:596f12a1085e38dbda5cbb874d0973303e34227b400b6414782bf205cc14940c"}, - {file = "pydantic-1.10.14-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:bfb113860e9288d0886e3b9e49d9cf4a9d48b441f52ded7d96db7819028514cc"}, - {file = "pydantic-1.10.14-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:bc3ed06ab13660b565eed80887fcfbc0070f0aa0691fbb351657041d3e874efe"}, - {file = "pydantic-1.10.14-cp37-cp37m-win_amd64.whl", hash = "sha256:ad8c2bc677ae5f6dbd3cf92f2c7dc613507eafe8f71719727cbc0a7dec9a8c01"}, - {file = "pydantic-1.10.14-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c37c28449752bb1f47975d22ef2882d70513c546f8f37201e0fec3a97b816eee"}, - {file = "pydantic-1.10.14-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:49a46a0994dd551ec051986806122767cf144b9702e31d47f6d493c336462597"}, - {file = "pydantic-1.10.14-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:53e3819bd20a42470d6dd0fe7fc1c121c92247bca104ce608e609b59bc7a77ee"}, - {file = "pydantic-1.10.14-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0fbb503bbbbab0c588ed3cd21975a1d0d4163b87e360fec17a792f7d8c4ff29f"}, - {file = "pydantic-1.10.14-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:336709883c15c050b9c55a63d6c7ff09be883dbc17805d2b063395dd9d9d0022"}, - {file = "pydantic-1.10.14-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:4ae57b4d8e3312d486e2498d42aed3ece7b51848336964e43abbf9671584e67f"}, - {file = "pydantic-1.10.14-cp38-cp38-win_amd64.whl", hash = "sha256:dba49d52500c35cfec0b28aa8b3ea5c37c9df183ffc7210b10ff2a415c125c4a"}, - {file = "pydantic-1.10.14-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c66609e138c31cba607d8e2a7b6a5dc38979a06c900815495b2d90ce6ded35b4"}, - {file = "pydantic-1.10.14-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d986e115e0b39604b9eee3507987368ff8148222da213cd38c359f6f57b3b347"}, - {file = "pydantic-1.10.14-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:646b2b12df4295b4c3148850c85bff29ef6d0d9621a8d091e98094871a62e5c7"}, - {file = "pydantic-1.10.14-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:282613a5969c47c83a8710cc8bfd1e70c9223feb76566f74683af889faadc0ea"}, - {file = "pydantic-1.10.14-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:466669501d08ad8eb3c4fecd991c5e793c4e0bbd62299d05111d4f827cded64f"}, - {file = "pydantic-1.10.14-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:13e86a19dca96373dcf3190fcb8797d40a6f12f154a244a8d1e8e03b8f280593"}, - {file = "pydantic-1.10.14-cp39-cp39-win_amd64.whl", hash = "sha256:08b6ec0917c30861e3fe71a93be1648a2aa4f62f866142ba21670b24444d7fd8"}, - {file = "pydantic-1.10.14-py3-none-any.whl", hash = "sha256:8ee853cd12ac2ddbf0ecbac1c289f95882b2d4482258048079d13be700aa114c"}, - {file = "pydantic-1.10.14.tar.gz", hash = "sha256:46f17b832fe27de7850896f3afee50ea682220dd218f7e9c88d436788419dca6"}, + {file = "pydantic-1.10.15-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:22ed12ee588b1df028a2aa5d66f07bf8f8b4c8579c2e96d5a9c1f96b77f3bb55"}, + {file = "pydantic-1.10.15-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:75279d3cac98186b6ebc2597b06bcbc7244744f6b0b44a23e4ef01e5683cc0d2"}, + {file = "pydantic-1.10.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50f1666a9940d3d68683c9d96e39640f709d7a72ff8702987dab1761036206bb"}, + {file = "pydantic-1.10.15-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82790d4753ee5d00739d6cb5cf56bceb186d9d6ce134aca3ba7befb1eedbc2c8"}, + {file = "pydantic-1.10.15-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:d207d5b87f6cbefbdb1198154292faee8017d7495a54ae58db06762004500d00"}, + {file = "pydantic-1.10.15-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:e49db944fad339b2ccb80128ffd3f8af076f9f287197a480bf1e4ca053a866f0"}, + {file = "pydantic-1.10.15-cp310-cp310-win_amd64.whl", hash = "sha256:d3b5c4cbd0c9cb61bbbb19ce335e1f8ab87a811f6d589ed52b0254cf585d709c"}, + {file = "pydantic-1.10.15-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c3d5731a120752248844676bf92f25a12f6e45425e63ce22e0849297a093b5b0"}, + {file = "pydantic-1.10.15-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c365ad9c394f9eeffcb30a82f4246c0006417f03a7c0f8315d6211f25f7cb654"}, + {file = "pydantic-1.10.15-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3287e1614393119c67bd4404f46e33ae3be3ed4cd10360b48d0a4459f420c6a3"}, + {file = "pydantic-1.10.15-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:be51dd2c8596b25fe43c0a4a59c2bee4f18d88efb8031188f9e7ddc6b469cf44"}, + {file = "pydantic-1.10.15-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:6a51a1dd4aa7b3f1317f65493a182d3cff708385327c1c82c81e4a9d6d65b2e4"}, + {file = "pydantic-1.10.15-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4e316e54b5775d1eb59187f9290aeb38acf620e10f7fd2f776d97bb788199e53"}, + {file = "pydantic-1.10.15-cp311-cp311-win_amd64.whl", hash = "sha256:0d142fa1b8f2f0ae11ddd5e3e317dcac060b951d605fda26ca9b234b92214986"}, + {file = "pydantic-1.10.15-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:7ea210336b891f5ea334f8fc9f8f862b87acd5d4a0cbc9e3e208e7aa1775dabf"}, + {file = "pydantic-1.10.15-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3453685ccd7140715e05f2193d64030101eaad26076fad4e246c1cc97e1bb30d"}, + {file = "pydantic-1.10.15-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bea1f03b8d4e8e86702c918ccfd5d947ac268f0f0cc6ed71782e4b09353b26f"}, + {file = "pydantic-1.10.15-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:005655cabc29081de8243126e036f2065bd7ea5b9dff95fde6d2c642d39755de"}, + {file = "pydantic-1.10.15-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:af9850d98fc21e5bc24ea9e35dd80a29faf6462c608728a110c0a30b595e58b7"}, + {file = "pydantic-1.10.15-cp37-cp37m-win_amd64.whl", hash = "sha256:d31ee5b14a82c9afe2bd26aaa405293d4237d0591527d9129ce36e58f19f95c1"}, + {file = "pydantic-1.10.15-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5e09c19df304b8123938dc3c53d3d3be6ec74b9d7d0d80f4f4b5432ae16c2022"}, + {file = "pydantic-1.10.15-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7ac9237cd62947db00a0d16acf2f3e00d1ae9d3bd602b9c415f93e7a9fc10528"}, + {file = "pydantic-1.10.15-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:584f2d4c98ffec420e02305cf675857bae03c9d617fcfdc34946b1160213a948"}, + {file = "pydantic-1.10.15-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bbc6989fad0c030bd70a0b6f626f98a862224bc2b1e36bfc531ea2facc0a340c"}, + {file = "pydantic-1.10.15-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:d573082c6ef99336f2cb5b667b781d2f776d4af311574fb53d908517ba523c22"}, + {file = "pydantic-1.10.15-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6bd7030c9abc80134087d8b6e7aa957e43d35714daa116aced57269a445b8f7b"}, + {file = "pydantic-1.10.15-cp38-cp38-win_amd64.whl", hash = "sha256:3350f527bb04138f8aff932dc828f154847fbdc7a1a44c240fbfff1b57f49a12"}, + {file = "pydantic-1.10.15-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:51d405b42f1b86703555797270e4970a9f9bd7953f3990142e69d1037f9d9e51"}, + {file = "pydantic-1.10.15-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a980a77c52723b0dc56640ced396b73a024d4b74f02bcb2d21dbbac1debbe9d0"}, + {file = "pydantic-1.10.15-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67f1a1fb467d3f49e1708a3f632b11c69fccb4e748a325d5a491ddc7b5d22383"}, + {file = "pydantic-1.10.15-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:676ed48f2c5bbad835f1a8ed8a6d44c1cd5a21121116d2ac40bd1cd3619746ed"}, + {file = "pydantic-1.10.15-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:92229f73400b80c13afcd050687f4d7e88de9234d74b27e6728aa689abcf58cc"}, + {file = "pydantic-1.10.15-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2746189100c646682eff0bce95efa7d2e203420d8e1c613dc0c6b4c1d9c1fde4"}, + {file = "pydantic-1.10.15-cp39-cp39-win_amd64.whl", hash = "sha256:394f08750bd8eaad714718812e7fab615f873b3cdd0b9d84e76e51ef3b50b6b7"}, + {file = "pydantic-1.10.15-py3-none-any.whl", hash = "sha256:28e552a060ba2740d0d2aabe35162652c1459a0b9069fe0db7f4ee0e18e74d58"}, + {file = "pydantic-1.10.15.tar.gz", hash = "sha256:ca832e124eda231a60a041da4f013e3ff24949d94a01154b137fc2f2a43c3ffb"}, ] [package.dependencies] diff --git a/docs/integrations/sources/slack-migrations.md b/docs/integrations/sources/slack-migrations.md index 45907d148bd44..6f1d46ced5a39 100644 --- a/docs/integrations/sources/slack-migrations.md +++ b/docs/integrations/sources/slack-migrations.md @@ -12,7 +12,7 @@ However, due to differences between the Python and low-code CDKs, this migration We’ve evolved and standardized how state is managed for incremental streams that are nested within a parent stream. This change impacts how individual states are tracked and stored for each partition, using a more structured approach to ensure the most granular and flexible state management. -This change will affect the `Threads` and `Channel Messages` streams. +This change will affect the `Channel Messages` stream. ## Migration Steps -* The `reset` for`Threads` and `Channel Messages` streams is required after upgrading to this version. +* The `reset` for `Channel Messages` stream is required after upgrading to this version. 
From f1a317b304669e9c8d624382d58a9c9252421c99 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Fri, 5 Apr 2024 14:23:36 +0300 Subject: [PATCH 48/56] add lookback for channel_messages_stream and 503 handling --- .../source-slack/source_slack/manifest.yaml | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml index f90b9e8ed3d3e..88faa3c4b327b 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml +++ b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml @@ -53,7 +53,10 @@ definitions: error_message: Authentication has failed, please update your credentials. - http_codes: [429] action: RETRY - error_message: Failed to perform a request due to rate limits. Retrying. + error_message: Failed to perform a request due to rate limits. + - http_codes: [503] + action: RETRY + error_message: Failed to perform a request due to internal server error. selector: type: RecordSelector @@ -93,10 +96,13 @@ definitions: error_message: Authentication has failed, please update your credentials. - http_codes: [429] action: RETRY - error_message: Failed to perform a request due to rate limits. Retrying. + error_message: Failed to perform a request due to rate limits. - http_codes: [403, 400] action: FAIL error_message: Got an exception while trying to set up the connection. Most probably, there are no users in the given Slack instance or your token is incorrect. + - http_codes: [503] + action: RETRY + error_message: Failed to perform a request due to internal server error. record_selector: $ref: "#/definitions/selector" paginator: @@ -202,7 +208,6 @@ definitions: name: channels path: conversations.list field_path: channels - use_lookback_window: true parent_key: id partition_field: channel request_option: @@ -215,6 +220,7 @@ definitions: - "%s" step: P100D cursor_granularity: P10D + lookback_window: "P{{ config.get('lookback_window', 0) }}D" datetime_format: "%s" start_datetime: type: MinMaxDatetime From 6fadec0aa85713279ba31c4a7657da06f3bdeec8 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Tue, 9 Apr 2024 10:49:59 +0300 Subject: [PATCH 49/56] add 500 handling --- .../connectors/source-slack/source_slack/manifest.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml index 88faa3c4b327b..5a00f9a41ea1f 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml +++ b/airbyte-integrations/connectors/source-slack/source_slack/manifest.yaml @@ -54,7 +54,7 @@ definitions: - http_codes: [429] action: RETRY error_message: Failed to perform a request due to rate limits. - - http_codes: [503] + - http_codes: [500, 503] action: RETRY error_message: Failed to perform a request due to internal server error. @@ -100,7 +100,7 @@ definitions: - http_codes: [403, 400] action: FAIL error_message: Got an exception while trying to set up the connection. Most probably, there are no users in the given Slack instance or your token is incorrect. - - http_codes: [503] + - http_codes: [500, 503] action: RETRY error_message: Failed to perform a request due to internal server error. 
record_selector: From f67b342d18b2d4095eb6aba6da3cc87dd1b6da28 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Tue, 9 Apr 2024 18:35:04 +0300 Subject: [PATCH 50/56] updated threads stream parent streams --- .../source-slack/source_slack/source.py | 8 +- .../source-slack/source_slack/streams.py | 146 +++++++++++++++--- .../source-slack/source_slack/utils.py | 24 +++ 3 files changed, 153 insertions(+), 25 deletions(-) create mode 100644 airbyte-integrations/connectors/source-slack/source_slack/utils.py diff --git a/airbyte-integrations/connectors/source-slack/source_slack/source.py b/airbyte-integrations/connectors/source-slack/source_slack/source.py index b2c78a18eef69..3925e4bd44a67 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/source.py +++ b/airbyte-integrations/connectors/source-slack/source_slack/source.py @@ -30,27 +30,27 @@ def _threads_authenticator(self, config: Mapping[str, Any]): else: raise Exception(f"No supported option_title: {credentials_title} specified. See spec.json for references") - def get_threads_stream(self, config: Mapping[str, Any], channel_messages: Stream) -> HttpStream: + def get_threads_stream(self, config: Mapping[str, Any]) -> HttpStream: authenticator = self._threads_authenticator(config) default_start_date = pendulum.parse(config["start_date"]) # this field is not exposed to spec, used only for testing purposes end_date = config.get("end_date") end_date = end_date and pendulum.parse(end_date) threads_lookback_window = pendulum.Duration(days=config["lookback_window"]) + channel_filter = config.get("channel_filter", []) threads = Threads( authenticator=authenticator, default_start_date=default_start_date, end_date=end_date, lookback_window=threads_lookback_window, - parent_stream=channel_messages, + channel_filter=channel_filter, ) return threads def streams(self, config: Mapping[str, Any]) -> List[Stream]: declarative_streams = super().streams(config) - channel_messages = [stream for stream in declarative_streams if stream.name == "channel_messages"][0] - threads_stream = self.get_threads_stream(config, channel_messages) + threads_stream = self.get_threads_stream(config) declarative_streams.append(threads_stream) return declarative_streams diff --git a/airbyte-integrations/connectors/source-slack/source_slack/streams.py b/airbyte-integrations/connectors/source-slack/source_slack/streams.py index a8eb9de546bb8..b565d9670e25b 100644 --- a/airbyte-integrations/connectors/source-slack/source_slack/streams.py +++ b/airbyte-integrations/connectors/source-slack/source_slack/streams.py @@ -1,15 +1,20 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# + + from abc import ABC, abstractmethod -from typing import Any, Iterable, Mapping, MutableMapping, Optional +from typing import Any, Iterable, List, Mapping, MutableMapping, Optional import pendulum import requests from airbyte_cdk.models import SyncMode -from airbyte_cdk.sources.streams.http import HttpStream +from airbyte_cdk.sources.streams.http import HttpStream, HttpSubStream from pendulum import DateTime +from .components.join_channels import JoinChannelsStream +from .utils import chunk_date_range + class SlackStream(HttpStream, ABC): url_base = "https://slack.com/api/" @@ -74,7 +79,75 @@ def should_retry(self, response: requests.Response) -> bool: return response.status_code == requests.codes.REQUEST_TIMEOUT or super().should_retry(response) -class IncrementalMessageStream(SlackStream, ABC): +class ChanneledStream(SlackStream, ABC): + """Slack stream with channel filter""" + + def __init__(self, channel_filter: List[str] = [], join_channels: bool = False, **kwargs): + self.channel_filter = channel_filter + self.join_channels = join_channels + self.kwargs = kwargs + super().__init__(**kwargs) + + @property + def join_channels_stream(self) -> JoinChannelsStream: + return JoinChannelsStream(authenticator=self.kwargs.get("authenticator"), channel_filter=self.channel_filter) + + def should_join_to_channel(self, channel: Mapping[str, Any]) -> bool: + """ + The `is_member` property indicates whether or not the API Bot is already assigned / joined to the channel. + https://api.slack.com/types/conversation#booleans + """ + return self.join_channels and not channel.get("is_member") + + def make_join_channel_slice(self, channel: Mapping[str, Any]) -> Mapping[str, Any]: + channel_id: str = channel.get("id") + channel_name: str = channel.get("name") + self.logger.info(f"Joining Slack Channel: `{channel_name}`") + return {"channel": channel_id, "channel_name": channel_name} + + +class Channels(ChanneledStream): + data_field = "channels" + + @property + def use_cache(self) -> bool: + return True + + def path(self, **kwargs) -> str: + return "conversations.list" + + def request_params(self, **kwargs) -> MutableMapping[str, Any]: + params = super().request_params(**kwargs) + params["types"] = "public_channel" + return params + + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[MutableMapping]: + json_response = response.json() + channels = json_response.get(self.data_field, []) + if self.channel_filter: + channels = [channel for channel in channels if channel["name"] in self.channel_filter] + yield from channels + + def read_records(self, sync_mode: SyncMode, **kwargs) -> Iterable[Mapping[str, Any]]: + """ + Override the default `read_records` method to provide the `JoinChannelsStream` functionality, + and be able to read all the channels, not just the ones that already has the API Bot joined. 
+ """ + for channel in super().read_records(sync_mode=sync_mode): + # check the channel should be joined before reading + if self.should_join_to_channel(channel): + # join the channel before reading it + yield from self.join_channels_stream.read_records( + sync_mode=sync_mode, + stream_slice=self.make_join_channel_slice(channel), + ) + # reading the channel data + self.logger.info(f"Reading the channel: `{channel.get('name')}`") + yield channel + + +# Incremental Streams +class IncrementalMessageStream(ChanneledStream, ABC): data_field = "messages" cursor_field = "float_ts" primary_key = ["channel_id", "ts"] @@ -111,24 +184,51 @@ def get_updated_state(self, current_stream_state: MutableMapping[str, Any], late return current_stream_state + def read_records( + self, + sync_mode: SyncMode, + cursor_field: List[str] = None, + stream_slice: Mapping[str, Any] = None, + stream_state: Mapping[str, Any] = None, + ) -> Iterable[Mapping[str, Any]]: + if not stream_slice: + # return an empty iterator + # this is done to emit at least one state message when no slices are generated + return iter([]) + return super().read_records(sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state) + + +class ChannelMessages(HttpSubStream, IncrementalMessageStream): + def path(self, **kwargs) -> str: + return "conversations.history" + + @property + def use_cache(self) -> bool: + return True + + def stream_slices(self, stream_state: Mapping[str, Any] = None, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]: + stream_state = stream_state or {} + start_date = pendulum.from_timestamp(stream_state.get(self.cursor_field, self._start_ts)) + end_date = self._end_ts and pendulum.from_timestamp(self._end_ts) + slice_yielded = False + for parent_slice in super().stream_slices(sync_mode=SyncMode.full_refresh): + channel = parent_slice["parent"] + for period in chunk_date_range(start_date=start_date, end_date=end_date): + yield {"channel": channel["id"], "oldest": period.start.timestamp(), "latest": period.end.timestamp()} + slice_yielded = True + if not slice_yielded: + # yield an empty slice to checkpoint state later + yield {} + class Threads(IncrementalMessageStream): - def __init__(self, lookback_window: Mapping[str, int], parent_stream, **kwargs): + def __init__(self, lookback_window: Mapping[str, int], **kwargs): self.messages_lookback_window = lookback_window - self.parent_stream = parent_stream super().__init__(**kwargs) def path(self, **kwargs) -> str: return "conversations.replies" - @property - def state(self) -> MutableMapping[str, Any]: - return self._state - - @state.setter - def state(self, value: MutableMapping[str, Any]) -> None: - self._state = value - def stream_slices(self, stream_state: Mapping[str, Any] = None, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]: """ The logic for incrementally syncing threads is not very obvious, so buckle up. 
@@ -149,7 +249,7 @@ def stream_slices(self, stream_state: Mapping[str, Any] = None, **kwargs) -> Ite """ stream_state = stream_state or {} - # channels_stream = Channels(authenticator=self._session.auth, channel_filter=self.channel_filter) + channels_stream = Channels(authenticator=self._session.auth, channel_filter=self.channel_filter) if self.cursor_field in stream_state: # Since new messages can be posted to threads continuously after the parent message has been posted, @@ -162,15 +262,19 @@ def stream_slices(self, stream_state: Mapping[str, Any] = None, **kwargs) -> Ite # from the default start date messages_start_date = pendulum.from_timestamp(self._start_ts) - slice_yielded = False + messages_stream = ChannelMessages( + parent=channels_stream, + authenticator=self._session.auth, + default_start_date=messages_start_date, + end_date=self._end_ts and pendulum.from_timestamp(self._end_ts), + ) - for message_chunk in self.parent_stream.stream_slices(sync_mode=SyncMode.full_refresh): + slice_yielded = False + for message_chunk in messages_stream.stream_slices(stream_state={self.cursor_field: messages_start_date.timestamp()}): self.logger.info(f"Syncing replies {message_chunk}") - for message in self.parent_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=message_chunk): - # check if parent stream record timestamp >= state - if pendulum.from_timestamp(float(message[self.sub_primary_key_2])) >= messages_start_date: - yield {"channel": message_chunk["channel"], self.sub_primary_key_2: message[self.sub_primary_key_2]} - slice_yielded = True + for message in messages_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=message_chunk): + yield {"channel": message_chunk["channel"], self.sub_primary_key_2: message[self.sub_primary_key_2]} + slice_yielded = True if not slice_yielded: # yield an empty slice to checkpoint state later yield {} diff --git a/airbyte-integrations/connectors/source-slack/source_slack/utils.py b/airbyte-integrations/connectors/source-slack/source_slack/utils.py new file mode 100644 index 0000000000000..febfb788442ac --- /dev/null +++ b/airbyte-integrations/connectors/source-slack/source_slack/utils.py @@ -0,0 +1,24 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +from typing import Iterable, Optional + +import pendulum +from pendulum import DateTime, Period + + +def chunk_date_range(start_date: DateTime, interval=pendulum.duration(days=100), end_date: Optional[DateTime] = None) -> Iterable[Period]: + """ + Yields a list of the beginning and ending timestamps of each day between the start date and now. 
+ The return value is a pendulum.period + """ + + end_date = end_date or pendulum.now() + # Each stream_slice contains the beginning and ending timestamp for a 24 hour period + chunk_start_date = start_date + while chunk_start_date < end_date: + chunk_end_date = min(chunk_start_date + interval, end_date) + yield pendulum.period(chunk_start_date, chunk_end_date) + chunk_start_date = chunk_end_date From 47c6400791503d8523f5c15055a973e9462e0f06 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Tue, 9 Apr 2024 18:35:26 +0300 Subject: [PATCH 51/56] updated unit tests --- .../source-slack/unit_tests/test_source.py | 3 +- .../source-slack/unit_tests/test_streams.py | 77 +++++++++++++++---- 2 files changed, 62 insertions(+), 18 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/unit_tests/test_source.py b/airbyte-integrations/connectors/source-slack/unit_tests/test_source.py index 4e644c166c6cd..ae1a589227970 100644 --- a/airbyte-integrations/connectors/source-slack/unit_tests/test_source.py +++ b/airbyte-integrations/connectors/source-slack/unit_tests/test_source.py @@ -68,6 +68,5 @@ def test_threads_auth(token_config, oauth_config): def test_get_threads_stream(token_config): source = SourceSlack() - channel_messages = get_stream_by_name("channel_messages", token_config) - threads_stream = source.get_threads_stream(token_config, channel_messages) + threads_stream = source.get_threads_stream(token_config) assert threads_stream diff --git a/airbyte-integrations/connectors/source-slack/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-slack/unit_tests/test_streams.py index 24b2b50a6e661..9a3cd092d90b4 100644 --- a/airbyte-integrations/connectors/source-slack/unit_tests/test_streams.py +++ b/airbyte-integrations/connectors/source-slack/unit_tests/test_streams.py @@ -2,12 +2,13 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# +from unittest.mock import Mock import pendulum import pytest from airbyte_cdk.sources.streams.http.requests_native_auth import TokenAuthenticator from source_slack import SourceSlack -from source_slack.streams import Threads +from source_slack.streams import Channels, JoinChannelsStream, Threads @pytest.fixture @@ -58,25 +59,22 @@ def test_threads_stream_slices( token_config["channel_filter"] = [] requests_mock.register_uri( - "GET", "https://slack.com/api/conversations.history?inclusive=True&limit=1000&channel=airbyte-for-beginners", + "GET", "https://slack.com/api/conversations.history?limit=1000&channel=airbyte-for-beginners", [{"json": {"messages": messages}}, {"json": {"messages": []}}] ) requests_mock.register_uri( - "GET", "https://slack.com/api/conversations.history?inclusive=True&limit=1000&channel=good-reads", + "GET", "https://slack.com/api/conversations.history?limit=1000&channel=good-reads", [{"json": {"messages": messages}}, {"json": {"messages": []}}] ) start_date = pendulum.parse(start_date) end_date = end_date and pendulum.parse(end_date) - channel_messages_stream = get_stream_by_name("channel_messages", token_config) - stream = Threads( authenticator=authenticator, default_start_date=start_date, end_date=end_date, - lookback_window=pendulum.Duration(days=token_config["lookback_window"]), - parent_stream=channel_messages_stream + lookback_window=pendulum.Duration(days=token_config["lookback_window"]) ) slices = list(stream.stream_slices(stream_state=stream_state)) assert slices == expected_result @@ -92,23 +90,20 @@ def test_threads_stream_slices( ), ) def test_get_updated_state(authenticator, token_config, current_state, latest_record, expected_state): - channel_messages_stream = get_stream_by_name("channel_messages", token_config) + stream = Threads( authenticator=authenticator, default_start_date=pendulum.parse(token_config["start_date"]), - lookback_window=token_config["lookback_window"], - parent_stream=channel_messages_stream + lookback_window=token_config["lookback_window"] ) assert stream.get_updated_state(current_stream_state=current_state, latest_record=latest_record) == expected_state def test_threads_request_params(authenticator, token_config): - channel_messages_stream = get_stream_by_name("channel_messages", token_config) stream = Threads( authenticator=authenticator, default_start_date=pendulum.parse(token_config["start_date"]), - lookback_window=token_config["lookback_window"], - parent_stream=channel_messages_stream + lookback_window=token_config["lookback_window"] ) threads_slice = {'channel': 'airbyte-for-beginners', 'ts': 1577866844} expected = {'channel': 'airbyte-for-beginners', 'limit': 1000, 'ts': 1577866844} @@ -116,12 +111,10 @@ def test_threads_request_params(authenticator, token_config): def test_threads_parse_response(mocker, authenticator, token_config): - channel_messages_stream = get_stream_by_name("channel_messages", token_config) stream = Threads( authenticator=authenticator, default_start_date=pendulum.parse(token_config["start_date"]), - lookback_window=token_config["lookback_window"], - parent_stream=channel_messages_stream + lookback_window=token_config["lookback_window"] ) resp = { "messages": [ @@ -145,3 +138,55 @@ def test_threads_parse_response(mocker, authenticator, token_config): assert len(actual_response) == 1 assert actual_response[0]["float_ts"] == 1482960137.003543 assert actual_response[0]["channel_id"] == "airbyte-for-beginners" + + +@pytest.mark.parametrize("headers, expected_result", (({}, 5), ({"Retry-After": 15}, 
15))) +def test_backoff(token_config, authenticator, headers, expected_result): + stream = Threads( + authenticator=authenticator, + default_start_date=pendulum.parse(token_config["start_date"]), + lookback_window=token_config["lookback_window"] + ) + assert stream.backoff_time(Mock(headers=headers)) == expected_result + + +def test_channels_stream_with_autojoin(authenticator) -> None: + """ + The test uses the `conversations_list` fixture(autouse=true) as API mocker. + """ + expected = [ + {'id': 'airbyte-for-beginners', 'is_member': True}, + {'id': 'good-reads', 'is_member': True} + ] + stream = Channels(channel_filter=[], join_channels=True, authenticator=authenticator) + assert list(stream.read_records(None)) == expected + + +def test_next_page_token(authenticator, token_config): + stream = Threads( + authenticator=authenticator, + default_start_date=pendulum.parse(token_config["start_date"]), + lookback_window=token_config["lookback_window"] + ) + mocked_response = Mock() + mocked_response.json.return_value = {"response_metadata": {"next_cursor": "next page"}} + assert stream.next_page_token(mocked_response) == {"cursor": "next page"} + + +@pytest.mark.parametrize( + "status_code, expected", + ( + (200, False), + (403, False), + (429, True), + (500, True), + ), +) +def test_should_retry(authenticator, token_config, status_code, expected): + stream = Threads( + authenticator=authenticator, + default_start_date=pendulum.parse(token_config["start_date"]), + lookback_window=token_config["lookback_window"] + ) + mocked_response = Mock(status_code=status_code) + assert stream.should_retry(mocked_response) == expected From 3163df5e6fd925b07a0c1ac5ea7d4b6871c1e4b3 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Wed, 10 Apr 2024 11:39:45 +0300 Subject: [PATCH 52/56] updated breakingChanges message --- .../connectors/source-slack/metadata.yaml | 11 ++++++++--- docs/integrations/sources/slack-migrations.md | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/metadata.yaml b/airbyte-integrations/connectors/source-slack/metadata.yaml index 51cbe9235053e..c1400b6310dd5 100644 --- a/airbyte-integrations/connectors/source-slack/metadata.yaml +++ b/airbyte-integrations/connectors/source-slack/metadata.yaml @@ -32,9 +32,14 @@ data: 1.0.0: message: The source slack connector is being migrated from the Python CDK to our declarative low-code CDK. - Due to changes in the handling of state format for incremental substreams, this migration constitutes a breaking change. - After updating, please reset your source before resuming syncs. For more information, see our migration documentation for Source Slack. - upgradeDeadline: "2024-04-19" # TODO: update this date before merge + Due to changes in the handling of state format for incremental substreams, this migration constitutes a breaking change for the channel_messages stream. + Users will need to retest source configuration, refresh the source schema and reset the channel_messages stream after upgrading. + For more information, see our migration documentation for Source Slack. 
+ upgradeDeadline: "2024-05-03" + scopedImpact: + - scopeType: stream + impactedScopes: + - "channel_messages" suggestedStreams: streams: - users diff --git a/docs/integrations/sources/slack-migrations.md b/docs/integrations/sources/slack-migrations.md index 6f1d46ced5a39..bc1a3c3d08622 100644 --- a/docs/integrations/sources/slack-migrations.md +++ b/docs/integrations/sources/slack-migrations.md @@ -15,4 +15,4 @@ to ensure the most granular and flexible state management. This change will affect the `Channel Messages` stream. ## Migration Steps -* The `reset` for `Channel Messages` stream is required after upgrading to this version. +* A `reset` for `Channel Messages` stream is required after upgrading to this version. From e3ee23b69cbaccb01c9cc0fe9b6583a28c08b311 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Thu, 11 Apr 2024 12:39:32 +0300 Subject: [PATCH 53/56] updated upgradeDeadline --- airbyte-integrations/connectors/source-slack/metadata.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/source-slack/metadata.yaml b/airbyte-integrations/connectors/source-slack/metadata.yaml index c1400b6310dd5..70d67820e1555 100644 --- a/airbyte-integrations/connectors/source-slack/metadata.yaml +++ b/airbyte-integrations/connectors/source-slack/metadata.yaml @@ -35,7 +35,7 @@ data: Due to changes in the handling of state format for incremental substreams, this migration constitutes a breaking change for the channel_messages stream. Users will need to retest source configuration, refresh the source schema and reset the channel_messages stream after upgrading. For more information, see our migration documentation for Source Slack. - upgradeDeadline: "2024-05-03" + upgradeDeadline: "2024-04-29" scopedImpact: - scopeType: stream impactedScopes: From ca452f460ec0c92a77d49c27e317176f5f770564 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Fri, 12 Apr 2024 19:47:59 +0300 Subject: [PATCH 54/56] updated docs --- airbyte-integrations/connectors/source-slack/metadata.yaml | 6 +++--- docs/integrations/sources/slack-migrations.md | 2 +- docs/integrations/sources/slack.md | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/metadata.yaml b/airbyte-integrations/connectors/source-slack/metadata.yaml index 70d67820e1555..37373659df839 100644 --- a/airbyte-integrations/connectors/source-slack/metadata.yaml +++ b/airbyte-integrations/connectors/source-slack/metadata.yaml @@ -31,10 +31,10 @@ data: breakingChanges: 1.0.0: message: - The source slack connector is being migrated from the Python CDK to our declarative low-code CDK. + The source Slack connector is being migrated from the Python CDK to our declarative low-code CDK. Due to changes in the handling of state format for incremental substreams, this migration constitutes a breaking change for the channel_messages stream. - Users will need to retest source configuration, refresh the source schema and reset the channel_messages stream after upgrading. - For more information, see our migration documentation for Source Slack. + Users will need to reset source configuration, refresh the source schema and reset the channel_messages stream after upgrading. + For more information, see our migration documentation for source Slack. 
upgradeDeadline: "2024-04-29" scopedImpact: - scopeType: stream diff --git a/docs/integrations/sources/slack-migrations.md b/docs/integrations/sources/slack-migrations.md index bc1a3c3d08622..31458bc54c1c9 100644 --- a/docs/integrations/sources/slack-migrations.md +++ b/docs/integrations/sources/slack-migrations.md @@ -3,7 +3,7 @@ ## Upgrading to 1.0.0 We're continuously striving to enhance the quality and reliability of our connectors at Airbyte. -As part of our commitment to delivering exceptional service, we are transitioning source slack from the +As part of our commitment to delivering exceptional service, we are transitioning source Slack from the Python Connector Development Kit (CDK) to our innovative low-code framework. This is part of a strategic move to streamline many processes across connectors, bolstering maintainability and freeing us to focus more of our efforts on improving the performance and features of our evolving platform and growing catalog. diff --git a/docs/integrations/sources/slack.md b/docs/integrations/sources/slack.md index 8ed5f3ecc88c3..008c2634c14a3 100644 --- a/docs/integrations/sources/slack.md +++ b/docs/integrations/sources/slack.md @@ -163,7 +163,7 @@ Slack has [rate limit restrictions](https://api.slack.com/docs/rate-limits). | Version | Date | Pull Request | Subject | |:--------|:-----------|:---------------------------------------------------------|:-------------------------------------------------------------------------------------| -| 1.0.0 | 2024-04-02 | [35477](https://github.com/airbytehq/airbyte/pull/35477) | Migration to low code | +| 1.0.0 | 2024-04-02 | [35477](https://github.com/airbytehq/airbyte/pull/35477) | Migration to low code CDK | | 0.4.1 | 2024-03-27 | [36579](https://github.com/airbytehq/airbyte/pull/36579) | Upgrade airbyte-cdk version to emit record counts as floats | | 0.4.0 | 2024-03-19 | [36267](https://github.com/airbytehq/airbyte/pull/36267) | Pin airbyte-cdk version to `^0` | | 0.3.9 | 2024-02-12 | [35157](https://github.com/airbytehq/airbyte/pull/35157) | Manage dependencies with Poetry. | From af613b2e2442dbd0c11e0c9bc7bbb4a29f02a6c2 Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Mon, 15 Apr 2024 16:45:16 +0300 Subject: [PATCH 55/56] updated changelog --- docs/integrations/sources/slack.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/integrations/sources/slack.md b/docs/integrations/sources/slack.md index 008c2634c14a3..a8db6c5c6ed23 100644 --- a/docs/integrations/sources/slack.md +++ b/docs/integrations/sources/slack.md @@ -163,7 +163,7 @@ Slack has [rate limit restrictions](https://api.slack.com/docs/rate-limits). | Version | Date | Pull Request | Subject | |:--------|:-----------|:---------------------------------------------------------|:-------------------------------------------------------------------------------------| -| 1.0.0 | 2024-04-02 | [35477](https://github.com/airbytehq/airbyte/pull/35477) | Migration to low code CDK | +| 1.0.0 | 2024-04-02 | [35477](https://github.com/airbytehq/airbyte/pull/35477) | Migration to low-code CDK | | 0.4.1 | 2024-03-27 | [36579](https://github.com/airbytehq/airbyte/pull/36579) | Upgrade airbyte-cdk version to emit record counts as floats | | 0.4.0 | 2024-03-19 | [36267](https://github.com/airbytehq/airbyte/pull/36267) | Pin airbyte-cdk version to `^0` | | 0.3.9 | 2024-02-12 | [35157](https://github.com/airbytehq/airbyte/pull/35157) | Manage dependencies with Poetry. 
| From deb7918e5e5f1478e1ea847475dc93872b32afdd Mon Sep 17 00:00:00 2001 From: darynaishchenko Date: Mon, 15 Apr 2024 16:46:38 +0300 Subject: [PATCH 56/56] updated poetry.lock --- .../connectors/source-slack/poetry.lock | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/airbyte-integrations/connectors/source-slack/poetry.lock b/airbyte-integrations/connectors/source-slack/poetry.lock index d2cbc846a2369..6cf0fe1c2aab4 100644 --- a/airbyte-integrations/connectors/source-slack/poetry.lock +++ b/airbyte-integrations/connectors/source-slack/poetry.lock @@ -2,17 +2,17 @@ [[package]] name = "airbyte-cdk" -version = "0.78.6" +version = "0.81.4" description = "A framework for writing Airbyte Connectors." optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "airbyte_cdk-0.78.6-py3-none-any.whl", hash = "sha256:e5f44c6da6d5b5d6f3f6a7f41a3f4a5e2dfc6fefb4c6823af6302c34c6fb4a87"}, - {file = "airbyte_cdk-0.78.6.tar.gz", hash = "sha256:0178f3cefa705f600d51f09e1313024a89cd1c99f2f1f796e8e0181d8e02ad2f"}, + {file = "airbyte_cdk-0.81.4-py3-none-any.whl", hash = "sha256:4ed193da4e8be4867e1d8983172d10afb3c3b10f3e10ec618431deec1f2af4cb"}, + {file = "airbyte_cdk-0.81.4.tar.gz", hash = "sha256:5c63d8c792edf5f24d0ad804b34b3ebcc056ecede6cb4f87ebf9ac07aa987f24"}, ] [package.dependencies] -airbyte-protocol-models = "0.5.1" +airbyte-protocol-models = "*" backoff = "*" cachetools = "*" Deprecated = ">=1.2,<1.3" @@ -38,13 +38,13 @@ vector-db-based = ["cohere (==4.21)", "langchain (==0.0.271)", "openai[embedding [[package]] name = "airbyte-protocol-models" -version = "0.5.1" +version = "0.9.0" description = "Declares the Airbyte Protocol." optional = false python-versions = ">=3.8" files = [ - {file = "airbyte_protocol_models-0.5.1-py3-none-any.whl", hash = "sha256:dfe84e130e51ce2ae81a06d5aa36f6c5ce3152b9e36e6f0195fad6c3dab0927e"}, - {file = "airbyte_protocol_models-0.5.1.tar.gz", hash = "sha256:7c8b16c7c1c7956b1996052e40585a3a93b1e44cb509c4e97c1ee4fe507ea086"}, + {file = "airbyte_protocol_models-0.9.0-py3-none-any.whl", hash = "sha256:e972e140b5efd1edad5a338bcae8fdee9fc12545caf2c321e0f61b151c163a9b"}, + {file = "airbyte_protocol_models-0.9.0.tar.gz", hash = "sha256:40b69c33df23fe82d7078e84beb123bd604480e4d73cb277a890fcc92aedc8d2"}, ] [package.dependencies] @@ -326,13 +326,13 @@ files = [ [[package]] name = "idna" -version = "3.6" +version = "3.7" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.5" files = [ - {file = "idna-3.6-py3-none-any.whl", hash = "sha256:c05567e9c24a6b9faaa835c4821bad0590fbb9d5779e7caa6e1cc4978e7eb24f"}, - {file = "idna-3.6.tar.gz", hash = "sha256:9ecdbbd083b06798ae1e86adcbfe8ab1479cf864e4ee30fe4e46a003d12491ca"}, + {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"}, + {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, ] [[package]] @@ -868,18 +868,18 @@ fixture = ["fixtures"] [[package]] name = "setuptools" -version = "69.2.0" +version = "69.5.1" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "setuptools-69.2.0-py3-none-any.whl", hash = "sha256:c21c49fb1042386df081cb5d86759792ab89efca84cf114889191cd09aacc80c"}, - {file = "setuptools-69.2.0.tar.gz", hash = "sha256:0ff4183f8f42cd8fa3acea16c45205521a4ef28f73c6391d8a25e92893134f2e"}, + {file = 
"setuptools-69.5.1-py3-none-any.whl", hash = "sha256:c636ac361bc47580504644275c9ad802c50415c7522212252c033bd15f301f32"}, + {file = "setuptools-69.5.1.tar.gz", hash = "sha256:6c1fccdac05a97e598fb0ae3bbed5904ccb317337a51139dcd51453611bbb987"}, ] [package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] -testing = ["build[virtualenv]", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +testing = ["build[virtualenv]", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pytest (>=6,!=8.1.1)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.2)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] [[package]] @@ -906,13 +906,13 @@ files = [ [[package]] name = "typing-extensions" -version = "4.10.0" +version = "4.11.0" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.10.0-py3-none-any.whl", hash = "sha256:69b1a937c3a517342112fb4c6df7e72fc39a38e7891a5730ed4985b5214b5475"}, - {file = "typing_extensions-4.10.0.tar.gz", hash = "sha256:b0abd7c89e8fb96f98db18d86106ff1d90ab692004eb746cf6eda2682f91b3cb"}, + {file = "typing_extensions-4.11.0-py3-none-any.whl", hash = "sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a"}, + {file = "typing_extensions-4.11.0.tar.gz", hash = "sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0"}, ] [[package]]