diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/.dockerignore b/airbyte-integrations/connectors/destination-amazon-sqs/.dockerignore
new file mode 100644
index 0000000..efa69d4
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-amazon-sqs/.dockerignore
@@ -0,0 +1,5 @@
+*
+!Dockerfile
+!main.py
+!destination_amazon_sqs
+!setup.py
diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/Dockerfile b/airbyte-integrations/connectors/destination-amazon-sqs/Dockerfile
new file mode 100644
index 0000000..9861de2
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-amazon-sqs/Dockerfile
@@ -0,0 +1,38 @@
+FROM python:3.9.11-alpine3.15 as base
+
+# build and load all requirements
+FROM base as builder
+WORKDIR /airbyte/integration_code
+
+# upgrade pip to the latest version
+RUN apk --no-cache upgrade \
+ && pip install --upgrade pip \
+ && apk --no-cache add tzdata build-base
+
+
+COPY setup.py ./
+# install necessary packages to a temporary folder
+RUN pip install --prefix=/install .
+
+# build a clean environment
+FROM base
+WORKDIR /airbyte/integration_code
+
+# copy all loaded and built libraries to a pure basic image
+COPY --from=builder /install /usr/local
+# add default timezone settings
+COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime
+RUN echo "Etc/UTC" > /etc/timezone
+
+# bash is installed for more convenient debugging.
+RUN apk --no-cache add bash
+
+# copy payload code only
+COPY main.py ./
+COPY destination_amazon_sqs ./destination_amazon_sqs
+
+ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
+ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]
+
+LABEL io.airbyte.version=0.1.1
+LABEL io.airbyte.name=airbyte/destination-amazon-sqs
diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/README.md b/airbyte-integrations/connectors/destination-amazon-sqs/README.md
new file mode 100644
index 0000000..2856f60
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-amazon-sqs/README.md
@@ -0,0 +1,99 @@
+# Amazon SQS Destination
+
+This is the repository for the Amazon SQS destination connector, written in Python.
+For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/destinations/amazon-sqs).
+
+## Local development
+
+### Prerequisites
+**To iterate on this connector, make sure to complete this prerequisites section.**
+
+#### Minimum Python version required `= 3.7.0`
+
+#### Build & Activate Virtual Environment and install dependencies
+From this connector directory, create a virtual environment:
+```
+python -m venv .venv
+```
+
+This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your
+development environment of choice. To activate it from the terminal, run:
+```
+source .venv/bin/activate
+pip install -r requirements.txt
+```
+If you are in an IDE, follow your IDE's instructions to activate the virtualenv.
+
+Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is
+used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`.
+If this sounds confusing, don't worry about it: just declare your dependencies in `setup.py`, install them with `pip install -r requirements.txt`, and everything
+should work as you expect.
+
+#### Create credentials
+**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/destinations/amazon-sqs)
+to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `destination_amazon_sqs/spec.json` file.
+Note that the `secrets` directory is gitignored by default, so there is no danger of accidentally checking in sensitive information.
+See `integration_tests/sample_config.json` for a sample config file.
+
+**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `destination amazon-sqs test creds`
+and place them into `secrets/config.json`.
+
+### Locally running the connector
+```
+python main.py spec
+python main.py check --config secrets/config.json
+python main.py write --config secrets/config.json --catalog integration_tests/configured_catalog.json
+```
+
+### Locally running the connector docker image
+
+
+#### Build
+**Via [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md) (recommended):**
+```bash
+airbyte-ci connectors --name=destination-amazon-sqs build
+```
+
+An image will be built with the tag `airbyte/destination-amazon-sqs:dev`.
+
+**Via `docker build`:**
+```bash
+docker build -t airbyte/destination-amazon-sqs:dev .
+```
+
+#### Run
+Then run any of the connector commands as follows:
+```
+docker run --rm airbyte/destination-amazon-sqs:dev spec
+docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-amazon-sqs:dev check --config /secrets/config.json
+# messages.jsonl is a file containing line-separated JSON representing AirbyteMessages
+cat messages.jsonl | docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-amazon-sqs:dev write --config /secrets/config.json --catalog /integration_tests/configured_catalog.json
+```
+
+## Testing
+You can run our full test suite locally using [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md):
+```bash
+airbyte-ci connectors --name=destination-amazon-sqs test
+```
+
+### Customizing Acceptance Tests
+Customize the `acceptance-test-config.yml` file to configure tests. See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) for more information.
+If your connector requires creating or destroying resources for use during acceptance tests, create fixtures for them and place them inside `integration_tests/acceptance.py`, as shown in the sketch below.
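+
+A minimal sketch of such a fixture, assuming the standard `pytest` conventions used by connector acceptance tests (the resource created here is hypothetical):
+```python
+# integration_tests/acceptance.py (illustrative sketch)
+import pytest
+
+
+@pytest.fixture(scope="session", autouse=True)
+def connector_setup():
+    """Create any external resources the acceptance tests need, then tear them down."""
+    # e.g. create a temporary SQS queue for the tests here (hypothetical setup step)
+    yield
+    # e.g. delete the temporary queue here (hypothetical teardown step)
+```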
+
+## Dependency Management
+All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development.
+We split dependencies between two groups:
+* dependencies required for your connector to work go in the `MAIN_REQUIREMENTS` list.
+* dependencies required for testing go in the `TEST_REQUIREMENTS` list.
+
+### Publishing a new version of the connector
+You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what?
+1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=destination-amazon-sqs test`
+2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors).
+3. Make sure the `metadata.yaml` content is up to date.
+4. Make sure the connector documentation and its changelog are up to date (`docs/integrations/destinations/amazon-sqs.md`).
+5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention).
+6. Pat yourself on the back for being an awesome contributor.
+7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master.
+
diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/bootstrap.md b/airbyte-integrations/connectors/destination-amazon-sqs/bootstrap.md
new file mode 100644
index 0000000..ce91ec1
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-amazon-sqs/bootstrap.md
@@ -0,0 +1,59 @@
+# Amazon SQS Destination
+
+## What
+This is a connector for producing messages to an [Amazon SQS Queue](https://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSDeveloperGuide/welcome.html).
+
+## How
+### Sending messages
+Amazon SQS allows messages to be sent individually or in batches. Currently, this Destination only supports sending messages individually. This can
+have performance implications if sending high volumes of messages.
+
+#### Message Body
+By default, the SQS Message body is built using the AirbyteMessageRecord's 'data' property.
+
+If the **message_body_key** config item is set, its value is used as a key into the AirbyteMessageRecord's 'data' property, and only that field becomes the
+message body. This could be improved to handle nested keys by using JSONPath syntax to look up values.
+
+For example, given the input Record:
+```json
+{
+ "data":
+ {
+ "parent_key": {
+ "nested_key": "nested_value"
+ },
+ "top_key": "top_value"
+ }
+}
+```
+
+With no **message_body_key** set, the output SQS Message body will be
+```json
+{
+ "parent_key": {
+ "nested_key": "nested_value"
+ },
+ "top_key": "top_value"
+}
+```
+
+With **message_body_key** set to `parent_key`, the output SQS Message body will be
+```json
+{
+ "nested_key": "nested_value"
+}
+```
+
+#### Message attributes
+The `airbyte_emitted_at` timestamp is added to every message as an Attribute by default. This could be improved to allow the user to set Attributes through the UI, or to take keys from the Record as Attributes.
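+
+For reference, this mirrors what the connector's `add_attributes_to_message` helper in `destination.py` does:
+```python
+def add_attributes_to_message(record, message):
+    # Attach the record's emitted_at timestamp as an SQS String attribute
+    message["MessageAttributes"] = {
+        "airbyte_emitted_at": {"StringValue": str(record.emitted_at), "DataType": "String"}
+    }
+    return message
+```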
+
+#### FIFO Queues
+A Queue URL that ends with '.fifo' **must** be a valid FIFO Queue. When the queue is FIFO, the *message_group_id* property is required.
+
+Currently, a unique uuid4 is generated as the dedupe ID for every message. This could be improved to allow the user to specify a path in the Record
+to use as a dedupe ID.
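+
+A condensed sketch of how these properties are set (mirroring `set_message_fifo_properties` in `destination.py`):
+```python
+from uuid import uuid4
+
+
+def set_message_fifo_properties(message, message_group_id, use_content_dedupe=False):
+    # FIFO queues require a MessageGroupId; a random MessageDeduplicationId is
+    # generated unless the queue uses content-based deduplication.
+    if not message_group_id:
+        raise Exception("Message Group ID is required for FIFO queues")
+    message["MessageGroupId"] = message_group_id
+    if not use_content_dedupe:
+        message["MessageDeduplicationId"] = str(uuid4())
+    return message
+```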
+
+### Credentials
+Requires an AWS IAM Access Key ID and Secret Key.
+
+This could be improved to add support for configured AWS profiles, environment variables, etc.
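+
+For reference, the connector builds a boto3 session directly from these values (as in `write` in `destination.py`); the values below are illustrative placeholders:
+```python
+import boto3
+
+# Illustrative placeholder values; the connector reads these from its config
+access_key = "AKIAxxxxxxxxxxxxxxxx"
+secret_key = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+region = "eu-west-1"
+queue_url = "https://sqs.eu-west-1.amazonaws.com/1234567890/my-example-queue"
+
+session = boto3.Session(aws_access_key_id=access_key, aws_secret_access_key=secret_key, region_name=region)
+queue = session.resource("sqs").Queue(url=queue_url)
+```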
diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/destination_amazon_sqs/__init__.py b/airbyte-integrations/connectors/destination-amazon-sqs/destination_amazon_sqs/__init__.py
new file mode 100644
index 0000000..ff5ba7b
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-amazon-sqs/destination_amazon_sqs/__init__.py
@@ -0,0 +1,8 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+
+from .destination import DestinationAmazonSqs
+
+__all__ = ["DestinationAmazonSqs"]
diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/destination_amazon_sqs/destination.py b/airbyte-integrations/connectors/destination-amazon-sqs/destination_amazon_sqs/destination.py
new file mode 100644
index 0000000..1eb0249
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-amazon-sqs/destination_amazon_sqs/destination.py
@@ -0,0 +1,176 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+
+import json
+from typing import Any, Iterable, Mapping
+from uuid import uuid4
+
+import boto3
+from airbyte_cdk import AirbyteLogger
+from airbyte_cdk.destinations import Destination
+from airbyte_cdk.models import AirbyteConnectionStatus, AirbyteMessage, ConfiguredAirbyteCatalog, Status, Type
+from botocore.exceptions import ClientError
+
+
+class DestinationAmazonSqs(Destination):
+ def queue_is_fifo(self, url: str) -> bool:
+ return url.endswith(".fifo")
+
+ def parse_queue_name(self, url: str) -> str:
+ return url.rsplit("/", 1)[-1]
+
+ def send_single_message(self, queue, message) -> dict:
+ return queue.send_message(**message)
+
+ def build_sqs_message(self, record, message_body_key=None):
+ data = None
+ if message_body_key:
+ data = record.data.get(message_body_key)
+ if data is None:
+ raise Exception("Message had no attribute of the configured Message Body Key: " + message_body_key)
+ else:
+ data = json.dumps(record.data)
+
+ message = {"MessageBody": data}
+
+ return message
+
+ def add_attributes_to_message(self, record, message):
+ attributes = {"airbyte_emitted_at": {"StringValue": str(record.emitted_at), "DataType": "String"}}
+ message["MessageAttributes"] = attributes
+ return message
+
+ def set_message_delay(self, message, message_delay):
+ message["DelaySeconds"] = message_delay
+ return message
+
+ # MessageGroupID and MessageDeduplicationID are required properties for FIFO queues
+ # https://docs.aws.amazon.com/AWSSimpleQueueService/latest/APIReference/API_SendMessage.html
+ def set_message_fifo_properties(self, message, message_group_id, use_content_dedupe=False):
+ # https://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSDeveloperGuide/using-messagegroupid-property.html
+ if not message_group_id:
+ raise Exception("Failed to build message - Message Group ID is required for FIFO queues")
+ else:
+ message["MessageGroupId"] = message_group_id
+ # https://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSDeveloperGuide/using-messagededuplicationid-property.html
+ if not use_content_dedupe:
+ message["MessageDeduplicationId"] = str(uuid4())
+ # TODO: Support getting MessageDeduplicationId from a key in the record
+ # if message_dedupe_id:
+ # message['MessageDeduplicationId'] = message_dedupe_id
+ return message
+
+ # TODO: Support batch send
+ # def send_batch_messages(messages, queue):
+ # entry = {
+ # 'Id': "1",
+ # 'MessageBody': str(record.data),
+ # }
+ # response = queue.send_messages(Entries=messages)
+ # if 'Successful' in response:
+ # for status in response['Successful']:
+ # print("Message sent: " + status['MessageId'])
+ # if 'Failed' in response:
+ # for status in response['Failed']:
+ # print("Message sent: " + status['MessageId'])
+
+ # https://docs.aws.amazon.com/AWSSimpleQueueService/latest/APIReference/API_SendMessage.html
+ def write(
+ self, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog, input_messages: Iterable[AirbyteMessage]
+ ) -> Iterable[AirbyteMessage]:
+
+ # Required properties
+ queue_url = config["queue_url"]
+ queue_region = config["region"]
+
+ # TODO: Implement optional params for batch
+ # Optional Properties
+ # max_batch_size = config.get("max_batch_size", 10)
+ # send_as_batch = config.get("send_as_batch", False)
+ message_delay = config.get("message_delay")
+ message_body_key = config.get("message_body_key")
+
+ # FIFO Properties
+ message_group_id = config.get("message_group_id")
+
+ # Sensitive Properties
+ access_key = config["access_key"]
+ secret_key = config["secret_key"]
+
+ session = boto3.Session(aws_access_key_id=access_key, aws_secret_access_key=secret_key, region_name=queue_region)
+ sqs = session.resource("sqs")
+ queue = sqs.Queue(url=queue_url)
+
+ # TODO: Make access/secret key optional, support public access & profiles
+ # TODO: Support adding/setting attributes in the UI
+ # TODO: Support extract a specific path as message attributes
+
+ for message in input_messages:
+ if message.type == Type.RECORD:
+ sqs_message = self.build_sqs_message(message.record, message_body_key)
+
+ if message_delay:
+ sqs_message = self.set_message_delay(sqs_message, message_delay)
+
+ sqs_message = self.add_attributes_to_message(message.record, sqs_message)
+
+ if self.queue_is_fifo(queue_url):
+ use_content_dedupe = queue.attributes.get("ContentBasedDeduplication") == "true"
+ self.set_message_fifo_properties(sqs_message, message_group_id, use_content_dedupe)
+
+ self.send_single_message(queue, sqs_message)
+ if message.type == Type.STATE:
+ yield message
+
+ def check(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> AirbyteConnectionStatus:
+ try:
+ # Required properties
+ queue_url = config["queue_url"]
+ logger.debug("Amazon SQS Destination Config Check - queue_url: " + queue_url)
+ queue_region = config["region"]
+ logger.debug("Amazon SQS Destination Config Check - region: " + queue_region)
+
+ # Sensitive Properties
+ access_key = config["access_key"]
+ logger.debug("Amazon SQS Destination Config Check - access_key (ends with): " + access_key[-1])
+ secret_key = config["secret_key"]
+ logger.debug("Amazon SQS Destination Config Check - secret_key (ends with): " + secret_key[-1])
+
+ logger.debug("Amazon SQS Destination Config Check - Starting connection test ---")
+ session = boto3.Session(aws_access_key_id=access_key, aws_secret_access_key=secret_key, region_name=queue_region)
+ sqs = session.resource("sqs")
+ queue = sqs.Queue(url=queue_url)
+ if hasattr(queue, "attributes"):
+ logger.debug("Amazon SQS Destination Config Check - Connection test successful ---")
+
+ if self.queue_is_fifo(queue_url):
+ fifo = queue.attributes.get("FifoQueue", False)
+ if not fifo:
+ raise Exception("FIFO Queue URL set but Queue is not FIFO")
+
+ message_group_id = config.get("message_group_id")
+ if message_group_id is None:
+ raise Exception("Message Group ID is not set, but is required for FIFO Queues.")
+
+ # TODO: Support referencing an ID inside the Record to use as de-dupe ID
+ # message_dedupe_key = config.get("message_dedupe_key")
+ # content_dedupe = queue.attributes.get('ContentBasedDeduplication')
+ # if content_dedupe == "false":
+ # if message_dedupe_id is None:
+ # raise Exception("You must provide a Message Deduplication ID when ContentBasedDeduplication is not used.")
+
+ return AirbyteConnectionStatus(status=Status.SUCCEEDED)
+ else:
+ return AirbyteConnectionStatus(
+ status=Status.FAILED, message="Amazon SQS Destination Config Check - Could not connect to queue"
+ )
+ except ClientError as e:
+ return AirbyteConnectionStatus(
+ status=Status.FAILED, message=f"Amazon SQS Destination Config Check - Error in AWS Client: {str(e)}"
+ )
+ except Exception as e:
+ return AirbyteConnectionStatus(
+ status=Status.FAILED, message=f"Amazon SQS Destination Config Check - An exception occurred: {str(e)}"
+ )
diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/destination_amazon_sqs/spec.json b/airbyte-integrations/connectors/destination-amazon-sqs/destination_amazon_sqs/spec.json
new file mode 100644
index 0000000..f94d7d0
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-amazon-sqs/destination_amazon_sqs/spec.json
@@ -0,0 +1,101 @@
+{
+ "documentationUrl": "https://docs.airbyte.com/integrations/destinations/amazon-sqs",
+ "supported_destination_sync_modes": ["append"],
+ "supportsIncremental": true,
+ "connectionSpecification": {
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "title": "Destination Amazon Sqs",
+ "type": "object",
+ "required": ["queue_url", "region"],
+ "additionalProperties": false,
+ "properties": {
+ "queue_url": {
+ "title": "Queue URL",
+ "description": "URL of the SQS Queue",
+ "type": "string",
+ "examples": [
+ "https://sqs.eu-west-1.amazonaws.com/1234567890/my-example-queue"
+ ],
+ "order": 0
+ },
+ "region": {
+ "title": "AWS Region",
+ "description": "AWS Region of the SQS Queue",
+ "type": "string",
+ "enum": [
+ "af-south-1",
+ "ap-east-1",
+ "ap-northeast-1",
+ "ap-northeast-2",
+ "ap-northeast-3",
+ "ap-south-1",
+ "ap-south-2",
+ "ap-southeast-1",
+ "ap-southeast-2",
+ "ap-southeast-3",
+ "ap-southeast-4",
+ "ca-central-1",
+ "ca-west-1",
+ "cn-north-1",
+ "cn-northwest-1",
+ "eu-central-1",
+ "eu-central-2",
+ "eu-north-1",
+ "eu-south-1",
+ "eu-south-2",
+ "eu-west-1",
+ "eu-west-2",
+ "eu-west-3",
+ "il-central-1",
+ "me-central-1",
+ "me-south-1",
+ "sa-east-1",
+ "us-east-1",
+ "us-east-2",
+ "us-gov-east-1",
+ "us-gov-west-1",
+ "us-west-1",
+ "us-west-2"
+ ],
+ "order": 1
+ },
+ "message_delay": {
+ "title": "Message Delay",
+ "description": "Modify the Message Delay of the individual message from the Queue's default (seconds).",
+ "type": "integer",
+ "examples": ["15"],
+ "order": 2
+ },
+ "access_key": {
+ "title": "AWS IAM Access Key ID",
+ "description": "The Access Key ID of the AWS IAM Role to use for sending messages",
+ "type": "string",
+ "examples": ["xxxxxHRNxxx3TBxxxxxx"],
+ "order": 3,
+ "airbyte_secret": true
+ },
+ "secret_key": {
+ "title": "AWS IAM Secret Key",
+ "description": "The Secret Key of the AWS IAM Role to use for sending messages",
+ "type": "string",
+ "examples": ["hu+qE5exxxxT6o/ZrKsxxxxxxBhxxXLexxxxxVKz"],
+ "order": 4,
+ "airbyte_secret": true
+ },
+ "message_body_key": {
+ "title": "Message Body Key",
+ "description": "Use this property to extract the contents of the named key in the input record to use as the SQS message body. If not set, the entire content of the input record data is used as the message body.",
+ "type": "string",
+ "examples": ["myDataPath"],
+ "order": 5
+ },
+ "message_group_id": {
+ "title": "Message Group Id",
+ "description": "The tag that specifies that a message belongs to a specific message group. This parameter applies only to, and is REQUIRED by, FIFO queues.",
+ "type": "string",
+ "examples": ["my-fifo-group"],
+ "order": 6
+ }
+ }
+ }
+}
diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/icon.svg b/airbyte-integrations/connectors/destination-amazon-sqs/icon.svg
new file mode 100644
index 0000000..6029b85
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-amazon-sqs/icon.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/integration_tests/input_records_json b/airbyte-integrations/connectors/destination-amazon-sqs/integration_tests/input_records_json
new file mode 100644
index 0000000..b46977c
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-amazon-sqs/integration_tests/input_records_json
@@ -0,0 +1 @@
+{"type": "RECORD", "record": {"stream": "ab-airbyte-testing", "data": {"id": "ba0f237b-abf5-41ae-9d94-1dbd346f38dd", "body": "test 1", "attributes": null}, "emitted_at": 1633881878000}}
\ No newline at end of file
diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/integration_tests/integration_test.py b/airbyte-integrations/connectors/destination-amazon-sqs/integration_tests/integration_test.py
new file mode 100644
index 0000000..5d1e711
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-amazon-sqs/integration_tests/integration_test.py
@@ -0,0 +1,46 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+import json
+from typing import Any, Mapping
+
+import pytest
+from airbyte_cdk import AirbyteLogger
+from airbyte_cdk.models import AirbyteStream, ConfiguredAirbyteCatalog, ConfiguredAirbyteStream, DestinationSyncMode, Status, SyncMode
+from destination_amazon_sqs import DestinationAmazonSqs
+
+
+@pytest.fixture(name="config")
+def config_fixture() -> Mapping[str, Any]:
+ with open("secrets/config.json", "r") as f:
+ return json.loads(f.read())
+
+
+@pytest.fixture(name="configured_catalog")
+def configured_catalog_fixture() -> ConfiguredAirbyteCatalog:
+ stream_schema = {"type": "object", "properties": {"string_col": {"type": "str"}, "int_col": {"type": "integer"}}}
+
+ append_stream = ConfiguredAirbyteStream(
+ stream=AirbyteStream(name="append_stream", json_schema=stream_schema, supported_sync_modes=[SyncMode.incremental]),
+ sync_mode=SyncMode.incremental,
+ destination_sync_mode=DestinationSyncMode.append,
+ )
+
+ overwrite_stream = ConfiguredAirbyteStream(
+ stream=AirbyteStream(name="overwrite_stream", json_schema=stream_schema, supported_sync_modes=[SyncMode.incremental]),
+ sync_mode=SyncMode.incremental,
+ destination_sync_mode=DestinationSyncMode.overwrite,
+ )
+
+ return ConfiguredAirbyteCatalog(streams=[append_stream, overwrite_stream])
+
+
+def test_check_valid_config(config: Mapping):
+ outcome = DestinationAmazonSqs().check(AirbyteLogger(), config)
+ assert outcome.status == Status.SUCCEEDED
+
+
+def test_check_invalid_config():
+ outcome = DestinationAmazonSqs().check(AirbyteLogger(), {"secret_key": "not_a_real_secret"})
+ assert outcome.status == Status.FAILED
diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/main.py b/airbyte-integrations/connectors/destination-amazon-sqs/main.py
new file mode 100644
index 0000000..bc60769
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-amazon-sqs/main.py
@@ -0,0 +1,11 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+
+import sys
+
+from destination_amazon_sqs import DestinationAmazonSqs
+
+if __name__ == "__main__":
+ DestinationAmazonSqs().run(sys.argv[1:])
diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/metadata.yaml b/airbyte-integrations/connectors/destination-amazon-sqs/metadata.yaml
new file mode 100644
index 0000000..3676f4f
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-amazon-sqs/metadata.yaml
@@ -0,0 +1,24 @@
+data:
+ connectorSubtype: api
+ connectorType: destination
+ definitionId: 0eeee7fb-518f-4045-bacc-9619e31c43ea
+ dockerImageTag: 0.1.1
+ dockerRepository: airbyte/destination-amazon-sqs
+ githubIssueLabel: destination-amazon-sqs
+ icon: awssqs.svg
+ license: MIT
+ name: Amazon SQS
+ registries:
+ cloud:
+ enabled: false
+ oss:
+ enabled: false
+ releaseStage: alpha
+ documentationUrl: https://docs.airbyte.com/integrations/destinations/amazon-sqs
+ tags:
+ - language:python
+ ab_internal:
+ sl: 100
+ ql: 200
+ supportLevel: archived
+metadataSpecVersion: "1.0"
diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/requirements.txt b/airbyte-integrations/connectors/destination-amazon-sqs/requirements.txt
new file mode 100644
index 0000000..d6e1198
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-amazon-sqs/requirements.txt
@@ -0,0 +1 @@
+-e .
diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/sample_files/configured_catalog.json b/airbyte-integrations/connectors/destination-amazon-sqs/sample_files/configured_catalog.json
new file mode 100644
index 0000000..ee132a2
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-amazon-sqs/sample_files/configured_catalog.json
@@ -0,0 +1,27 @@
+{
+ "streams": [
+ {
+ "sync_mode": "full_refresh",
+ "destination_sync_mode": "append",
+ "stream": {
+ "name": "ab-airbyte-testing",
+ "supported_sync_modes": ["full_refresh"],
+ "source_defined_cursor": false,
+ "json_schema": {
+ "type": "object",
+ "properties": {
+ "id": {
+ "type": "string"
+ },
+ "body": {
+ "type": "string"
+ },
+ "attributes": {
+ "type": ["null", "object"]
+ }
+ }
+ }
+ }
+ }
+ ]
+}
diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/setup.py b/airbyte-integrations/connectors/destination-amazon-sqs/setup.py
new file mode 100644
index 0000000..f1df000
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-amazon-sqs/setup.py
@@ -0,0 +1,23 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+
+from setuptools import find_packages, setup
+
+MAIN_REQUIREMENTS = ["airbyte-cdk", "boto3"]
+
+TEST_REQUIREMENTS = ["pytest~=6.1", "moto"]
+
+setup(
+ name="destination_amazon_sqs",
+ description="Destination implementation for Amazon Sqs.",
+ author="Airbyte",
+ author_email="contact@airbyte.io",
+ packages=find_packages(),
+ install_requires=MAIN_REQUIREMENTS,
+ package_data={"": ["*.json"]},
+ extras_require={
+ "tests": TEST_REQUIREMENTS,
+ },
+)
diff --git a/airbyte-integrations/connectors/destination-amazon-sqs/unit_tests/unit_test.py b/airbyte-integrations/connectors/destination-amazon-sqs/unit_tests/unit_test.py
new file mode 100644
index 0000000..719671f
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-amazon-sqs/unit_tests/unit_test.py
@@ -0,0 +1,226 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+import json
+import time
+from typing import Any, Mapping
+
+import boto3
+from airbyte_cdk.logger import AirbyteLogger
+from airbyte_cdk.models import AirbyteMessage, ConfiguredAirbyteCatalog, Status
+from destination_amazon_sqs import DestinationAmazonSqs
+
+# from airbyte_cdk.sources.source import Source
+from moto import mock_iam, mock_sqs
+from moto.core import set_initial_no_auth_action_count
+
+
+@mock_iam
+def create_user_with_all_permissions():
+ client = boto3.client("iam", region_name="eu-west-1")
+ client.create_user(UserName="test_user1")
+
+ policy_document = {
+ "Version": "2012-10-17",
+ "Statement": [{"Effect": "Allow", "Action": ["sqs:*"], "Resource": "*"}],
+ }
+
+ client.put_user_policy(
+ UserName="test_user1",
+ PolicyName="policy1",
+ PolicyDocument=json.dumps(policy_document),
+ )
+
+ return client.create_access_key(UserName="test_user1")["AccessKey"]
+
+
+def create_config(queue_url, queue_region, access_key, secret_key, message_delay):
+ return {
+ "queue_url": queue_url,
+ "region": queue_region,
+ "access_key": access_key,
+ "secret_key": secret_key,
+ "message_delay": message_delay,
+ }
+
+
+def create_fifo_config(queue_url, queue_region, access_key, secret_key, message_group_id, message_delay):
+ return {
+ "queue_url": queue_url,
+ "region": queue_region,
+ "access_key": access_key,
+ "secret_key": secret_key,
+ "message_group_id": message_group_id,
+ "message_delay": message_delay,
+ }
+
+
+def create_config_with_body_key(queue_url, queue_region, access_key, secret_key, message_body_key, message_delay):
+ return {
+ "queue_url": queue_url,
+ "region": queue_region,
+ "access_key": access_key,
+ "secret_key": secret_key,
+ "message_body_key": message_body_key,
+ "message_delay": message_delay,
+ }
+
+
+def get_catalog() -> Mapping[str, Any]:
+ with open("sample_files/configured_catalog.json", "r") as f:
+ return json.load(f)
+
+
+@set_initial_no_auth_action_count(3)
+@mock_sqs
+@mock_iam
+def test_check():
+ # Create User
+ user = create_user_with_all_permissions()
+ # Create Queue
+ queue_name = "amazon-sqs-mock-queue"
+ queue_region = "eu-west-1"
+ client = boto3.client(
+ "sqs", aws_access_key_id=user["AccessKeyId"], aws_secret_access_key=user["SecretAccessKey"], region_name=queue_region
+ )
+ queue_url = client.create_queue(QueueName=queue_name)["QueueUrl"]
+ # Create config
+ config = create_config(queue_url, queue_region, user["AccessKeyId"], user["SecretAccessKey"], 10)
+ # Create AirbyteLogger
+ logger = AirbyteLogger()
+ # Create Destination
+ destination = DestinationAmazonSqs()
+ # Run check
+ status = destination.check(logger, config)
+ assert status.status == Status.SUCCEEDED
+
+ # Create FIFO queue
+ fifo_queue_name = "amazon-sqs-mock-queue.fifo"
+ fifo_queue_url = client.create_queue(QueueName=fifo_queue_name, Attributes={"FifoQueue": "true"})["QueueUrl"]
+ # Create config for FIFO
+ fifo_config = create_fifo_config(fifo_queue_url, queue_region, user["AccessKeyId"], user["SecretAccessKey"], "fifo-group", 10)
+ # Run check
+ status = destination.check(logger, fifo_config)
+ assert status.status == Status.SUCCEEDED
+
+
+@set_initial_no_auth_action_count(4)
+@mock_sqs
+@mock_iam
+def test_write():
+ # Create User
+ user = create_user_with_all_permissions()
+
+ test_message = {
+ "type": "RECORD",
+ "record": {
+ "stream": "ab-airbyte-testing",
+ "data": {"id": "ba0f237b-abf5-41ae-9d94-1dbd346f38dd", "body": "test 1", "attributes": None},
+ "emitted_at": 1633881878000,
+ },
+ }
+ ab_message = AirbyteMessage(**test_message)
+
+ # Common params
+ message_delay = 1
+ queue_region = "eu-west-1"
+
+ # Standard Queue Test
+ print("## Starting standard queue test ##")
+ # Create Queue
+ queue_name = "amazon-sqs-mock-queue"
+ client = boto3.client(
+ "sqs", aws_access_key_id=user["AccessKeyId"], aws_secret_access_key=user["SecretAccessKey"], region_name=queue_region
+ )
+ queue_url = client.create_queue(QueueName=queue_name)["QueueUrl"]
+ # Create config
+ config = create_config(queue_url, queue_region, user["AccessKeyId"], user["SecretAccessKey"], message_delay)
+ # Create ConfiguredAirbyteCatalog
+ catalog = ConfiguredAirbyteCatalog(streams=get_catalog()["streams"])
+ # Create Destination
+ destination = DestinationAmazonSqs()
+ # Send messages using write()
+ for message in destination.write(config, catalog, [ab_message]):
+ print(f"Message Sent with delay of {message_delay} seconds")
+ # Listen for messages for max 20 seconds
+ timeout = time.time() + 20
+ print("Listening for messages.")
+ while True:
+ message_received = client.receive_message(QueueUrl=queue_url)
+ if message_received.get("Messages"):
+ print("Message received.")
+ message_body = json.loads(message_received["Messages"][0]["Body"])
+ # Compare the body of the received message, with the body of the message we sent
+ if message_body == test_message["record"]["data"]:
+ print("Received message matches for standard queue write.")
+ assert True
+ break
+ else:
+ continue
+ if time.time() > timeout:
+ print("Timed out waiting for message after 20 seconds.")
+ assert False
+
+ # Standard Queue with a Message Key Test
+ print("## Starting body key queue test ##")
+ # Create Queue
+ key_queue_name = "amazon-sqs-mock-queue-key"
+ key_queue_url = client.create_queue(QueueName=key_queue_name)["QueueUrl"]
+ # Create config
+ message_body_key = "body"
+ key_config = create_config_with_body_key(
+ key_queue_url, queue_region, user["AccessKeyId"], user["SecretAccessKey"], message_body_key, message_delay
+ )
+ # Send messages using write()
+ for message in destination.write(key_config, catalog, [ab_message]):
+ print(f"Message Sent with delay of {message_delay} seconds")
+ # Listen for messages for max 20 seconds
+ timeout = time.time() + 20
+ print("Listening for messages.")
+ while True:
+ message_received = client.receive_message(QueueUrl=key_queue_url)
+ if message_received.get("Messages"):
+ print("Message received.")
+ message_body = message_received["Messages"][0]["Body"]
+ # Compare the body of the received message, with the body of the message we sent
+ if message_body == test_message["record"]["data"][message_body_key]:
+ print("Received message matches for body key queue write.")
+ assert True
+ break
+ else:
+ continue
+ if time.time() > timeout:
+ print("Timed out waiting for message after 20 seconds.")
+ assert False
+
+ # FIFO Queue Test
+ print("## Starting FIFO queue test ##")
+ # Create Queue
+ fifo_queue_name = "amazon-sqs-mock-queue.fifo"
+ fifo_queue_url = client.create_queue(QueueName=fifo_queue_name, Attributes={"FifoQueue": "true"})["QueueUrl"]
+ # Create config
+ fifo_config = create_fifo_config(
+ fifo_queue_url, queue_region, user["AccessKeyId"], user["SecretAccessKey"], "fifo-group", message_delay
+ )
+ # Send messages using write()
+ for message in destination.write(fifo_config, catalog, [ab_message]):
+ print(f"Message Sent with delay of {message_delay} seconds")
+ # Listen for messages for max 20 seconds
+ timeout = time.time() + 20
+ print("Listening for messages.")
+ while True:
+ message_received = client.receive_message(QueueUrl=fifo_queue_url)
+ if message_received.get("Messages"):
+ print("Message received.")
+ message_body = json.loads(message_received["Messages"][0]["Body"])
+ # Compare the body of the received message, with the body of the message we sent
+ if message_body == test_message["record"]["data"]:
+ print("Received message matches for FIFO queue write.")
+ assert True
+ break
+ else:
+ continue
+ if time.time() > timeout:
+ print("Timed out waiting for message after 20 seconds.")
+ assert False
diff --git a/airbyte-integrations/connectors/destination-cassandra/README.md b/airbyte-integrations/connectors/destination-cassandra/README.md
new file mode 100644
index 0000000..21c6cde
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cassandra/README.md
@@ -0,0 +1,72 @@
+# Destination Cassandra
+
+This is the repository for the Cassandra destination connector in Java.
+For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.io/integrations/destinations/cassandra).
+
+## Local development
+
+#### Building via Gradle
+From the Airbyte repository root, run:
+```
+./gradlew :airbyte-integrations:connectors:destination-cassandra:build
+```
+
+#### Create credentials
+**If you are a community contributor**, generate the necessary credentials and place them in `secrets/config.json` conforming to the spec file in `src/main/resources/spec.json`.
+Note that the `secrets` directory is git-ignored by default, so there is no danger of accidentally checking in sensitive information.
+
+**If you are an Airbyte core member**, follow the [instructions](https://docs.airbyte.io/connector-development#using-credentials-in-ci) to set up the credentials.
+
+### Locally running the connector docker image
+
+#### Build
+Build the connector image via Gradle:
+
+```
+./gradlew :airbyte-integrations:connectors:destination-cassandra:buildConnectorImage
+```
+Once built, the docker image name and tag on your host will be `airbyte/destination-cassandra:dev`.
+
+#### Run
+Then run any of the connector commands as follows:
+```
+docker run --rm airbyte/destination-cassandra:dev spec
+docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-cassandra:dev check --config /secrets/config.json
+docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-cassandra:dev write --config /secrets/config.json --catalog /integration_tests/configured_catalog.json
+```
+
+## Testing
+We use `JUnit` for Java tests.
+
+### Unit and Integration Tests
+Place unit tests under `src/test/java/io/airbyte/integrations/destination/cassandra`.
+
+#### Acceptance Tests
+Airbyte has a standard test suite that all destination connectors must pass. Implement the `TODO`s in
+`src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraDestinationAcceptanceTest.java`.
+
+### Using gradle to run tests
+All commands should be run from the Airbyte project root.
+To run unit tests:
+```
+./gradlew :airbyte-integrations:connectors:destination-cassandra:unitTest
+```
+To run acceptance and custom integration tests:
+```
+./gradlew :airbyte-integrations:connectors:destination-cassandra:integrationTest
+```
+
+## Dependency Management
+
+### Publishing a new version of the connector
+You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what?
+1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=destination-cassandra test`
+2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors).
+3. Make sure the `metadata.yaml` content is up to date.
+4. Make sure the connector documentation and its changelog are up to date (`docs/integrations/destinations/cassandra.md`).
+5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention).
+6. Pat yourself on the back for being an awesome contributor.
+7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master.
+
diff --git a/airbyte-integrations/connectors/destination-cassandra/bootstrap.md b/airbyte-integrations/connectors/destination-cassandra/bootstrap.md
new file mode 100644
index 0000000..35c1942
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cassandra/bootstrap.md
@@ -0,0 +1,30 @@
+# Cassandra Destination
+
+Cassandra is a free and open-source, distributed, wide-column store, NoSQL database management system designed to handle
+large amounts of data across many commodity servers, providing high availability with no single point of failure.
+
+The data is structured in keyspaces and tables and is partitioned and replicated across different nodes in the
+cluster.
+[Read more about Cassandra](https://cassandra.apache.org/_/index.html)
+
+This connector maps an incoming `stream` to a Cassandra `table` and a `namespace` to a Cassandra `keyspace`.
+When using the destination sync modes `append` and `append_dedup`, an `insert` operation is performed against an existing
+Cassandra table.
+When using `overwrite`, the records are first placed in a temp table. Once all the messages have been received, the data
+is copied to the final table (which is first truncated) and the temp table is deleted.
+
+The implementation uses the [DataStax](https://github.com/datastax/java-driver) driver to access
+Cassandra. [CassandraCqlProvider](./src/main/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProvider.java)
+handles the communication with the Cassandra cluster and internally uses
+the [SessionManager](./src/main/java/io/airbyte/integrations/destination/cassandra/SessionManager.java) to retrieve a
+`CqlSession` for the cluster.
+
+The [CassandraMessageConsumer](./src/main/java/io/airbyte/integrations/destination/cassandra/CassandraMessageConsumer.java)
+class contains the logic for handling Airbyte messages and events, and for copying data between tables.
+
+## Development
+
+See the [CassandraCqlProvider](./src/main/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProvider.java)
+class for an example of how to use the DataStax driver.
+
+[DataStax docs](https://docs.datastax.com/en/developer/java-driver/3.0/).
\ No newline at end of file
diff --git a/airbyte-integrations/connectors/destination-cassandra/build.gradle b/airbyte-integrations/connectors/destination-cassandra/build.gradle
new file mode 100644
index 0000000..b9774a9
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cassandra/build.gradle
@@ -0,0 +1,39 @@
+plugins {
+ id 'application'
+ id 'airbyte-java-connector'
+}
+
+airbyteJavaConnector {
+ cdkVersionRequired = '0.2.0'
+ features = ['db-destinations']
+ useLocalCdk = false
+}
+
+//remove once upgrading the CDK version to 0.4.x or later
+java {
+ compileJava {
+ options.compilerArgs.remove("-Werror")
+ }
+}
+
+airbyteJavaConnector.addCdkDependencies()
+
+application {
+ mainClass = 'io.airbyte.integrations.destination.cassandra.CassandraDestination'
+ applicationDefaultJvmArgs = ['-XX:+ExitOnOutOfMemoryError', '-XX:MaxRAMPercentage=75.0']
+}
+
+def cassandraDriver = '4.13.0'
+def assertVersion = '3.21.0'
+
+dependencies {
+
+ implementation "com.datastax.oss:java-driver-core:${cassandraDriver}"
+ implementation "com.datastax.oss:java-driver-query-builder:${cassandraDriver}"
+ implementation "com.datastax.oss:java-driver-mapper-runtime:${cassandraDriver}"
+
+
+ // https://mvnrepository.com/artifact/org.assertj/assertj-core
+ testImplementation "org.assertj:assertj-core:${assertVersion}"
+ testImplementation libs.testcontainers.cassandra
+}
diff --git a/airbyte-integrations/connectors/destination-cassandra/docker-compose.yml b/airbyte-integrations/connectors/destination-cassandra/docker-compose.yml
new file mode 100644
index 0000000..a4786dd
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cassandra/docker-compose.yml
@@ -0,0 +1,23 @@
+version: "3.7"
+
+services:
+ cassandra1:
+ image: cassandra:4.0
+ ports:
+ - "9042:9042"
+ environment:
+ - "MAX_HEAP_SIZE=2048M"
+ - "HEAP_NEWSIZE=1024M"
+ - "CASSANDRA_CLUSTER_NAME=cassandra_cluster"
+# Uncomment if you want to run a Cassandra cluster
+# cassandra2:
+# image: cassandra:4.0
+# ports:
+# - "9043:9042"
+# environment:
+# - "MAX_HEAP_SIZE=2048M"
+# - "HEAP_NEWSIZE=1024M"
+# - "CASSANDRA_SEEDS=cassandra1"
+# - "CASSANDRA_CLUSTER_NAME=cassandra_cluster"
+# depends_on:
+# - cassandra1
diff --git a/airbyte-integrations/connectors/destination-cassandra/icon.svg b/airbyte-integrations/connectors/destination-cassandra/icon.svg
new file mode 100644
index 0000000..26c12ef
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cassandra/icon.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/airbyte-integrations/connectors/destination-cassandra/metadata.yaml b/airbyte-integrations/connectors/destination-cassandra/metadata.yaml
new file mode 100644
index 0000000..7b6e8b0
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cassandra/metadata.yaml
@@ -0,0 +1,24 @@
+data:
+ connectorSubtype: database
+ connectorType: destination
+ definitionId: 707456df-6f4f-4ced-b5c6-03f73bcad1c5
+ dockerImageTag: 0.1.4
+ dockerRepository: airbyte/destination-cassandra
+ githubIssueLabel: destination-cassandra
+ icon: cassandra.svg
+ license: MIT
+ name: Cassandra
+ registries:
+ cloud:
+ enabled: false
+ oss:
+ enabled: false
+ releaseStage: alpha
+ documentationUrl: https://docs.airbyte.com/integrations/destinations/cassandra
+ tags:
+ - language:java
+ ab_internal:
+ sl: 100
+ ql: 100
+ supportLevel: community
+metadataSpecVersion: "1.0"
diff --git a/airbyte-integrations/connectors/destination-cassandra/sample_secrets/config.json b/airbyte-integrations/connectors/destination-cassandra/sample_secrets/config.json
new file mode 100644
index 0000000..644fd54
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cassandra/sample_secrets/config.json
@@ -0,0 +1,4 @@
+{
+ "username": "paste-username-here",
+ "password": "paste-password-here"
+}
diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraConfig.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraConfig.java
new file mode 100644
index 0000000..5ea984f
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraConfig.java
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.cassandra;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import java.util.Objects;
+
+/*
+ * Immutable configuration class for storing cassandra related config.
+ */
+class CassandraConfig {
+
+ private final String keyspace;
+
+ private final String username;
+
+ private final String password;
+
+ private final String address;
+
+ private final int port;
+
+ private final String datacenter;
+
+ private final int replication;
+
+ public CassandraConfig(String keyspace,
+ String username,
+ String password,
+ String address,
+ int port,
+ String datacenter,
+ int replication) {
+ this.keyspace = keyspace;
+ this.username = username;
+ this.password = password;
+ this.address = address;
+ this.port = port;
+ this.datacenter = datacenter;
+ this.replication = replication;
+ }
+
+ public CassandraConfig(JsonNode config) {
+ this.keyspace = config.get("keyspace").asText();
+ this.username = config.get("username").asText();
+ this.password = config.get("password").asText();
+ this.address = config.get("address").asText();
+ this.port = config.get("port").asInt(9042);
+ this.datacenter = config.get("datacenter").asText("datacenter1");
+ this.replication = config.get("replication").asInt(1);
+ }
+
+ public String getKeyspace() {
+ return keyspace;
+ }
+
+ public String getUsername() {
+ return username;
+ }
+
+ public String getPassword() {
+ return password;
+ }
+
+ public String getAddress() {
+ return address;
+ }
+
+ public int getPort() {
+ return port;
+ }
+
+ public String getDatacenter() {
+ return datacenter;
+ }
+
+ public int getReplication() {
+ return replication;
+ }
+
+ @Override
+ public String toString() {
+ return "CassandraConfig{" +
+ "keyspace='" + keyspace + '\'' +
+ ", username='" + username + '\'' +
+ ", password='" + password + '\'' +
+ ", address='" + address + '\'' +
+ ", port=" + port +
+ ", datacenter='" + datacenter + '\'' +
+ ", replication=" + replication +
+ '}';
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ CassandraConfig that = (CassandraConfig) o;
+ return port == that.port && username.equals(that.username) && password.equals(that.password) &&
+ address.equals(that.address) && datacenter.equals(that.datacenter);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(username, password, address, port, datacenter);
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProvider.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProvider.java
new file mode 100644
index 0000000..0e48b8d
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProvider.java
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.cassandra;
+
+import static com.datastax.oss.driver.api.querybuilder.QueryBuilder.now;
+
+import com.datastax.oss.driver.api.core.CqlSession;
+import com.datastax.oss.driver.api.core.cql.BoundStatement;
+import com.datastax.oss.driver.api.core.cql.PreparedStatement;
+import com.datastax.oss.driver.api.core.metadata.TokenMap;
+import com.datastax.oss.driver.api.core.type.DataTypes;
+import com.datastax.oss.driver.api.core.uuid.Uuids;
+import com.datastax.oss.driver.api.querybuilder.QueryBuilder;
+import com.datastax.oss.driver.api.querybuilder.SchemaBuilder;
+import io.airbyte.cdk.integrations.base.JavaBaseConstants;
+import java.io.Closeable;
+import java.time.Instant;
+import java.util.List;
+import java.util.UUID;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.stream.Collectors;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+class CassandraCqlProvider implements Closeable {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(CassandraCqlProvider.class);
+
+ private static final int N_THREADS = Runtime.getRuntime().availableProcessors();
+
+ private final ExecutorService executorService;
+
+ private final CqlSession cqlSession;
+
+ private final CassandraConfig cassandraConfig;
+
+ private final String columnId;
+
+ private final String columnData;
+
+ private final String columnTimestamp;
+
+ public CassandraCqlProvider(CassandraConfig cassandraConfig) {
+ this.cassandraConfig = cassandraConfig;
+ this.cqlSession = SessionManager.initSession(cassandraConfig);
+ var nameTransformer = new CassandraNameTransformer(cassandraConfig);
+ this.columnId = nameTransformer.outputColumn(JavaBaseConstants.COLUMN_NAME_AB_ID);
+ this.columnData = nameTransformer.outputColumn(JavaBaseConstants.COLUMN_NAME_DATA);
+ this.columnTimestamp = nameTransformer.outputColumn(JavaBaseConstants.COLUMN_NAME_EMITTED_AT);
+ this.executorService = Executors.newFixedThreadPool(N_THREADS);
+ }
+
+ public void createKeySpaceIfNotExists(String keyspace, int replicationFactor) {
+ var query = SchemaBuilder.createKeyspace(keyspace)
+ .ifNotExists()
+ .withSimpleStrategy(replicationFactor)
+ .build();
+ cqlSession.execute(query);
+ }
+
+ public void createTableIfNotExists(String keyspace, String tableName) {
+ var query = SchemaBuilder.createTable(keyspace, tableName)
+ .ifNotExists()
+ .withPartitionKey(columnId, DataTypes.UUID)
+ .withColumn(columnData, DataTypes.TEXT)
+ .withColumn(columnTimestamp, DataTypes.TIMESTAMP)
+ .build();
+ cqlSession.execute(query);
+ }
+
+ public void dropTableIfExists(String keyspace, String tableName) {
+ var query = SchemaBuilder.dropTable(keyspace, tableName)
+ .ifExists()
+ .build();
+ cqlSession.execute(query);
+ }
+
+ public void insert(String keyspace, String tableName, String jsonData) {
+ var query = QueryBuilder.insertInto(keyspace, tableName)
+ .value(columnId, QueryBuilder.literal(Uuids.random()))
+ .value(columnData, QueryBuilder.literal(jsonData))
+ .value(columnTimestamp, QueryBuilder.toTimestamp(now()))
+ .build();
+ cqlSession.execute(query);
+ }
+
+ public void truncate(String keyspace, String tableName) {
+ var query = QueryBuilder.truncate(keyspace, tableName).build();
+ cqlSession.execute(query);
+ }
+
+ public List<CassandraRecord> select(String keyspace, String tableName) {
+ var query = QueryBuilder.selectFrom(keyspace, tableName)
+ .columns(columnId, columnData, columnTimestamp)
+ .build();
+ return cqlSession.execute(query)
+ .map(result -> new CassandraRecord(
+ result.get(columnId, UUID.class),
+ result.get(columnData, String.class),
+ result.get(columnTimestamp, Instant.class)))
+ .all();
+ }
+
+ public List<Tuple<String, List<String>>> retrieveMetadata() {
+ return cqlSession.getMetadata().getKeyspaces().values().stream()
+ .map(keyspace -> Tuple.of(keyspace.getName().toString(), keyspace.getTables().values()
+ .stream()
+ .map(table -> table.getName().toString())
+ .collect(Collectors.toList())))
+ .collect(Collectors.toList());
+ }
+
+ public void copy(String keyspace, String sourceTable, String destinationTable) {
+ var select = String.format("SELECT * FROM %s.%s WHERE token(%s) > ? AND token(%s) <= ?",
+ keyspace, sourceTable, columnId, columnId);
+
+ var selectStatement = cqlSession.prepare(select);
+
+ var insert = String.format("INSERT INTO %s.%s (%s, %s, %s) VALUES (?, ?, ?)",
+ keyspace, destinationTable, columnId, columnData, columnTimestamp);
+
+ var insertStatement = cqlSession.prepare(insert);
+
+ // perform full table scan in parallel using token ranges
+ // optimal for copying large amounts of data
+ cqlSession.getMetadata().getTokenMap()
+ .map(TokenMap::getTokenRanges)
+ .orElseThrow(IllegalStateException::new)
+ .stream()
+ .flatMap(range -> range.unwrap().stream())
+ .map(range -> selectStatement.bind(range.getStart(), range.getEnd()))
+ // explore datastax 4.x async api as an alternative for async processing
+ .map(selectBoundStatement -> executorService.submit(() -> asyncInsert(selectBoundStatement, insertStatement)))
+ .forEach(this::awaitThread);
+
+ }
+
+ private void asyncInsert(BoundStatement select, PreparedStatement insert) {
+ var boundStatements = cqlSession.execute(select).all().stream()
+ .map(r -> CassandraRecord.of(
+ r.get(columnId, UUID.class),
+ r.get(columnData, String.class),
+ r.get(columnTimestamp, Instant.class)))
+ .map(r -> insert.bind(r.getId(), r.getData(), r.getTimestamp())).toList();
+
+ boundStatements.forEach(boundStatement -> {
+ var resultSetCompletionStage = cqlSession.executeAsync(boundStatement);
+ resultSetCompletionStage.whenCompleteAsync((res, err) -> {
+ if (err != null) {
+ LOGGER.error("Something went wrong during async insertion: " + err.getMessage());
+ }
+ });
+ });
+ }
+
+ private void awaitThread(Future<?> future) {
+ try {
+ future.get();
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ LOGGER.error("Interrupted thread while copying data with reason: ", e);
+ } catch (ExecutionException e) {
+ LOGGER.error("Error while copying data with reason: ", e);
+ }
+ }
+
+ @Override
+ public void close() {
+ // wait for tasks completion and terminate executor gracefully
+ executorService.shutdown();
+ // close cassandra session for the given config
+ SessionManager.closeSession(cassandraConfig);
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraDestination.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraDestination.java
new file mode 100644
index 0000000..e2727ba
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraDestination.java
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.cassandra;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import io.airbyte.cdk.integrations.BaseConnector;
+import io.airbyte.cdk.integrations.base.AirbyteMessageConsumer;
+import io.airbyte.cdk.integrations.base.Destination;
+import io.airbyte.cdk.integrations.base.IntegrationRunner;
+import io.airbyte.protocol.models.v0.AirbyteConnectionStatus;
+import io.airbyte.protocol.models.v0.AirbyteMessage;
+import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
+import java.util.UUID;
+import java.util.function.Consumer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+class CassandraDestination extends BaseConnector implements Destination {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(CassandraDestination.class);
+
+ public static void main(String[] args) throws Exception {
+ new IntegrationRunner(new CassandraDestination()).run(args);
+ }
+
+ @Override
+ public AirbyteConnectionStatus check(final JsonNode config) {
+ var cassandraConfig = new CassandraConfig(config);
+ // add random uuid to avoid conflicts with existing tables.
+ String tableName = "table_" + UUID.randomUUID().toString().replace("-", "");
+ CassandraCqlProvider cassandraCqlProvider = null;
+ try {
+ cassandraCqlProvider = new CassandraCqlProvider(cassandraConfig);
+ // check connection and write permissions
+ cassandraCqlProvider.createKeySpaceIfNotExists(cassandraConfig.getKeyspace(),
+ cassandraConfig.getReplication());
+ cassandraCqlProvider.createTableIfNotExists(cassandraConfig.getKeyspace(), tableName);
+ cassandraCqlProvider.insert(cassandraConfig.getKeyspace(), tableName, "{}");
+ return new AirbyteConnectionStatus().withStatus(AirbyteConnectionStatus.Status.SUCCEEDED);
+ } catch (Exception e) {
+ LOGGER.error("Can't establish Cassandra connection with reason: ", e);
+ return new AirbyteConnectionStatus().withStatus(AirbyteConnectionStatus.Status.FAILED);
+ } finally {
+ if (cassandraCqlProvider != null) {
+ try {
+ cassandraCqlProvider.dropTableIfExists(cassandraConfig.getKeyspace(), tableName);
+ } catch (Exception e) {
+ LOGGER.error("Error while deleting temp table {} with reason: ", tableName, e);
+ }
+ cassandraCqlProvider.close();
+ }
+ }
+ }
+
+ @Override
+ public AirbyteMessageConsumer getConsumer(final JsonNode config,
+ final ConfiguredAirbyteCatalog configuredCatalog,
+ final Consumer<AirbyteMessage> outputRecordCollector) {
+ final CassandraConfig cassandraConfig = new CassandraConfig(config);
+ final CassandraCqlProvider cassandraCqlProvider = new CassandraCqlProvider(cassandraConfig);
+ return new CassandraMessageConsumer(cassandraConfig, configuredCatalog, cassandraCqlProvider, outputRecordCollector);
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraMessageConsumer.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraMessageConsumer.java
new file mode 100644
index 0000000..803cde8
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraMessageConsumer.java
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.cassandra;
+
+import io.airbyte.cdk.integrations.base.FailureTrackingAirbyteMessageConsumer;
+import io.airbyte.commons.json.Jsons;
+import io.airbyte.protocol.models.v0.AirbyteMessage;
+import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair;
+import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
+import java.util.Map;
+import java.util.function.Consumer;
+import java.util.stream.Collectors;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+class CassandraMessageConsumer extends FailureTrackingAirbyteMessageConsumer {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(CassandraMessageConsumer.class);
+
+ private final CassandraConfig cassandraConfig;
+
+ private final Consumer<AirbyteMessage> outputRecordCollector;
+
+ private final Map<AirbyteStreamNameNamespacePair, CassandraStreamConfig> cassandraStreams;
+
+ private final CassandraCqlProvider cassandraCqlProvider;
+
+ public CassandraMessageConsumer(final CassandraConfig cassandraConfig,
+ final ConfiguredAirbyteCatalog configuredCatalog,
+ final CassandraCqlProvider provider,
+ final Consumer<AirbyteMessage> outputRecordCollector) {
+ this.cassandraConfig = cassandraConfig;
+ this.outputRecordCollector = outputRecordCollector;
+ this.cassandraCqlProvider = provider;
+ var nameTransformer = new CassandraNameTransformer(cassandraConfig);
+ this.cassandraStreams = configuredCatalog.getStreams().stream()
+ .collect(Collectors.toUnmodifiableMap(
+ AirbyteStreamNameNamespacePair::fromConfiguredAirbyteSteam,
+ k -> new CassandraStreamConfig(
+ nameTransformer.outputKeyspace(k.getStream().getNamespace()),
+ nameTransformer.outputTable(k.getStream().getName()),
+ nameTransformer.outputTmpTable(k.getStream().getName()),
+ k.getDestinationSyncMode())));
+ }
+
+ @Override
+ protected void startTracked() {
+ cassandraStreams.forEach((k, v) -> {
+ cassandraCqlProvider.createKeySpaceIfNotExists(v.getKeyspace(), cassandraConfig.getReplication());
+ cassandraCqlProvider.createTableIfNotExists(v.getKeyspace(), v.getTempTableName());
+ });
+ }
+
+ @Override
+ protected void acceptTracked(final AirbyteMessage message) {
+ if (message.getType() == AirbyteMessage.Type.RECORD) {
+ var messageRecord = message.getRecord();
+ var streamConfig =
+ cassandraStreams.get(AirbyteStreamNameNamespacePair.fromRecordMessage(messageRecord));
+ if (streamConfig == null) {
+ throw new IllegalArgumentException("Unrecognized destination stream");
+ }
+ var data = Jsons.serialize(messageRecord.getData());
+ cassandraCqlProvider.insert(streamConfig.getKeyspace(), streamConfig.getTempTableName(), data);
+ } else if (message.getType() == AirbyteMessage.Type.STATE) {
+ outputRecordCollector.accept(message);
+ } else {
+ LOGGER.warn("Unsupported airbyte message type: {}", message.getType());
+ }
+ }
+
+ @Override
+ protected void close(final boolean hasFailed) {
+ if (!hasFailed) {
+ cassandraStreams.forEach((k, v) -> {
+ try {
+ cassandraCqlProvider.createTableIfNotExists(v.getKeyspace(), v.getTableName());
+ switch (v.getDestinationSyncMode()) {
+ case APPEND -> {
+ cassandraCqlProvider.copy(v.getKeyspace(), v.getTempTableName(), v.getTableName());
+ }
+ case OVERWRITE -> {
+ cassandraCqlProvider.truncate(v.getKeyspace(), v.getTableName());
+ cassandraCqlProvider.copy(v.getKeyspace(), v.getTempTableName(), v.getTableName());
+ }
+ default -> throw new UnsupportedOperationException();
+ }
+ } catch (final Exception e) {
+ LOGGER.error("Error while copying data to table {} with reason: ", v.getTableName(), e);
+ }
+ });
+ }
+
+ cassandraStreams.forEach((k, v) -> {
+ try {
+ cassandraCqlProvider.dropTableIfExists(v.getKeyspace(), v.getTempTableName());
+ } catch (final Exception e) {
+ LOGGER.error("Error while deleting temp table {} with reason: ", v.getTempTableName(), e);
+ }
+ });
+ cassandraCqlProvider.close();
+
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraNameTransformer.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraNameTransformer.java
new file mode 100644
index 0000000..da7f60b
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraNameTransformer.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.cassandra;
+
+import com.google.common.base.CharMatcher;
+import io.airbyte.cdk.integrations.destination.StandardNameTransformer;
+import io.airbyte.commons.text.Names;
+
+class CassandraNameTransformer extends StandardNameTransformer {
+
+ private final CassandraConfig cassandraConfig;
+
+ public CassandraNameTransformer(CassandraConfig cassandraConfig) {
+ this.cassandraConfig = cassandraConfig;
+ }
+
+ String outputKeyspace(String namespace) {
+ if (namespace == null || namespace.isBlank()) {
+ return cassandraConfig.getKeyspace();
+ }
+ return CharMatcher.is('_').trimLeadingFrom(Names.toAlphanumericAndUnderscore(namespace));
+ }
+
+ String outputTable(String streamName) {
+ var tableName = super.getRawTableName(streamName.toLowerCase()).substring(1);
+ // max allowed length for a cassandra table is 48 characters
+ return tableName.length() > 48 ? tableName.substring(0, 48) : tableName;
+ }
+
+ String outputTmpTable(String streamName) {
+ var tableName = super.getTmpTableName(streamName.toLowerCase()).substring(1);
+ // max allowed length for a cassandra table is 48 characters
+ return tableName.length() > 48 ? tableName.substring(0, 48) : tableName;
+ }
+
+ String outputColumn(String columnName) {
+ return Names.doubleQuote(columnName.toLowerCase());
+ }
+
+}
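
For orientation, here is a minimal illustrative sketch of what these transformations produce, based on the rules above and the expectations in `CassandraNameTransformerTest` further down (the tmp-table suffix is randomly generated by the base transformer and is shown here as `<suffix>`):

```java
// Illustrative only; outputs follow the sanitization and 48-character truncation rules above.
CassandraNameTransformer transformer = new CassandraNameTransformer(cassandraConfig);

transformer.outputKeyspace(null);            // -> configured default keyspace
transformer.outputKeyspace("***keyspace^h"); // -> "keyspace_h"
transformer.outputTable("Stream_Name");      // -> "airbyte_raw_stream_name"
transformer.outputTmpTable("Stream_Name");   // -> "airbyte_tmp_<suffix>_stream_name"
transformer.outputColumn("_airbyte_data");   // -> "\"_airbyte_data\""
```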
diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraRecord.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraRecord.java
new file mode 100644
index 0000000..63af6d9
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraRecord.java
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.cassandra;
+
+import java.time.Instant;
+import java.util.UUID;
+
+class CassandraRecord {
+
+ private final UUID id;
+
+ private final String data;
+
+ private final Instant timestamp;
+
+ public CassandraRecord(UUID id, String data, Instant timestamp) {
+ this.id = id;
+ this.data = data;
+ this.timestamp = timestamp;
+ }
+
+ static CassandraRecord of(UUID id, String data, Instant timestamp) {
+ return new CassandraRecord(id, data, timestamp);
+ }
+
+ public UUID getId() {
+ return id;
+ }
+
+ public String getData() {
+ return data;
+ }
+
+ public Instant getTimestamp() {
+ return timestamp;
+ }
+
+ @Override
+ public String toString() {
+ return "CassandraRecord{" +
+ "id=" + id +
+ ", data='" + data + '\'' +
+ ", timestamp=" + timestamp +
+ '}';
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraStreamConfig.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraStreamConfig.java
new file mode 100644
index 0000000..dd7d85d
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/CassandraStreamConfig.java
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.cassandra;
+
+import io.airbyte.protocol.models.v0.DestinationSyncMode;
+
+/*
+ * Immutable configuration class for storing destination stream config.
+ */
+class CassandraStreamConfig {
+
+ private final String keyspace;
+
+ private final String tableName;
+
+ private final String tempTableName;
+
+ private final DestinationSyncMode destinationSyncMode;
+
+ public CassandraStreamConfig(String keyspace,
+ String tableName,
+ String tempTableName,
+ DestinationSyncMode destinationSyncMode) {
+ this.keyspace = keyspace;
+ this.tableName = tableName;
+ this.tempTableName = tempTableName;
+ this.destinationSyncMode = destinationSyncMode;
+ }
+
+ public String getKeyspace() {
+ return keyspace;
+ }
+
+ public String getTableName() {
+ return tableName;
+ }
+
+ public String getTempTableName() {
+ return tempTableName;
+ }
+
+ public DestinationSyncMode getDestinationSyncMode() {
+ return destinationSyncMode;
+ }
+
+ @Override
+ public String toString() {
+ return "CassandraStreamConfig{" +
+ "keyspace='" + keyspace + '\'' +
+ ", tableName='" + tableName + '\'' +
+ ", tempTableName='" + tempTableName + '\'' +
+ ", destinationSyncMode=" + destinationSyncMode +
+ '}';
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/SessionManager.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/SessionManager.java
new file mode 100644
index 0000000..3837725
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/SessionManager.java
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.cassandra;
+
+import com.datastax.oss.driver.api.core.CqlSession;
+import java.net.InetSocketAddress;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicInteger;
+
+class SessionManager {
+
+ // AtomicInteger is used for convenience, this class is not thread safe
+ // and needs additional synchronization for that.
+ private static final ConcurrentHashMap<CassandraConfig, Tuple<CqlSession, AtomicInteger>> sessions;
+
+ static {
+ sessions = new ConcurrentHashMap<>();
+ }
+
+ private SessionManager() {
+
+ }
+
+ /*
+ * CqlSession objects are heavyweight and can hold several TCP connections to the Cassandra cluster.
+ * For that reason, sessions are reused per configuration. Sessions are thread-safe and can be
+ * accessed from different threads.
+ *
+ */
+ public static CqlSession initSession(CassandraConfig cassandraConfig) {
+ var cachedSession = sessions.get(cassandraConfig);
+ if (cachedSession != null) {
+ cachedSession.value2().incrementAndGet();
+ return cachedSession.value1();
+ } else {
+ var session = CqlSession.builder()
+ .withLocalDatacenter(cassandraConfig.getDatacenter())
+ .addContactPoint(new InetSocketAddress(cassandraConfig.getAddress(), cassandraConfig.getPort()))
+ .withAuthCredentials(cassandraConfig.getUsername(), cassandraConfig.getPassword())
+ .build();
+ sessions.put(cassandraConfig, Tuple.of(session, new AtomicInteger(1)));
+ return session;
+ }
+ }
+
+ /*
+ * Close the session configured with the given Cassandra config. If the session is used by more than
+ * one external instance, only decrease the usage count; otherwise close the session and remove it
+ * from the map.
+ *
+ */
+ public static void closeSession(CassandraConfig cassandraConfig) {
+ var cachedSession = sessions.get(cassandraConfig);
+ if (cachedSession == null) {
+ throw new IllegalStateException("No session for the provided config");
+ }
+ int count = cachedSession.value2().decrementAndGet();
+ if (count < 1) {
+ cachedSession.value1().close();
+ sessions.remove(cassandraConfig);
+ }
+ }
+
+}
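
A minimal usage sketch of the reference-counting behavior described above (assuming same-package access and the `CassandraConfig` constructor used by the tests below):

```java
CassandraConfig config = new CassandraConfig(
    "default_keyspace", "usr", "pw", "127.0.0.1", 9042, "datacenter1", 1);

CqlSession first = SessionManager.initSession(config);  // opens a new session, usage count = 1
CqlSession second = SessionManager.initSession(config); // returns the cached session, count = 2
// first == second: both callers share the same underlying CqlSession

SessionManager.closeSession(config); // count drops to 1, session stays open
SessionManager.closeSession(config); // count drops to 0, session is closed and removed
```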
diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/Tuple.java b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/Tuple.java
new file mode 100644
index 0000000..224f9b9
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cassandra/src/main/java/io/airbyte/integrations/destination/cassandra/Tuple.java
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.cassandra;
+
+public class Tuple<V1, V2> {
+
+ private final V1 value1;
+
+ private final V2 value2;
+
+ public Tuple(V1 value1, V2 value2) {
+ this.value1 = value1;
+ this.value2 = value2;
+ }
+
+ public static <V1, V2> Tuple<V1, V2> of(V1 value1, V2 value2) {
+ return new Tuple<>(value1, value2);
+ }
+
+ public V1 value1() {
+ return value1;
+ }
+
+ public V2 value2() {
+ return value2;
+ }
+
+ @Override
+ public String toString() {
+ return "Tuple{" +
+ "value1=" + value1 +
+ ", value2=" + value2 +
+ '}';
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-cassandra/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-cassandra/src/main/resources/spec.json
new file mode 100644
index 0000000..fac77fe
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cassandra/src/main/resources/spec.json
@@ -0,0 +1,65 @@
+{
+ "documentationUrl": "https://docs.airbyte.com/integrations/destinations/cassandra",
+ "supportsIncremental": true,
+ "supportsNormalization": false,
+ "supportsDBT": false,
+ "supported_destination_sync_modes": ["overwrite", "append"],
+ "connectionSpecification": {
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "title": "Cassandra Destination Spec",
+ "type": "object",
+ "required": ["keyspace", "username", "password", "address", "port"],
+ "additionalProperties": true,
+ "properties": {
+ "keyspace": {
+ "title": "Keyspace",
+ "description": "Default Cassandra keyspace to create data in.",
+ "type": "string",
+ "order": 0
+ },
+ "username": {
+ "title": "Username",
+ "description": "Username to use to access Cassandra.",
+ "type": "string",
+ "order": 1
+ },
+ "password": {
+ "title": "Password",
+ "description": "Password associated with Cassandra.",
+ "type": "string",
+ "airbyte_secret": true,
+ "order": 2
+ },
+ "address": {
+ "title": "Address",
+ "description": "Address to connect to.",
+ "type": "string",
+ "examples": ["localhost,127.0.0.1"],
+ "order": 3
+ },
+ "port": {
+ "title": "Port",
+ "description": "Port of Cassandra.",
+ "type": "integer",
+ "minimum": 0,
+ "maximum": 65536,
+ "default": 9042,
+ "order": 4
+ },
+ "datacenter": {
+ "title": "Datacenter",
+ "description": "Datacenter of the cassandra cluster.",
+ "type": "string",
+ "default": "datacenter1",
+ "order": 5
+ },
+ "replication": {
+ "title": "Replication factor",
+ "type": "integer",
+ "description": "Indicates to how many nodes the data should be replicated.",
+ "default": 1,
+ "order": 6
+ }
+ }
+ }
+}
diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraContainerInitializr.java b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraContainerInitializr.java
new file mode 100644
index 0000000..76cb904
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraContainerInitializr.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.cassandra;
+
+import org.testcontainers.containers.CassandraContainer;
+
+class CassandraContainerInitializr {
+
+ private static ConfiguredCassandraContainer cassandraContainer;
+
+ private CassandraContainerInitializr() {
+
+ }
+
+ public static ConfiguredCassandraContainer initContainer() {
+ if (cassandraContainer == null) {
+ cassandraContainer = new ConfiguredCassandraContainer();
+ }
+ cassandraContainer.start();
+ return cassandraContainer;
+ }
+
+ public static class ConfiguredCassandraContainer extends CassandraContainer<ConfiguredCassandraContainer> {
+
+ ConfiguredCassandraContainer() {
+ // latest version compatible with the internal testcontainers DataStax driver.
+ super("cassandra:3.11.11");
+ }
+
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProviderIT.java b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProviderIT.java
new file mode 100644
index 0000000..9f0ebae
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraCqlProviderIT.java
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.cassandra;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import com.datastax.oss.driver.api.core.servererrors.InvalidQueryException;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInstance;
+
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+class CassandraCqlProviderIT {
+
+ private static final String CASSANDRA_KEYSPACE = "cassandra_keyspace";
+
+ private static final String CASSANDRA_TABLE = "cassandra_table";
+
+ private CassandraCqlProvider cassandraCqlProvider;
+
+ private CassandraNameTransformer nameTransformer;
+
+ @BeforeAll
+ void setup() {
+ var cassandraContainer = CassandraContainerInitializr.initContainer();
+ var cassandraConfig = TestDataFactory.createCassandraConfig(
+ cassandraContainer.getUsername(),
+ cassandraContainer.getPassword(),
+ cassandraContainer.getHost(),
+ cassandraContainer.getFirstMappedPort());
+ this.cassandraCqlProvider = new CassandraCqlProvider(cassandraConfig);
+ this.nameTransformer = new CassandraNameTransformer(cassandraConfig);
+ cassandraCqlProvider.createKeySpaceIfNotExists(CASSANDRA_KEYSPACE, 1);
+ cassandraCqlProvider.createTableIfNotExists(CASSANDRA_KEYSPACE, CASSANDRA_TABLE);
+ }
+
+ @AfterEach
+ void clean() {
+ cassandraCqlProvider.truncate(CASSANDRA_KEYSPACE, CASSANDRA_TABLE);
+ }
+
+ @Test
+ void testCreateKeySpaceIfNotExists() {
+ String keyspace = nameTransformer.outputKeyspace("test_keyspace");
+ assertDoesNotThrow(() -> cassandraCqlProvider.createKeySpaceIfNotExists(keyspace, 1));
+ }
+
+ @Test
+ void testCreateTableIfNotExists() {
+ String table = nameTransformer.outputTable("test_stream");
+ assertDoesNotThrow(() -> cassandraCqlProvider.createTableIfNotExists(CASSANDRA_KEYSPACE, table));
+ }
+
+ @Test
+ void testInsert() {
+ // given
+ cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, CASSANDRA_TABLE, "{\"property\":\"data1\"}");
+ cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, CASSANDRA_TABLE, "{\"property\":\"data2\"}");
+ cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, CASSANDRA_TABLE, "{\"property\":\"data3\"}");
+
+ // when
+ var resultSet = cassandraCqlProvider.select(CASSANDRA_KEYSPACE, CASSANDRA_TABLE);
+
+ // then
+ assertThat(resultSet)
+ .isNotNull()
+ .hasSize(3)
+ .anyMatch(r -> r.getData().equals("{\"property\":\"data1\"}"))
+ .anyMatch(r -> r.getData().equals("{\"property\":\"data2\"}"))
+ .anyMatch(r -> r.getData().equals("{\"property\":\"data3\"}"));
+
+ }
+
+ @Test
+ void testTruncate() {
+ // given
+ cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, CASSANDRA_TABLE, "{\"property\":\"data1\"}");
+ cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, CASSANDRA_TABLE, "{\"property\":\"data2\"}");
+ cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, CASSANDRA_TABLE, "{\"property\":\"data3\"}");
+
+ // when
+ cassandraCqlProvider.truncate(CASSANDRA_KEYSPACE, CASSANDRA_TABLE);
+ var resultSet = cassandraCqlProvider.select(CASSANDRA_KEYSPACE, CASSANDRA_TABLE);
+
+ // then
+ assertThat(resultSet)
+ .isNotNull()
+ .isEmpty();
+ }
+
+ @Test
+ void testDropTableIfExists() {
+ // given
+ String table = nameTransformer.outputTmpTable("test_stream");
+ cassandraCqlProvider.createTableIfNotExists(CASSANDRA_KEYSPACE, table);
+
+ // when
+ cassandraCqlProvider.dropTableIfExists(CASSANDRA_KEYSPACE, table);
+
+ // then
+ assertThrows(InvalidQueryException.class, () -> cassandraCqlProvider.select(CASSANDRA_KEYSPACE, table));
+ }
+
+ @Test
+ void testCopy() {
+ // given
+ String tmpTable = nameTransformer.outputTmpTable("test_stream_copy");
+ cassandraCqlProvider.createTableIfNotExists(CASSANDRA_KEYSPACE, tmpTable);
+ cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, tmpTable, "{\"property\":\"data1\"}");
+ cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, tmpTable, "{\"property\":\"data2\"}");
+ cassandraCqlProvider.insert(CASSANDRA_KEYSPACE, tmpTable, "{\"property\":\"data3\"}");
+
+ String rawTable = nameTransformer.outputTable("test_stream_copy");
+ cassandraCqlProvider.createTableIfNotExists(CASSANDRA_KEYSPACE, rawTable);
+
+ // when
+ cassandraCqlProvider.copy(CASSANDRA_KEYSPACE, tmpTable, rawTable);
+ var resultSet = cassandraCqlProvider.select(CASSANDRA_KEYSPACE, rawTable);
+
+ // then
+ assertThat(resultSet)
+ .isNotNull()
+ .hasSize(3)
+ .anyMatch(r -> r.getData().equals("{\"property\":\"data1\"}"))
+ .anyMatch(r -> r.getData().equals("{\"property\":\"data2\"}"))
+ .anyMatch(r -> r.getData().equals("{\"property\":\"data3\"}"));
+
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraDestinationAcceptanceTest.java
new file mode 100644
index 0000000..44c7bf0
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraDestinationAcceptanceTest.java
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.cassandra;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import io.airbyte.cdk.integrations.standardtest.destination.DestinationAcceptanceTest;
+import io.airbyte.cdk.integrations.util.HostPortResolver;
+import io.airbyte.commons.json.Jsons;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import java.util.stream.Collectors;
+import org.junit.jupiter.api.BeforeAll;
+
+public class CassandraDestinationAcceptanceTest extends DestinationAcceptanceTest {
+
+ private JsonNode configJson;
+
+ private CassandraCqlProvider cassandraCqlProvider;
+
+ private CassandraNameTransformer cassandraNameTransformer;
+
+ private static CassandraContainerInitializr.ConfiguredCassandraContainer cassandraContainer;
+
+ @BeforeAll
+ static void initContainer() {
+ cassandraContainer = CassandraContainerInitializr.initContainer();
+ }
+
+ @Override
+ protected void setup(final TestDestinationEnv testEnv, final HashSet<String> TEST_SCHEMAS) {
+ configJson = TestDataFactory.createJsonConfig(
+ cassandraContainer.getUsername(),
+ cassandraContainer.getPassword(),
+ HostPortResolver.resolveHost(cassandraContainer),
+ HostPortResolver.resolvePort(cassandraContainer));
+ final var cassandraConfig = new CassandraConfig(configJson);
+ cassandraCqlProvider = new CassandraCqlProvider(cassandraConfig);
+ cassandraNameTransformer = new CassandraNameTransformer(cassandraConfig);
+ }
+
+ @Override
+ protected void tearDown(final TestDestinationEnv testEnv) {
+ cassandraCqlProvider.retrieveMetadata().forEach(meta -> {
+ final var keyspace = meta.value1();
+ meta.value2().forEach(table -> cassandraCqlProvider.truncate(keyspace, table));
+ });
+ }
+
+ @Override
+ protected String getImageName() {
+ return "airbyte/destination-cassandra:dev";
+ }
+
+ @Override
+ protected JsonNode getConfig() {
+ return configJson;
+ }
+
+ @Override
+ protected boolean implementsNamespaces() {
+ return true;
+ }
+
+ @Override
+ protected JsonNode getFailCheckConfig() {
+ return TestDataFactory.createJsonConfig(
+ "usr",
+ "pw",
+ "127.0.192.1",
+ 8080);
+ }
+
+ @Override
+ protected List<JsonNode> retrieveRecords(final TestDestinationEnv testEnv,
+ final String streamName,
+ final String namespace,
+ final JsonNode streamSchema) {
+ final var keyspace = cassandraNameTransformer.outputKeyspace(namespace);
+ final var table = cassandraNameTransformer.outputTable(streamName);
+ return cassandraCqlProvider.select(keyspace, table).stream()
+ .sorted(Comparator.comparing(CassandraRecord::getTimestamp))
+ .map(CassandraRecord::getData)
+ .map(Jsons::deserialize)
+ .collect(Collectors.toList());
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraDestinationIT.java b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraDestinationIT.java
new file mode 100644
index 0000000..ea30e16
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraDestinationIT.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.cassandra;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import io.airbyte.integrations.destination.cassandra.CassandraContainerInitializr.ConfiguredCassandraContainer;
+import io.airbyte.protocol.models.v0.AirbyteConnectionStatus;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInstance;
+
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+class CassandraDestinationIT {
+
+ private CassandraDestination cassandraDestination;
+
+ private ConfiguredCassandraContainer cassandraContainer;
+
+ @BeforeAll
+ void setup() {
+ this.cassandraContainer = CassandraContainerInitializr.initContainer();
+ this.cassandraDestination = new CassandraDestination();
+ }
+
+ @Test
+ void testCheckWithStatusSucceeded() {
+
+ var jsonConfiguration = TestDataFactory.createJsonConfig(
+ cassandraContainer.getUsername(),
+ cassandraContainer.getPassword(),
+ cassandraContainer.getHost(),
+ cassandraContainer.getFirstMappedPort());
+
+ var connectionStatus = cassandraDestination.check(jsonConfiguration);
+
+ assertThat(connectionStatus.getStatus()).isEqualTo(AirbyteConnectionStatus.Status.SUCCEEDED);
+ }
+
+ @Test
+ void testCheckWithStatusFailed() {
+
+ var jsonConfiguration = TestDataFactory.createJsonConfig(
+ "usr",
+ "pw",
+ "192.0.2.1",
+ 8080);
+
+ var connectionStatus = cassandraDestination.check(jsonConfiguration);
+
+ assertThat(connectionStatus.getStatus()).isEqualTo(AirbyteConnectionStatus.Status.FAILED);
+
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraMessageConsumerIT.java b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraMessageConsumerIT.java
new file mode 100644
index 0000000..678301d
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/CassandraMessageConsumerIT.java
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.cassandra;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.google.common.collect.ImmutableMap;
+import io.airbyte.commons.json.Jsons;
+import io.airbyte.protocol.models.v0.AirbyteMessage;
+import io.airbyte.protocol.models.v0.DestinationSyncMode;
+import java.util.function.Function;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.MethodOrderer.OrderAnnotation;
+import org.junit.jupiter.api.Order;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInstance;
+import org.junit.jupiter.api.TestMethodOrder;
+
+@TestMethodOrder(OrderAnnotation.class)
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+class CassandraMessageConsumerIT {
+
+ private static final String AIRBYTE_NAMESPACE_1 = "airbyte_namespace_1";
+ private static final String AIRBYTE_NAMESPACE_2 = "airbyte_namespace_2";
+
+ private static final String AIRBYTE_STREAM_1 = "airbyte_stream_1";
+ private static final String AIRBYTE_STREAM_2 = "airbyte_stream_2";
+
+ private CassandraContainerInitializr.ConfiguredCassandraContainer cassandraContainer;
+
+ private CassandraConfig cassandraConfig;
+
+ private CassandraMessageConsumer cassandraMessageConsumer;
+
+ private CassandraNameTransformer nameTransformer;
+
+ @BeforeAll
+ void setup() {
+ cassandraContainer = CassandraContainerInitializr.initContainer();
+ cassandraConfig = TestDataFactory.createCassandraConfig(
+ cassandraContainer.getUsername(),
+ cassandraContainer.getPassword(),
+ cassandraContainer.getHost(),
+ cassandraContainer.getFirstMappedPort());
+
+ final var stream1 = TestDataFactory.createAirbyteStream(AIRBYTE_STREAM_1, AIRBYTE_NAMESPACE_1);
+ final var stream2 = TestDataFactory.createAirbyteStream(AIRBYTE_STREAM_2, AIRBYTE_NAMESPACE_2);
+
+ final var cStream1 = TestDataFactory.createConfiguredAirbyteStream(DestinationSyncMode.APPEND, stream1);
+ final var cStream2 = TestDataFactory.createConfiguredAirbyteStream(DestinationSyncMode.OVERWRITE, stream2);
+
+ final var catalog = TestDataFactory.createConfiguredAirbyteCatalog(cStream1, cStream2);
+
+ final CassandraCqlProvider cassandraCqlProvider = new CassandraCqlProvider(cassandraConfig);
+ cassandraMessageConsumer = new CassandraMessageConsumer(cassandraConfig, catalog, cassandraCqlProvider, message -> {});
+ nameTransformer = new CassandraNameTransformer(cassandraConfig);
+ }
+
+ @AfterAll
+ void close() {
+ cassandraContainer.close();
+ }
+
+ @Test
+ @Order(1)
+ void testStartTracked() {
+ assertDoesNotThrow(() -> cassandraMessageConsumer.startTracked());
+ }
+
+ @Test
+ @Order(2)
+ void testAcceptTracked() {
+
+ final Function<String, JsonNode> function =
+ data -> Jsons.jsonNode(ImmutableMap.builder().put("property", data).build());
+
+ assertDoesNotThrow(() -> {
+ cassandraMessageConsumer.acceptTracked(
+ TestDataFactory.createAirbyteMessage(AirbyteMessage.Type.RECORD, AIRBYTE_STREAM_1, AIRBYTE_NAMESPACE_1,
+ function.apply("data1")));
+ cassandraMessageConsumer.acceptTracked(
+ TestDataFactory.createAirbyteMessage(AirbyteMessage.Type.RECORD, AIRBYTE_STREAM_1, AIRBYTE_NAMESPACE_1,
+ function.apply("data2")));
+ cassandraMessageConsumer.acceptTracked(
+ TestDataFactory.createAirbyteMessage(AirbyteMessage.Type.RECORD, AIRBYTE_STREAM_2, AIRBYTE_NAMESPACE_2,
+ function.apply("data3")));
+ cassandraMessageConsumer.acceptTracked(
+ TestDataFactory.createAirbyteMessage(AirbyteMessage.Type.RECORD, AIRBYTE_STREAM_2, AIRBYTE_NAMESPACE_2,
+ function.apply("data4")));
+ cassandraMessageConsumer.acceptTracked(
+ TestDataFactory.createAirbyteMessage(AirbyteMessage.Type.STATE, AIRBYTE_STREAM_2, AIRBYTE_NAMESPACE_2,
+ function.apply("data5")));
+ });
+
+ }
+
+ @Test
+ @Order(3)
+ void testClose() {
+
+ assertDoesNotThrow(() -> cassandraMessageConsumer.close(false));
+
+ }
+
+ @Test
+ @Order(4)
+ void testFinalState() {
+ final var keyspace1 = nameTransformer.outputKeyspace(AIRBYTE_NAMESPACE_1);
+ final var keyspace2 = nameTransformer.outputKeyspace(AIRBYTE_NAMESPACE_2);
+ final var table1 = nameTransformer.outputTable(AIRBYTE_STREAM_1);
+ final var table2 = nameTransformer.outputTable(AIRBYTE_STREAM_2);
+ try (final var cassandraCqlProvider = new CassandraCqlProvider(cassandraConfig)) {
+ final var resultSet1 = cassandraCqlProvider.select(keyspace1, table1);
+ final var resultSet2 = cassandraCqlProvider.select(keyspace2, table2);
+ assertThat(resultSet1)
+ .isNotNull()
+ .hasSize(2)
+ .anyMatch(r -> r.getData().equals("{\"property\":\"data1\"}"))
+ .anyMatch(r -> r.getData().equals("{\"property\":\"data2\"}"));
+
+ assertThat(resultSet2)
+ .isNotNull()
+ .hasSize(2)
+ .anyMatch(r -> r.getData().equals("{\"property\":\"data3\"}"))
+ .anyMatch(r -> r.getData().equals("{\"property\":\"data4\"}"));
+ }
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/TestDataFactory.java b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/TestDataFactory.java
new file mode 100644
index 0000000..da3af9e
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cassandra/src/test-integration/java/io/airbyte/integrations/destination/cassandra/TestDataFactory.java
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.cassandra;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;
+import io.airbyte.commons.json.Jsons;
+import io.airbyte.protocol.models.v0.AirbyteMessage;
+import io.airbyte.protocol.models.v0.AirbyteRecordMessage;
+import io.airbyte.protocol.models.v0.AirbyteStream;
+import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
+import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream;
+import io.airbyte.protocol.models.v0.DestinationSyncMode;
+import io.airbyte.protocol.models.v0.SyncMode;
+import java.time.Instant;
+import java.util.List;
+
+public class TestDataFactory {
+
+ private TestDataFactory() {
+
+ }
+
+ static CassandraConfig createCassandraConfig(String username, String password, String address, int port) {
+ return new CassandraConfig(
+ "default_keyspace",
+ username,
+ password,
+ address,
+ port,
+ "datacenter1",
+ 1);
+ }
+
+ static JsonNode createJsonConfig(String username, String password, String address, int port) {
+ return Jsons.jsonNode(ImmutableMap.builder()
+ .put("keyspace", "default_keyspace")
+ .put("username", username)
+ .put("password", password)
+ .put("address", address)
+ .put("port", port)
+ .put("datacenter", "datacenter1")
+ .put("replication", 1)
+ .build());
+ }
+
+ static AirbyteMessage createAirbyteMessage(AirbyteMessage.Type type,
+ String streamName,
+ String namespace,
+ JsonNode data) {
+ return new AirbyteMessage()
+ .withType(type)
+ .withRecord(new AirbyteRecordMessage()
+ .withStream(streamName)
+ .withNamespace(namespace)
+ .withData(data)
+ .withEmittedAt(Instant.now().toEpochMilli()));
+ }
+
+ static AirbyteStream createAirbyteStream(String name, String namespace) {
+ return new AirbyteStream()
+ .withName(name)
+ .withNamespace(namespace)
+ .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH));
+ }
+
+ static ConfiguredAirbyteStream createConfiguredAirbyteStream(DestinationSyncMode syncMode, AirbyteStream stream) {
+ return new ConfiguredAirbyteStream()
+ .withDestinationSyncMode(syncMode)
+ .withStream(stream);
+ }
+
+ static ConfiguredAirbyteCatalog createConfiguredAirbyteCatalog(ConfiguredAirbyteStream... configuredStreams) {
+ return new ConfiguredAirbyteCatalog().withStreams(List.of(configuredStreams));
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraConfigTest.java b/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraConfigTest.java
new file mode 100644
index 0000000..c425481
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraConfigTest.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.cassandra;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+class CassandraConfigTest {
+
+ private CassandraConfig cassandraConfig;
+
+ @BeforeEach
+ void setup() {
+ var jsonNode = TestDataFactory.createJsonConfig(
+ "usr",
+ "pw",
+ "127.0.0.1",
+ 9042);
+ this.cassandraConfig = new CassandraConfig(jsonNode);
+ }
+
+ @Test
+ void testConfig() {
+
+ assertThat(cassandraConfig)
+ .hasFieldOrPropertyWithValue("keyspace", "default_keyspace")
+ .hasFieldOrPropertyWithValue("username", "usr")
+ .hasFieldOrPropertyWithValue("password", "pw")
+ .hasFieldOrPropertyWithValue("address", "127.0.0.1")
+ .hasFieldOrPropertyWithValue("port", 9042)
+ .hasFieldOrPropertyWithValue("datacenter", "datacenter1")
+ .hasFieldOrPropertyWithValue("replication", 1);
+
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraNameTransformerTest.java b/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraNameTransformerTest.java
new file mode 100644
index 0000000..6922de7
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraNameTransformerTest.java
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.cassandra;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInstance;
+
+@TestInstance(TestInstance.Lifecycle.PER_CLASS)
+class CassandraNameTransformerTest {
+
+ private CassandraNameTransformer cassandraNameTransformer;
+
+ @BeforeAll
+ void setup() {
+ var cassandraConfig = TestDataFactory.createCassandraConfig(
+ "usr",
+ "pw",
+ "127.0.0.1",
+ 9042);
+ this.cassandraNameTransformer = new CassandraNameTransformer(cassandraConfig);
+ }
+
+ @Test
+ void testOutputTable() {
+
+ var table = cassandraNameTransformer.outputTable("stream_name");
+
+ assertThat(table).matches("airbyte_raw_stream_name");
+
+ }
+
+ @Test
+ void testOutputTmpTable() {
+
+ var table = cassandraNameTransformer.outputTmpTable("stream_name");
+
+ assertThat(table).matches("airbyte_tmp_+[a-z]+_stream_name");
+
+ }
+
+ @Test
+ void testOutputKeyspace() {
+
+ var keyspace = cassandraNameTransformer.outputKeyspace("***keyspace^h");
+
+ assertThat(keyspace).matches("keyspace_h");
+
+ }
+
+ @Test
+ void outputColumn() {
+
+ var column = cassandraNameTransformer.outputColumn("_airbyte_data");
+
+ assertThat(column).matches("\"_airbyte_data\"");
+
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraRecordConsumerTest.java b/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraRecordConsumerTest.java
new file mode 100644
index 0000000..dc35e4b
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/CassandraRecordConsumerTest.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.cassandra;
+
+import io.airbyte.cdk.integrations.base.FailureTrackingAirbyteMessageConsumer;
+import io.airbyte.cdk.integrations.standardtest.destination.PerStreamStateMessageTest;
+import io.airbyte.protocol.models.v0.AirbyteMessage;
+import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
+import java.util.function.Consumer;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.mockito.InjectMocks;
+import org.mockito.Mock;
+import org.mockito.junit.jupiter.MockitoExtension;
+
+@ExtendWith(MockitoExtension.class)
+public class CassandraRecordConsumerTest extends PerStreamStateMessageTest {
+
+ @Mock
+ private Consumer<AirbyteMessage> outputRecordCollector;
+
+ @InjectMocks
+ private CassandraMessageConsumer consumer;
+ @Mock
+ private CassandraConfig config;
+ @Mock
+ private ConfiguredAirbyteCatalog catalog;
+ @Mock
+ private CassandraCqlProvider provider;
+
+ @BeforeEach
+ public void init() {
+ consumer = new CassandraMessageConsumer(config, catalog, provider, outputRecordCollector);
+ }
+
+ @Override
+ protected Consumer<AirbyteMessage> getMockedConsumer() {
+ return outputRecordCollector;
+ }
+
+ @Override
+ protected FailureTrackingAirbyteMessageConsumer getMessageConsumer() {
+ return consumer;
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/TestDataFactory.java b/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/TestDataFactory.java
new file mode 100644
index 0000000..da3af9e
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cassandra/src/test/java/io/airbyte/integrations/destination/cassandra/TestDataFactory.java
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.cassandra;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;
+import io.airbyte.commons.json.Jsons;
+import io.airbyte.protocol.models.v0.AirbyteMessage;
+import io.airbyte.protocol.models.v0.AirbyteRecordMessage;
+import io.airbyte.protocol.models.v0.AirbyteStream;
+import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
+import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream;
+import io.airbyte.protocol.models.v0.DestinationSyncMode;
+import io.airbyte.protocol.models.v0.SyncMode;
+import java.time.Instant;
+import java.util.List;
+
+public class TestDataFactory {
+
+ private TestDataFactory() {
+
+ }
+
+ static CassandraConfig createCassandraConfig(String username, String password, String address, int port) {
+ return new CassandraConfig(
+ "default_keyspace",
+ username,
+ password,
+ address,
+ port,
+ "datacenter1",
+ 1);
+ }
+
+ static JsonNode createJsonConfig(String username, String password, String address, int port) {
+ return Jsons.jsonNode(ImmutableMap.builder()
+ .put("keyspace", "default_keyspace")
+ .put("username", username)
+ .put("password", password)
+ .put("address", address)
+ .put("port", port)
+ .put("datacenter", "datacenter1")
+ .put("replication", 1)
+ .build());
+ }
+
+ static AirbyteMessage createAirbyteMessage(AirbyteMessage.Type type,
+ String streamName,
+ String namespace,
+ JsonNode data) {
+ return new AirbyteMessage()
+ .withType(type)
+ .withRecord(new AirbyteRecordMessage()
+ .withStream(streamName)
+ .withNamespace(namespace)
+ .withData(data)
+ .withEmittedAt(Instant.now().toEpochMilli()));
+ }
+
+ static AirbyteStream createAirbyteStream(String name, String namespace) {
+ return new AirbyteStream()
+ .withName(name)
+ .withNamespace(namespace)
+ .withSupportedSyncModes(Lists.newArrayList(SyncMode.FULL_REFRESH));
+ }
+
+ static ConfiguredAirbyteStream createConfiguredAirbyteStream(DestinationSyncMode syncMode, AirbyteStream stream) {
+ return new ConfiguredAirbyteStream()
+ .withDestinationSyncMode(syncMode)
+ .withStream(stream);
+ }
+
+ static ConfiguredAirbyteCatalog createConfiguredAirbyteCatalog(ConfiguredAirbyteStream... configuredStreams) {
+ return new ConfiguredAirbyteCatalog().withStreams(List.of(configuredStreams));
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-cumulio/Dockerfile b/airbyte-integrations/connectors/destination-cumulio/Dockerfile
new file mode 100644
index 0000000..90e3f08
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cumulio/Dockerfile
@@ -0,0 +1,42 @@
+FROM python:3.9.11 as base
+# FROM python:3.9.11-alpine3.15 as base
+# switched from alpine as there were tons of errors (in case you want to switch back to alpine)
+# - https://stackoverflow.com/a/57485724/5246670
+# - numpy error: https://stackoverflow.com/a/22411624/5246670
+# - libstdc++ https://github.com/amancevice/docker-pandas/issues/12#issuecomment-717215043
+# - musl-dev linux-headers g++ because of: https://stackoverflow.com/a/40407099/5246670
+
+# build and load all requirements
+FROM base as builder
+WORKDIR /airbyte/integration_code
+
+# upgrade pip to the latest version
+RUN apt-get update && apt-get -y upgrade \
+ && pip install --upgrade pip
+
+COPY setup.py ./
+# install necessary packages to a temporary folder
+RUN pip install --prefix=/install .
+# build a clean environment
+FROM base
+# RUN conda install -c conda-forge python-duckdb
+WORKDIR /airbyte/integration_code
+
+# copy all loaded and built libraries to a pure basic image
+COPY --from=builder /install /usr/local
+# add default timezone settings
+COPY --from=builder /usr/share/zoneinfo/Etc/UTC /etc/localtime
+RUN echo "Etc/UTC" > /etc/timezone
+
+#adding duckdb manually (outside of setup.py - lots of errors)
+RUN pip install duckdb
+
+# copy payload code only
+COPY main.py ./
+COPY destination_cumulio ./destination_cumulio
+
+ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
+ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]
+
+LABEL io.airbyte.version=0.1.0
+LABEL io.airbyte.name=airbyte/destination-cumulio
diff --git a/airbyte-integrations/connectors/destination-cumulio/README.md b/airbyte-integrations/connectors/destination-cumulio/README.md
new file mode 100644
index 0000000..6226110
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cumulio/README.md
@@ -0,0 +1,98 @@
+# Cumulio Destination
+
+This is the repository for the Cumulio destination connector, written in Python.
+For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.com/integrations/destinations/cumulio).
+
+## Local development
+
+### Prerequisites
+**To iterate on this connector, make sure to complete this prerequisites section.**
+
+#### Minimum Python version required `= 3.7.0`
+
+#### Build & Activate Virtual Environment and install dependencies
+From this connector directory, create a virtual environment:
+```
+python -m venv .venv
+```
+
+This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your
+development environment of choice. To activate it from the terminal, run:
+```
+source .venv/bin/activate
+pip install -r requirements.txt
+```
+If you are in an IDE, follow your IDE's instructions to activate the virtualenv.
+
+Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is
+used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`.
+If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything
+should work as you expect.
+
+#### Create credentials
+**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.com/integrations/destinations/cumulio)
+to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `destination_cumulio/spec.json` file.
+Note that the `secrets` directory is gitignored by default, so there is no danger of accidentally checking in sensitive information.
+See `integration_tests/sample_config.json` for a sample config file.
+
+**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `destination cumulio test creds`
+and place them into `secrets/config.json`.
+
+### Locally running the connector
+```
+python main.py spec
+python main.py check --config secrets/config.json
+python main.py write --config secrets/config.json --catalog integration_tests/configured_catalog.json
+```
+
+### Locally running the connector docker image
+
+
+#### Build
+**Via [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md) (recommended):**
+```bash
+airbyte-ci connectors --name=destination-cumulio build
+```
+
+An image will be built with the tag `airbyte/destination-cumulio:dev`.
+
+**Via `docker build`:**
+```bash
+docker build -t airbyte/destination-cumulio:dev .
+```
+
+#### Run
+Then run any of the connector commands as follows:
+```
+docker run --rm airbyte/destination-cumulio:dev spec
+docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-cumulio:dev check --config /secrets/config.json
+# messages.jsonl is a file containing line-separated JSON representing AirbyteMessages
+cat messages.jsonl | docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-cumulio:dev write --config /secrets/config.json --catalog /integration_tests/configured_catalog.json
+```
+
+## Testing
+You can run our full test suite locally using [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md):
+```bash
+airbyte-ci connectors --name=destination-cumulio test
+```
+
+### Customizing acceptance Tests
+Customize `acceptance-test-config.yml` file to configure tests. See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) for more information.
+If your connector requires creating or destroying resources for use during acceptance tests, create fixtures for them and place them inside integration_tests/acceptance.py.
+
+## Dependency Management
+All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development.
+We split dependencies between two groups:
+* dependencies required for your connector to work go in the `MAIN_REQUIREMENTS` list.
+* dependencies required for testing go in the `TEST_REQUIREMENTS` list.
+
+### Publishing a new version of the connector
+You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what?
+1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=destination-cumulio test`
+2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors).
+3. Make sure the `metadata.yaml` content is up to date.
+4. Make sure the connector documentation and its changelog are up to date (`docs/integrations/destinations/cumulio.md`).
+5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention).
+6. Pat yourself on the back for being an awesome contributor.
+7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master.
+
diff --git a/airbyte-integrations/connectors/destination-cumulio/destination_cumulio/__init__.py b/airbyte-integrations/connectors/destination-cumulio/destination_cumulio/__init__.py
new file mode 100644
index 0000000..5dda7de
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cumulio/destination_cumulio/__init__.py
@@ -0,0 +1,8 @@
+#
+# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
+#
+
+
+from .destination import DestinationCumulio
+
+__all__ = ["DestinationCumulio"]
diff --git a/airbyte-integrations/connectors/destination-cumulio/destination_cumulio/client.py b/airbyte-integrations/connectors/destination-cumulio/destination_cumulio/client.py
new file mode 100644
index 0000000..10728e3
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cumulio/destination_cumulio/client.py
@@ -0,0 +1,367 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+
+import time
+from logging import Logger
+from typing import Any, Mapping
+
+from cumulio.cumulio import Cumulio # type: ignore
+
+# def _retry_with_backoff(
+# fn: Callable,
+# backoff_times_in_seconds: list[int]
+# ):
+# while True:
+# try:
+# return fn()
+
+
+class CumulioClient:
+ # Cumul.io will auto-generate a UUID that is unique to the dataset created.
+ # To ensure a consistent flow to the same dataset, we'll add a tag to the dataset:
+ # the tag is a combination of the prefix below and the stream name.
+ # This allows us to retrieve the same dataset resource upon further sync schedules.
+ TAG_PREFIX = "[AIRBYTE - DO NOT DELETE] - "
+
+ REPLACE_TAG = "REPLACE DATA"
+
+ INITIAL_DATASET_NAME_PREFIX = "Airbyte - "
+
+ BACKOFF_TIMES_IN_SECONDS = [300, 600, 1200]
+
+ def __init__(self, config: Mapping[str, Any], logger: Logger):
+ self.logger = logger
+ self.client = Cumulio(config["api_key"], config["api_token"], config["api_host"])
+
+ def batch_write(
+ self,
+ stream_name: str,
+ write_buffer: list,
+ column_headers: list,
+ is_in_overwrite_sync_mode: bool,
+ is_first_batch: bool,
+ update_metadata: bool,
+ ):
+ """Write a list of data (array of arrays) in a specific sync mode to Cumul.io."""
+ if len(write_buffer) == 0 or (len(write_buffer) == 1 and len(write_buffer[0]) == 0):
+ return
+
+ dataset_id = self._get_dataset_id_from_stream_name(stream_name)
+ if dataset_id is None:
+ dataset_id = self._push_batch_to_new_dataset(stream_name, write_buffer, column_headers)
+ else:
+ is_in_replace_mode = self._dataset_contains_replace_tag(dataset_id)
+ first_batch_replace = is_first_batch and (is_in_overwrite_sync_mode or is_in_replace_mode)
+ self._push_batch_to_existing_dataset(
+ dataset_id,
+ write_buffer,
+ column_headers,
+ first_batch_replace,
+ update_metadata,
+ )
+
+ self.logger.info(f"Successfully pushed {len(write_buffer)} rows to Cumul.io's data warehouse in a dataset with id {dataset_id}.")
+
+ def test_api_token(self):
+ """Test an API key and token by retrieving it."""
+ self.logger.info("Checking API host, key and token.")
+ data = self.client.get("authorization", {"where": {"type": "api"}})
+ # if response contains a count 0, the API host, key and token combination is unknown to Cumul.io.
+ if data["count"] == 0:
+ raise Exception(
+ "Unknown combination of API host, key and token. Can you verify whether you've specified the correct combination of "
+ "Cumul.io API host, key, and token?"
+ )
+ self.logger.info("API host, key and token combination is valid.")
+
+ def test_data_push(self, stream_name: str, data: list[list[Any]], columns: list[str]):
+ """[DEPRECATED] This method is no longer in use as it results in a lot of overhead.
+ Test pushing dummy data into a dataset, and delete the dataset afterwards."""
+
+ self.logger.info("Starting data push of dummy data.")
+ self.batch_write(stream_name, data, columns, True, True, True)
+ self.logger.info("Finished data push of dummy data. Will delete dummy dataset.")
+
+ self.delete_dataset(stream_name)
+ self.logger.info("Finished deleting dummy dataset.")
+
+ def delete_dataset(self, stream_name: str):
+ """Delete a dataset in Cumul.io.
+ This should only be used for testing purposes. Currently used in:
+ - Integration tests
+ - When pushing dummy data to an example dataset during "check" of Airbyte destination connector (see destination.py check method)
+ """
+ dataset_id = self._get_dataset_id_from_stream_name(stream_name)
+ if dataset_id is not None:
+ return self.client.delete("securable", dataset_id)
+
+ self.logger.info(f"No dataset for stream {stream_name} found to delete.")
+
+ def get_ordered_columns(self, stream_name: str):
+ """Return a list of ordered columns (based on their order in Cumul.io).
+ The dataset is retrieved based on a Cumul.io tag that includes the stream_name.
+ """
+ dataset_and_columns = self.get_dataset_and_columns_from_stream_name(stream_name)
+ if dataset_and_columns is None:
+ # Dataset hasn't been created yet on Cumul.io's side.
+ return []
+ # Sort columns based on the order property.
+ order_sorted_columns = sorted(dataset_and_columns["columns"], key=lambda x: x["order"])
+ # Return a list of column source names.
+ return [column["source_name"] for column in order_sorted_columns]
+
+ def get_dataset_and_columns_from_stream_name(self, stream_name: str):
+ """Return a dataset and its columns based on a Cumul.io tag that includes the stream_name."""
+ result = self.client.get(
+ "securable",
+ {
+ "where": {"type": "dataset"},
+ "attributes": ["id", "name"],
+ "include": [
+ {
+ "model": "Tag",
+ "where": {"tag": self.TAG_PREFIX + stream_name},
+ "attributes": ["id", "tag"],
+ "jointype": "inner",
+ },
+ {
+ "model": "Column",
+ "attributes": ["id", "source_name", "order"],
+ "jointype": "inner",
+ },
+ ],
+ },
+ )
+ if result["count"] > 1:
+ raise Exception(
+ f"More than one dataset has been returned, could you verify whether the tag for stream {stream_name} is set up "
+ f"correctly in Cumul.io (expected a tag '{self.TAG_PREFIX}{stream_name}')?"
+ )
+ # A count of zero means that the dataset has not been created on Cumul.io's side yet.
+ # We'll return None to indicate this.
+ elif result["count"] == 0:
+ return None
+ # return dataset and its columns.
+ return result["rows"][0]
+
+ def set_replace_tag_on_dataset(self, stream_name: str):
+ """Add a "replace" tag to a specific dataset based on the stream_name.
+ The "replace" tag is used to ensure that the next sync will replace the existing data.
+ """
+ dataset_id = self._get_dataset_id_from_stream_name(stream_name)
+ if dataset_id is not None:
+ self.logger.info(
+ f"A tag will be added to the dataset with id {dataset_id} to replace the existing data upon next sync. "
+ f"As a result, the existing data will not be replaced until the next sync has ran. "
+ f"This avoids empty datasets which cause 'No data' to be displayed upon querying them."
+ )
+ return self._associate_tag_dataset_id(self.REPLACE_TAG, dataset_id)
+ self.logger.debug(
+ f"No dataset found to set Replace tag on (looking for stream name '{stream_name}'), "
+ f"this might be due to the dataset not existing yet on Cumul.io's side."
+ )
+
+ def _push_batch_to_new_dataset(self, stream_name: str, write_buffer: list[list[Any]], column_headers: list[str]):
+ properties = {
+ "type": "create",
+ "data": write_buffer,
+ "options": {
+ "header": column_headers,
+ "update_metadata": True,
+ "name": {"en": self.INITIAL_DATASET_NAME_PREFIX + stream_name},
+ },
+ }
+ result: Mapping[str, Any] = {}
+ data_is_pushed = False
+ try_count = 0
+ while (not data_is_pushed) and try_count < len(self.BACKOFF_TIMES_IN_SECONDS):
+ try:
+ self.logger.info(
+ f"Pushing {len(write_buffer)} rows to Cumul.io's data warehouse in a new Cumul.io dataset "
+ f"with name {self.INITIAL_DATASET_NAME_PREFIX}{stream_name}."
+ )
+
+ result = self.client.create("data", properties)
+ data_is_pushed = True
+
+ except Exception as e:
+ if "Unauthorized" in str(e):
+ raise Exception(
+ f"Not able to push a batch of data to a new dataset due to an 'Unauthorized' error. "
+ f"Please verify that your API key and token are still valid!"
+ f"Error: {e}"
+ )
+ elif try_count + 1 >= len(self.BACKOFF_TIMES_IN_SECONDS):
+ raise Exception(f"Exception while creating new dataset after {len(self.BACKOFF_TIMES_IN_SECONDS)} retries: {e}")
+
+ seconds_to_backoff = self.BACKOFF_TIMES_IN_SECONDS[try_count]
+ try_count += 1
+ self.logger.info(
+ f"Error pushing data to a new dataset during try {try_count}, retrying in {seconds_to_backoff} seconds. Error: {e}"
+ )
+ time.sleep(seconds_to_backoff)
+
+ dataset_id = result["rows"][0]["id"]
+ try:
+ # Add a tag to the dataset to allow retrieving it upon further syncs / batch writes
+ self._associate_tag_dataset_id(stream_name, dataset_id)
+ except Exception as e:
+            raise Exception(
+                f"The data has been stored successfully, but an error occurred while associating a required tag with the "
+                f"dataset (id: {dataset_id}). This will likely cause issues upon further synchronizations. "
+                f"The following error occurred: {e}"
+            )
+
+ return dataset_id
+
+ def _push_batch_to_existing_dataset(
+ self,
+ dataset_id: str,
+ write_buffer: list[list[Any]],
+ column_headers: list[str],
+ first_batch_replace: bool,
+ update_metadata: bool,
+ ):
+ cumulio_sync_type = "replace" if first_batch_replace else "append"
+
+ properties = {
+ "type": cumulio_sync_type,
+ "data": write_buffer,
+ "securable_id": dataset_id,
+ "options": {
+ "header": column_headers,
+ "update_metadata": update_metadata,
+ },
+ }
+ data_is_pushed = False
+ try_count = 0
+ while (not data_is_pushed) and try_count < len(self.BACKOFF_TIMES_IN_SECONDS):
+ try:
+ self.logger.info(
+ f"Pushing {len(write_buffer)} rows to Cumul.io dataset with id {dataset_id} in {cumulio_sync_type} mode, "
+ f"{'while' if update_metadata else 'not'} updating the columns of that dataset."
+ )
+ self.client.create("data", properties)
+
+ data_is_pushed = True
+
+ if first_batch_replace:
+ # Try to remove replace tag to ensure next syncs do not replace existing data.
+ self._remove_replace_tag_dataset_id_association(dataset_id)
+
+ except RuntimeError as e:
+ if "Unauthorized" in str(e):
+ raise Exception(
+ f"Not able to push a batch of data to dataset {dataset_id} due to an 'Unauthorized' error. "
+ f"Please verify that your API key and token are still valid!"
+ f"Error: {e}"
+ )
+ elif try_count + 1 >= len(self.BACKOFF_TIMES_IN_SECONDS):
+                    raise Exception(
+                        f"Exception while pushing to existing dataset {dataset_id} after "
+                        f"{len(self.BACKOFF_TIMES_IN_SECONDS)} retries: {e}"
+                    )
+
+ seconds_to_backoff = self.BACKOFF_TIMES_IN_SECONDS[try_count]
+ try_count += 1
+
+ self.logger.info(
+ f"Error pushing data to existing dataset {dataset_id} during try {try_count}, retrying in {seconds_to_backoff} seconds."
+ )
+
+ time.sleep(seconds_to_backoff)
+
+ def _dataset_contains_replace_tag(self, dataset_id: str):
+ """Return a boolean to indicate whether a dataset contains the "replace" tag."""
+ result = self.client.get(
+ "securable",
+ {
+ "where": {"type": "dataset", "id": dataset_id},
+ "attributes": ["id", "name"],
+ "include": [
+ {
+ "model": "Tag",
+ "where": {"tag": self.TAG_PREFIX + self.REPLACE_TAG},
+ "attributes": ["id", "tag"],
+ "jointype": "inner",
+ }
+ ],
+ },
+ )
+        return result["count"] > 0
+
+ def _remove_replace_tag_dataset_id_association(self, dataset_id: str):
+ """Remove the "replace" tag from a specific dataset."""
+ tag_id = self._get_tag_id(self.REPLACE_TAG)
+ if tag_id is not None:
+ return self._dissociate_tag_with_dataset_id(tag_id, dataset_id)
+ self.logger.debug(
+ f"No replace tag found, so could not remove for Cumul.io dataset with id {dataset_id}."
+ f"This could be expected as the stream might be configured in overwrite mode."
+ )
+
+ def _get_dataset_id_from_stream_name(self, stream_name: str):
+ """Return a dataset ID based on a Cumul.io tag that includes the stream_name."""
+ result = self.client.get(
+ "securable",
+ {
+ "where": {"type": "dataset"},
+ "attributes": ["id", "name"],
+ "include": [
+ {
+ "model": "Tag",
+ "where": {"tag": self.TAG_PREFIX + stream_name},
+ "attributes": ["id", "tag"],
+ "jointype": "inner",
+ }
+ ],
+ },
+ )
+ if result["count"] > 1:
+ raise Exception(
+ f"More than one dataset has been found, could you verify whether the tag for stream {stream_name} is set up "
+ f"correctly in Cumul.io (expected a tag '{self.TAG_PREFIX}{stream_name}' on a single dataset)?"
+ )
+ # A count of zero means that the dataset has not been created on Cumul.io's side yet.
+ # We'll return None to indicate this.
+ elif result["count"] == 0:
+ return None
+ # return dataset ID
+ return result["rows"][0]["id"]
+
+ def _associate_tag_dataset_id(self, tag_name: str, dataset_id: str):
+ """Ensure that a specific stream name tag is associated to a dataset ID.
+ Optionally the Tag is created and associated if not existing yet.
+ """
+ # A tag should be unique and cannot be created multiple times.
+ # In order to ensure that the association doesn't fail,
+ # we'll first try to retrieve the tag and then either
+ # associate it with the newly created securable,
+ # or create & associate it.
+ tag_id = self._get_tag_id(tag_name)
+ if tag_id is not None:
+ return self._associate_tag_with_dataset_id(tag_id, dataset_id)
+ return self._create_and_associate_stream_name_tag_with_dataset_id(tag_name, dataset_id)
+
+ def _get_tag_id(self, tag_name: str):
+ """Return a Tag ID using the stream name."""
+ result = self.client.get("tag", {"where": {"tag": self.TAG_PREFIX + tag_name}})
+ if result["count"] == 0:
+ return None
+ return result["rows"][0]["id"]
+
+ def _associate_tag_with_dataset_id(self, tag_id: str, dataset_id: str):
+ return self.client.associate("tag", tag_id, "Securables", dataset_id)
+
+ def _dissociate_tag_with_dataset_id(self, tag_id: str, dataset_id: str):
+ return self.client.dissociate("tag", tag_id, "Securables", dataset_id)
+
+ def _create_and_associate_stream_name_tag_with_dataset_id(self, tag_name: str, dataset_id: str):
+ return self.client.create(
+ "tag",
+ {"tag": self.TAG_PREFIX + tag_name},
+ [{"role": "Securables", "id": dataset_id}],
+ )
diff --git a/airbyte-integrations/connectors/destination-cumulio/destination_cumulio/destination.py b/airbyte-integrations/connectors/destination-cumulio/destination_cumulio/destination.py
new file mode 100644
index 0000000..61c6c5a
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cumulio/destination_cumulio/destination.py
@@ -0,0 +1,101 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+
+from logging import Logger, getLogger
+from typing import Any, Iterable, Mapping
+
+from airbyte_cdk.destinations import Destination
+from airbyte_cdk.models import AirbyteConnectionStatus, AirbyteMessage, ConfiguredAirbyteCatalog, DestinationSyncMode, Status, Type
+from destination_cumulio.client import CumulioClient
+from destination_cumulio.writer import CumulioWriter
+
+logger = getLogger("airbyte")
+
+
+class DestinationCumulio(Destination):
+ def write(
+ self,
+ config: Mapping[str, Any],
+ configured_catalog: ConfiguredAirbyteCatalog,
+ input_messages: Iterable[AirbyteMessage],
+ ) -> Iterable[AirbyteMessage]:
+ """Reads the input stream of messages, config, and catalog to write data to the destination.
+
+ This method returns an iterable (typically a generator of AirbyteMessages via yield) containing state messages received in the
+ input message stream. Outputting a state message means that every AirbyteRecordMessage which came before it has been successfully
+        persisted to the destination. This is used to ensure fault tolerance: if a sync fails before fully completing,
+        the source is given the last state message output from this method as the starting point of the next sync.
+
+ :param config: dict of JSON configuration matching the configuration declared in spec.json. Current format:
+ {
+ 'api_host': '',
+ 'api_key': '',
+ 'api_token': ''
+ }
+ :param configured_catalog: schema of the data being received and how it should be persisted in the destination.
+ :param input_messages: stream of input messages received from the source.
+
+ :return: Iterable of AirbyteStateMessages wrapped in AirbyteMessage structs.
+ """
+ writer = CumulioWriter(config, configured_catalog, logger)
+
+ for configured_stream in configured_catalog.streams:
+ # Cumul.io does not support removing all data from an existing dataset, and removing the dataset itself will break existing
+ # dashboards built on top of it.
+ # Instead, the connector will make sure to push the first batch of data as a "replace" action: this will cause all existing data
+ # to be replaced with the first batch of data. All next batches will be pushed as an "append" action.
+ if configured_stream.destination_sync_mode == DestinationSyncMode.overwrite:
+ writer.delete_stream_entries(configured_stream.stream.name)
+
+ for message in input_messages:
+ if message.type == Type.STATE:
+ # Yielding a state message indicates that all records which came before it have been written to the destination.
+ # We flush all write buffers in the writer, and then output the state message itself.
+ writer.flush_all()
+ yield message
+ elif message.type == Type.RECORD:
+ record = message.record
+ assert record is not None
+ assert record.stream is not None
+ assert record.data is not None
+ writer.queue_write_operation(record.stream, record.data)
+ else:
+ # ignore other message types for now
+ continue
+
+ # Make sure to flush any records still in the queue
+ writer.flush_all()
+
+ def check(self, logger: Logger, config: Mapping[str, Any]) -> AirbyteConnectionStatus:
+ """Tests if the input configuration can be used to successfully connect to the destination with the needed permissions.
+
+ This will test whether the combination of the Cumul.io API host, API key and API token is valid.
+
+ :param logger: Logging object to display debug/info/error to the logs
+ (logs will not be accessible via airbyte UI if they are not passed to this logger)
+ :param config: Json object containing the configuration of this destination, content of this json is as specified in
+ the properties of the spec.json file
+
+ :return: AirbyteConnectionStatus indicating a Success or Failure
+ """
+ try:
+ client = CumulioClient(config, logger)
+ # Verify access by hitting Cumul.io authentication endpoint
+ client.test_api_token()
+
+            # We no longer test a data push here, as this might take some time.
+            # If the API host, key, and token are valid, we can assume data can be pushed using them.
+
+ return AirbyteConnectionStatus(status=Status.SUCCEEDED)
+ except Exception as e:
+ # The Cumul.io Python SDK currently returns a generic error message when an issue occurs during the request,
+            # or when the request returns e.g. a 401 Unauthorized HTTP response code.
+            # We'll assume that either the API host is incorrect, or the API key and token are no longer valid.
+            if str(e) != "Something went wrong":
+ return AirbyteConnectionStatus(status=Status.FAILED, message=f"An exception occurred: {repr(e)}")
+ return AirbyteConnectionStatus(
+ status=Status.FAILED,
+ message="An exception occurred: could it be that the API host is incorrect, or the API key and token are no longer valid?",
+ )
diff --git a/airbyte-integrations/connectors/destination-cumulio/destination_cumulio/spec.json b/airbyte-integrations/connectors/destination-cumulio/destination_cumulio/spec.json
new file mode 100644
index 0000000..dff9ec3
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cumulio/destination_cumulio/spec.json
@@ -0,0 +1,37 @@
+{
+ "documentationUrl": "https://docs.airbyte.com/integrations/destinations/cumulio",
+ "supported_destination_sync_modes": ["overwrite", "append"],
+ "supportsIncremental": true,
+ "supportsDBT": false,
+ "supportsNormalization": false,
+ "connectionSpecification": {
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "title": "Destination Cumulio",
+ "type": "object",
+ "required": ["api_host", "api_key", "api_token"],
+ "additionalProperties": true,
+ "properties": {
+ "api_host": {
+ "title": "Cumul.io API Host URL",
+ "description": "URL of the Cumul.io API (e.g. 'https://api.cumul.io', 'https://api.us.cumul.io', or VPC-specific API url). Defaults to 'https://api.cumul.io'.",
+ "default": "https://api.cumul.io",
+ "type": "string",
+ "order": 0
+ },
+ "api_key": {
+ "title": "Cumul.io API Key",
+ "description": "An API key generated in Cumul.io's platform (can be generated here: https://app.cumul.io/start/profile/integration).",
+ "type": "string",
+ "airbyte_secret": true,
+ "order": 1
+ },
+ "api_token": {
+ "title": "Cumul.io API Token",
+ "description": "The corresponding API token generated in Cumul.io's platform (can be generated here: https://app.cumul.io/start/profile/integration).",
+ "type": "string",
+ "airbyte_secret": true,
+ "order": 2
+ }
+ }
+ }
+}
diff --git a/airbyte-integrations/connectors/destination-cumulio/destination_cumulio/writer.py b/airbyte-integrations/connectors/destination-cumulio/destination_cumulio/writer.py
new file mode 100644
index 0000000..93c8d05
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cumulio/destination_cumulio/writer.py
@@ -0,0 +1,205 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+
+import json
+from logging import Logger
+from typing import Any, Mapping
+
+from airbyte_cdk.models import ConfiguredAirbyteCatalog, ConfiguredAirbyteStream, DestinationSyncMode
+from destination_cumulio.client import CumulioClient
+
+
+def _convert_airbyte_configured_stream_into_headers_dict(
+ configured_stream: ConfiguredAirbyteStream,
+):
+ """Return a dict of column names and types based on the configured Airbyte stream.
+    Note that the Airbyte types are currently not used, as Cumul.io's Data API service does not support specifying column types.
+ """
+ column_headers = {}
+ for column_header in configured_stream.stream.json_schema["properties"]:
+ if "airbyte-type" in configured_stream.stream.json_schema["properties"][column_header]:
+ column_headers[column_header] = {
+ "airbyte-type": configured_stream.stream.json_schema["properties"][column_header]["airbyte-type"]
+ }
+ else:
+ column_headers[column_header] = {"airbyte-type": configured_stream.stream.json_schema["properties"][column_header]["type"]}
+ return column_headers
+
+
+class CumulioWriter:
+ # Cumul.io's Data API service has a limit of pushing 10 000 data points (i.e. rows) in a single request.
+ # (see note here: https://developer.cumul.io/?shell#data_create)
+ FLUSH_INTERVAL = 10000
+
+ def __init__(
+ self,
+ config: Mapping[str, Any],
+ configured_catalog: ConfiguredAirbyteCatalog,
+ logger: Logger,
+ ):
+ """Create a single Cumul.io Client and a dict of writers.
+ The Cumul.io Client will be used to send API requests to Cumul.io's API.
+ The writers dict will contain one element for each configured_stream in the connection.
+        Each of these elements has a stream-specific configuration and write buffer.
+ """
+ self.logger = logger
+ self.client = CumulioClient(config, logger)
+ self.writers = self._create_writers(configured_catalog)
+
+ def queue_write_operation(self, stream_name: str, data: Mapping):
+ """Queue data in a specific writer buffer.
+        The buffer is flushed once it reaches the flush interval.
+ """
+ cumulio_data = self.transform_data(stream_name, data)
+ self.writers[stream_name]["write_buffer"].append(cumulio_data)
+ if len(self.writers[stream_name]["write_buffer"]) == self.FLUSH_INTERVAL:
+ self.flush(stream_name)
+
+ def flush_all(self):
+ """Flush all writer buffers."""
+ for stream_name in self.writers:
+ self.flush(stream_name)
+
+ def flush(self, stream_name: str):
+ """Write a batch of data from the write buffer using the Cumul.io client."""
+ self.client.batch_write(
+ stream_name,
+ self.writers[stream_name]["write_buffer"],
+ [column_header["name"] for column_header in self.writers[stream_name]["column_headers"]],
+ self.writers[stream_name]["is_in_overwrite_sync_mode"],
+ self.writers[stream_name]["is_first_batch"],
+ self.writers[stream_name]["update_metadata"],
+ )
+ self.writers[stream_name]["write_buffer"].clear()
+ if self.writers[stream_name]["is_first_batch"]:
+ self.writers[stream_name]["is_first_batch"] = False
+
+ def transform_data(self, stream_name: str, airbyte_data: Mapping) -> list[Any]:
+ """Transform Airbyte data (one row) into Cumul.io's expected data format (a list in the appropriate order).
+ If data for a specific column is not included in the Airbyte data, the value will be None.
+ If data for a specific column in the Airbyte data is not recognized, it will be ignored as extraneous.
+ (see here: https://docs.airbyte.com/understanding-airbyte/airbyte-protocol/#output-4)
+ """
+ try:
+ self.writers[stream_name]
+ except KeyError:
+ raise Exception(f"The stream {stream_name} is not defined in the configured_catalog and won't thus be streamed.")
+
+ data: list[Any] = [None for i in range(len(self.writers[stream_name]["column_headers"]))]
+ for column in airbyte_data:
+ unknown_data = True
+ index: int = 0
+ for column_header in self.writers[stream_name]["column_headers"]:
+ if column_header["name"] == column:
+ unknown_data = False
+ # Cumul.io doesn't support storing or querying nested (list, dict) or boolean data.
+ # we'll stringify this data via json.dumps
+ if (
+ isinstance(airbyte_data[column], list)
+ or isinstance(airbyte_data[column], dict)
+ or isinstance(airbyte_data[column], bool)
+ ):
+ data[index] = json.dumps(airbyte_data[column])
+ else:
+ data[index] = airbyte_data[column]
+ index += 1
+ if unknown_data:
+ self.logger.debug(
+ f"The value with name {column} has not been defined in the ConfiguredAirbyteStream and will thus be "
+ f"ignored as extraneous."
+ )
+ return data
+
+ def delete_stream_entries(self, stream_name: str):
+ """Set a "replace" tag on a dataset to ensure all existing data will be replaced upon next synchronization."""
+ return self.client.set_replace_tag_on_dataset(stream_name)
+
+ def _create_writers(self, configured_catalog: ConfiguredAirbyteCatalog):
+ """Return a set of writers, one for each stream in the configured_catalog.
+ This method will also merge the Cumul.io columns for the stream's dataset, if existing."""
+ writers = {}
+ for configured_stream in configured_catalog.streams:
+ result = self._merge_cumulio_and_airbyte_column_headers(configured_stream)
+ writers[configured_stream.stream.name] = {
+ "write_buffer": [],
+ "column_headers": result["sorted_column_headers"],
+ "is_in_overwrite_sync_mode": configured_stream.destination_sync_mode == DestinationSyncMode.overwrite,
+ "is_first_batch": True,
+ "update_metadata": result["update_metadata"],
+ }
+ return writers
+
+ def _merge_cumulio_and_airbyte_column_headers(self, configured_stream: ConfiguredAirbyteStream):
+ """Merge columns known by Airbyte and Cumul.io.
+        - If the dataset does not yet exist in Cumul.io (i.e. upon the first sync), the column order follows the iteration order of the stream's JSON schema properties.
+ - Upon next synchronizations, the dataset exists in Cumul.io. Its column order will be used to send data in the corresponding order.
+ - If a new column is added to the source table (i.e. this column doesn't exist yet in Cumul.io),
+ it will be added at the end of the dataset's columns upon next synchronization.
+ - If an existing column is removed from the source:
+ 1. If the next synchronization for this stream runs in "overwrite" mode (or a "replace" tag is set), the Cumul.io dataset will
+ no longer contain the original column.
+ 2. If the next synchronization for this stream runs in "append" mode, the Cumul.io dataset will
+ contain empty values for the non-existing columns for all appended rows.
+ Note that Airbyte recommends a reset upon changes to source schema(s). In that case, the first batch will be synced
+ using the "overwrite" mode (due to setting a reset tag on the dataset, see delete_stream_entries implementation).
+ """
+ cumulio_column_headers = self.client.get_ordered_columns(configured_stream.stream.name)
+ airbyte_column_headers = _convert_airbyte_configured_stream_into_headers_dict(configured_stream)
+
+ update_metadata = False
+
+ merged_column_headers = []
+ new_column_count = 0
+ for airbyte_column_header in airbyte_column_headers:
+ merged_column_header = {
+ "name": airbyte_column_header,
+ "airbyte-type": airbyte_column_headers[airbyte_column_header]["airbyte-type"],
+ }
+
+ try:
+ # Add an order based on the order of the column in the Cumul.io dataset
+ merged_column_header["order"] = cumulio_column_headers.index(airbyte_column_header)
+ except ValueError:
+ # Add an appropriate order to ensure the column appears at the end of the data
+ new_column_count += 1
+ merged_column_header["order"] = len(cumulio_column_headers) + new_column_count
+
+ merged_column_headers.append(merged_column_header)
+
+ sorted_column_headers = sorted(merged_column_headers, key=lambda x: x["order"])
+ if new_column_count > 0:
+ update_metadata = True
+
+            if len(cumulio_column_headers) > 0:
+                self.logger.info(
+                    f"One or more columns defined in stream {configured_stream.stream.name} are not yet present in Cumul.io, "
+                    f"and will be added upon the next successful synchronization."
+                )
+            else:
+                self.logger.info(
+                    f"The dataset for stream {configured_stream.stream.name} doesn't seem to exist in Cumul.io. "
+                    f"The next sync for this stream will create it."
+                )
+ elif not update_metadata:
+ # Validate whether all columns in Cumul.io are still part of the configured airbyte catalog definition.
+ for cumulio_column_header in cumulio_column_headers:
+ try:
+ # Try to find the Cumul.io column header in the Airbyte columns
+ airbyte_column_headers[cumulio_column_header]
+ except KeyError:
+ # Cumul.io's column hasn't been found, so we'll need to update the dataset's metadata upon next sync.
+ if configured_stream.destination_sync_mode == DestinationSyncMode.overwrite:
+ self.logger.info(
+ f"The source column {cumulio_column_header} in Cumul.io is no longer present in the configured "
+ f"stream {configured_stream.stream.name} (i.e. in the source). As the stream synchronization is "
+ f"in overwrite mode, the existing column in Cumul.io will be deleted upon next sync. Check "
+ f"carefully whether this column is used in any existing Cumul.io dashboards!"
+ )
+ update_metadata = True
+
+ return {
+ "sorted_column_headers": sorted_column_headers,
+ "update_metadata": update_metadata,
+ }
diff --git a/airbyte-integrations/connectors/destination-cumulio/icon.svg b/airbyte-integrations/connectors/destination-cumulio/icon.svg
new file mode 100644
index 0000000..85cf3ee
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cumulio/icon.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/airbyte-integrations/connectors/destination-cumulio/integration_tests/configured_catalog.json b/airbyte-integrations/connectors/destination-cumulio/integration_tests/configured_catalog.json
new file mode 100644
index 0000000..844c37f
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cumulio/integration_tests/configured_catalog.json
@@ -0,0 +1,29 @@
+{
+ "streams": [
+ {
+ "stream": {
+ "name": "cumulio_example_table",
+ "supported_sync_modes": ["full_refresh"],
+ "source_defined_cursor": false,
+ "json_schema": {
+ "type": "object",
+ "properties": {
+ "hierarchy_column": {
+ "type": "string"
+ },
+ "numeric_column": {
+ "type": "number"
+ },
+ "datetime_column": {
+ "type": "string",
+ "format": "date-time",
+ "airbyte_type": "timestamp_with_timezone"
+ }
+ }
+ }
+ },
+ "sync_mode": "full_refresh",
+ "destination_sync_mode": "overwrite"
+ }
+ ]
+}
diff --git a/airbyte-integrations/connectors/destination-cumulio/integration_tests/integration_test.py b/airbyte-integrations/connectors/destination-cumulio/integration_tests/integration_test.py
new file mode 100644
index 0000000..545241d
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cumulio/integration_tests/integration_test.py
@@ -0,0 +1,276 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+import json
+import time
+from logging import Logger, getLogger
+from typing import Any, Dict, Mapping
+
+import pytest
+from airbyte_cdk.models import (
+ AirbyteMessage,
+ AirbyteRecordMessage,
+ AirbyteStateMessage,
+ AirbyteStream,
+ ConfiguredAirbyteCatalog,
+ ConfiguredAirbyteStream,
+ DestinationSyncMode,
+ Status,
+ SyncMode,
+ Type,
+)
+from destination_cumulio import DestinationCumulio
+from destination_cumulio.client import CumulioClient
+
+
+@pytest.fixture(name="logger")
+def logger_fixture() -> Logger:
+ return getLogger("airbyte")
+
+
+@pytest.fixture(name="config")
+def config_fixture() -> Mapping[str, Any]:
+ with open("secrets/config.json", "r") as f:
+ return json.loads(f.read())
+
+
+@pytest.fixture(name="configured_catalog")
+def configured_catalog_fixture() -> ConfiguredAirbyteCatalog:
+ stream_schema = {
+ "type": "object",
+ "properties": {
+ "string_col": {"type": "str"},
+ "int_col": {"type": "integer"},
+ "obj_col": {"type": "object"},
+ "arr_col": {"type": "array"},
+ },
+ }
+
+ append_stream = ConfiguredAirbyteStream(
+ stream=AirbyteStream(
+ name="append_integration_test_stream",
+ json_schema=stream_schema,
+ supported_sync_modes=[SyncMode.incremental],
+ ),
+ sync_mode=SyncMode.incremental,
+ destination_sync_mode=DestinationSyncMode.append,
+ )
+
+ overwrite_stream = ConfiguredAirbyteStream(
+ stream=AirbyteStream(
+ name="overwrite_integration_test_stream",
+ json_schema=stream_schema,
+ supported_sync_modes=[SyncMode.incremental],
+ ),
+ sync_mode=SyncMode.incremental,
+ destination_sync_mode=DestinationSyncMode.overwrite,
+ )
+
+ return ConfiguredAirbyteCatalog(streams=[append_stream, overwrite_stream])
+
+
+@pytest.fixture(autouse=True)
+def delete_datasets(config: Mapping, configured_catalog: ConfiguredAirbyteCatalog, logger: Logger):
+ cumulio_client = CumulioClient(config, logger)
+ for stream in configured_catalog.streams:
+ dataset = cumulio_client.get_dataset_and_columns_from_stream_name(stream.stream.name)
+ if dataset:
+ logger.info(
+ f"Existing integration test dataset found. Will delete Cumul.io dataset for integration test stream {stream.stream.name}."
+ )
+ try:
+ cumulio_client.client.delete("securable", dataset["id"])
+ except Exception as e:
+ logger.info(
+ f"The following exception occurred when trying to delete the dataset "
+ f"for integration test stream {stream.stream.name}: {e}"
+ )
+
+
+def test_check_valid_config(config: Mapping, logger: Logger):
+ outcome = DestinationCumulio().check(logger, config)
+ assert outcome.status == Status.SUCCEEDED
+
+
+def test_check_incomplete_config(logger: Logger):
+ outcome = DestinationCumulio().check(logger, {"api_host": "https://api.cumul.io"})
+ assert outcome.status == Status.FAILED
+
+
+def test_check_invalid_config(logger: Logger):
+ outcome = DestinationCumulio().check(
+ logger,
+ {
+ "api_host": ".invalid.url",
+ "api_key": "invalid_key",
+ "api_token": "invalid_token",
+ },
+ )
+ assert outcome.status == Status.FAILED
+
+
+def _state(data: Dict[str, Any]) -> AirbyteMessage:
+ return AirbyteMessage(type=Type.STATE, state=AirbyteStateMessage(data=data))
+
+
+def _record(stream_name: str, str_value: str, int_value: int, obj_value: dict, arr_value: list) -> AirbyteMessage:
+ return AirbyteMessage(
+ type=Type.RECORD,
+ record=AirbyteRecordMessage(
+ stream=stream_name,
+ data={
+ "string_col": str_value,
+ "int_col": int_value,
+ "obj_col": obj_value,
+ "arr_col": arr_value,
+ },
+ emitted_at=0,
+ ),
+ )
+
+
+def _retrieve_all_records(cumulio_client, stream_name):
+ dataset_and_columns = cumulio_client.get_dataset_and_columns_from_stream_name(stream_name)
+ # Wait 5 seconds before trying to retrieve the data to ensure it can be properly retrieved
+ time.sleep(5)
+ if dataset_and_columns is not None:
+ ordered_columns = cumulio_client.get_ordered_columns(stream_name)
+ dimension_columns = list(
+ map(
+ lambda x, y: {
+ "dataset_id": dataset_and_columns["id"],
+ "column_id": y["id"],
+ },
+ ordered_columns,
+ dataset_and_columns["columns"],
+ )
+ )
+ int_col_ind = ordered_columns.index("int_col")
+
+ raw_data_query = {
+ "dimensions": dimension_columns,
+ "options": {"rollup_data": False},
+ "order": [
+ {
+ "dataset_id": dataset_and_columns["id"],
+ "column_id": dataset_and_columns["columns"][int_col_ind]["id"],
+ "order": "asc",
+ }
+ ],
+ }
+ raw_data = cumulio_client.client.get("data", raw_data_query)
+ airbyte_data_to_return = []
+ for row in raw_data["data"]:
+ airbyte_data_row = {}
+ for col_ind, column in enumerate(dataset_and_columns["columns"]):
+ if isinstance(row[col_ind], dict):
+ airbyte_data_row[column["source_name"]] = row[col_ind]["id"]
+ else:
+ airbyte_data_row[column["source_name"]] = row[col_ind]
+ airbyte_data_to_return.append(
+ AirbyteMessage(
+ type=Type.RECORD,
+ record=AirbyteRecordMessage(stream=stream_name, data=airbyte_data_row, emitted_at=0),
+ )
+ )
+ return airbyte_data_to_return
+ return None
+
+
+def test_write_append(
+ config: Mapping,
+ configured_catalog: ConfiguredAirbyteCatalog,
+ logger: Logger,
+):
+ """
+ This test verifies that:
+ - Writing a stream in "append" mode appends new records while preserving existing data.
+ - The correct state message is output by the connector at the end of the sync.
+ - Object and Array data is appropriately stringified in Cumul.io.
+ """
+ stream_name = configured_catalog.streams[0].stream.name
+ destination = DestinationCumulio()
+
+ state_message = _state({"state": "3"})
+ record_chunk_1 = [_record(stream_name, "test-" + str(i), i, {"test": i}, ["test", i]) for i in range(1, 3)]
+
+ output_states_1 = list(destination.write(config, configured_catalog, [*record_chunk_1, state_message]))
+ assert [state_message] == output_states_1
+
+ record_chunk_2 = [_record(stream_name, "test-" + str(i), i, {"test": i}, ["test", i]) for i in range(3, 5)]
+
+ output_states_2 = list(destination.write(config, configured_catalog, [*record_chunk_2, state_message]))
+ assert [state_message] == output_states_2
+
+ cumulio_client = CumulioClient(config, logger)
+
+ records_in_destination = _retrieve_all_records(cumulio_client, stream_name)
+
+ expected_records = [
+ AirbyteMessage(
+ type=Type.RECORD,
+ record=AirbyteRecordMessage(
+ stream=stream_name,
+ data={
+ "string_col": "test-" + str(i),
+ "int_col": i,
+ "obj_col": json.dumps({"test": i}),
+ "arr_col": json.dumps(["test", i]),
+ },
+ emitted_at=0,
+ ),
+ )
+ for i in range(1, 5)
+ ]
+
+ assert expected_records == records_in_destination
+
+
+def test_write_overwrite(
+ config: Mapping[str, Any],
+ configured_catalog: ConfiguredAirbyteCatalog,
+ logger: Logger,
+):
+ """
+ This test verifies that:
+ - writing a stream in "append" mode overwrite all exiting data.
+ - the correct state message is output by the connector at the end of the sync.
+ - Object and Array data is appropriately stringified in Cumul.io.
+ """
+ stream_name = configured_catalog.streams[1].stream.name
+ destination = DestinationCumulio()
+
+ state_message = _state({"state": "3"})
+ record_chunk_1 = [_record(stream_name, "oldtest-" + str(i), i, {"oldtest": i}, ["oldtest", i]) for i in range(1, 3)]
+
+ output_states_1 = list(destination.write(config, configured_catalog, [*record_chunk_1, state_message]))
+ assert [state_message] == output_states_1
+
+ record_chunk_2 = [_record(stream_name, "newtest-" + str(i), i, {"newtest": i}, ["newtest", i]) for i in range(1, 3)]
+
+ output_states_2 = list(destination.write(config, configured_catalog, [*record_chunk_2, state_message]))
+ assert [state_message] == output_states_2
+
+ cumulio_client = CumulioClient(config, logger)
+
+ records_in_destination = _retrieve_all_records(cumulio_client, stream_name)
+
+ expected_records = [
+ AirbyteMessage(
+ type=Type.RECORD,
+ record=AirbyteRecordMessage(
+ stream=stream_name,
+ data={
+ "string_col": "newtest-" + str(i),
+ "int_col": i,
+ "obj_col": json.dumps({"newtest": i}),
+ "arr_col": json.dumps(["newtest", i]),
+ },
+ emitted_at=0,
+ ),
+ )
+ for i in range(1, 3)
+ ]
+
+ assert expected_records == records_in_destination
diff --git a/airbyte-integrations/connectors/destination-cumulio/integration_tests/sample_config.json b/airbyte-integrations/connectors/destination-cumulio/integration_tests/sample_config.json
new file mode 100644
index 0000000..2a1ca74
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cumulio/integration_tests/sample_config.json
@@ -0,0 +1,5 @@
+{
+ "api_host": "https://api.cumul.io",
+ "api_key": "CUMULIO_API_KEY",
+ "api_token": "CUMULIO_API_TOKEN"
+}
diff --git a/airbyte-integrations/connectors/destination-cumulio/main.py b/airbyte-integrations/connectors/destination-cumulio/main.py
new file mode 100644
index 0000000..3ad0d71
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cumulio/main.py
@@ -0,0 +1,11 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+
+import sys
+
+from destination_cumulio import DestinationCumulio
+
+if __name__ == "__main__":
+ DestinationCumulio().run(sys.argv[1:])
diff --git a/airbyte-integrations/connectors/destination-cumulio/metadata.yaml b/airbyte-integrations/connectors/destination-cumulio/metadata.yaml
new file mode 100644
index 0000000..bef0bae
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cumulio/metadata.yaml
@@ -0,0 +1,24 @@
+data:
+ connectorType: destination
+ definitionId: e088acb6-9780-4568-880c-54c2dd7f431b
+ dockerImageTag: 0.1.0
+ dockerRepository: airbyte/destination-cumulio
+ githubIssueLabel: destination-cumulio
+ connectorSubtype: api
+ icon: cumulio.svg
+ license: MIT
+ name: Cumul.io
+ registries:
+ cloud:
+ enabled: false
+ oss:
+ enabled: false
+ releaseStage: alpha
+ documentationUrl: https://docs.airbyte.com/integrations/destinations/cumulio
+ tags:
+ - language:python
+ ab_internal:
+ sl: 100
+ ql: 100
+ supportLevel: archived
+metadataSpecVersion: "1.0"
diff --git a/airbyte-integrations/connectors/destination-cumulio/requirements.txt b/airbyte-integrations/connectors/destination-cumulio/requirements.txt
new file mode 100644
index 0000000..d6e1198
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cumulio/requirements.txt
@@ -0,0 +1 @@
+-e .
diff --git a/airbyte-integrations/connectors/destination-cumulio/setup.py b/airbyte-integrations/connectors/destination-cumulio/setup.py
new file mode 100644
index 0000000..e613da7
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cumulio/setup.py
@@ -0,0 +1,23 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+
+from setuptools import find_packages, setup
+
+MAIN_REQUIREMENTS = ["airbyte-cdk", "cumulio"]
+
+TEST_REQUIREMENTS = ["pytest~=6.2"]
+
+setup(
+ name="destination_cumulio",
+ description="Airbyte destination connector implementation for Cumul.io.",
+ author="Cumul.io",
+ author_email="support@cumul.io",
+ packages=find_packages(),
+ install_requires=MAIN_REQUIREMENTS,
+ package_data={"": ["*.json"]},
+ extras_require={
+ "tests": TEST_REQUIREMENTS,
+ },
+)
diff --git a/airbyte-integrations/connectors/destination-cumulio/unit_tests/test_client.py b/airbyte-integrations/connectors/destination-cumulio/unit_tests/test_client.py
new file mode 100644
index 0000000..258e8ff
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cumulio/unit_tests/test_client.py
@@ -0,0 +1,629 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+
+from typing import Any, Mapping
+from unittest.mock import ANY, MagicMock, patch
+
+import pytest
+from destination_cumulio.client import CumulioClient
+
+# "# type: ignore" was added in several places to avoid mypy complaining about patching functions with MagicMock
+
+
+@pytest.fixture(name="logger")
+def logger_fixture() -> MagicMock:
+ return MagicMock()
+
+
+@pytest.fixture(name="cumulio_client")
+def cumulio_client_fixture(logger: MagicMock) -> CumulioClient:
+ # Create a mock configuration dictionary
+ config = {
+ "api_key": "123456",
+ "api_token": "abcdef",
+ "api_host": "https://api.cumul.io",
+ }
+ # Initialize a CumulioClient object with the mock configuration for the Cumulio class
+ with patch("destination_cumulio.client.Cumulio", MagicMock()):
+ return CumulioClient(config, logger)
+
+
+@pytest.fixture(name="dummy_data")
+def dummy_data_fixture() -> Mapping[str, Any]:
+ return {
+ "data": [
+ [
+ "Text value 1",
+ 1,
+ "2022-01-01T00:00:00.000Z",
+ ],
+ ["Text value 2", 2, "2022-02-01T00:00:00.000Z"],
+ ["Text value 3", 3, "2022-03-01T00:00:00.000Z"],
+ ],
+ "columns": ["Text column", "Numeric column", "Datetime column"],
+ }
+
+
+# tests for batch_write method
+
+
+def test_batch_write_append_empty_write_buffer(cumulio_client: CumulioClient):
+ cumulio_client._get_dataset_id_from_stream_name = MagicMock(return_value="dataset_id") # type: ignore
+ cumulio_client._push_batch_to_new_dataset = MagicMock() # type: ignore
+ cumulio_client._push_batch_to_existing_dataset = MagicMock() # type: ignore
+
+ cumulio_client.batch_write(
+ stream_name="test-stream",
+ write_buffer=[],
+ column_headers=["test-column"],
+ is_in_overwrite_sync_mode=False,
+ is_first_batch=True,
+ update_metadata=True,
+ )
+
+ cumulio_client._get_dataset_id_from_stream_name.assert_not_called()
+ cumulio_client._push_batch_to_new_dataset.assert_not_called()
+ cumulio_client._push_batch_to_existing_dataset.assert_not_called()
+
+ cumulio_client.batch_write(
+ stream_name="test-stream",
+ write_buffer=[[]],
+ column_headers=["test-column"],
+ is_in_overwrite_sync_mode=False,
+ is_first_batch=True,
+ update_metadata=True,
+ )
+
+ cumulio_client._get_dataset_id_from_stream_name.assert_not_called()
+ cumulio_client._push_batch_to_new_dataset.assert_not_called()
+ cumulio_client._push_batch_to_existing_dataset.assert_not_called()
+
+
+def test_batch_write_append_no_existing_dataset(cumulio_client: CumulioClient, dummy_data: Mapping[str, Any]):
+ cumulio_client.client.get = MagicMock(return_value={"count": 0, "Rows": []})
+ cumulio_client._push_batch_to_new_dataset = MagicMock() # type: ignore
+ cumulio_client._push_batch_to_existing_dataset = MagicMock() # type: ignore
+
+ stream_name = "test-stream"
+
+ cumulio_client.batch_write(
+ stream_name=stream_name,
+ write_buffer=dummy_data["data"],
+ column_headers=dummy_data["columns"],
+ is_in_overwrite_sync_mode=False,
+ is_first_batch=True,
+ update_metadata=True,
+ )
+
+ expected_properties = {
+ "where": {"type": "dataset"},
+ "attributes": ["id", "name"],
+ "include": [
+ {
+ "model": "Tag",
+ "where": {"tag": cumulio_client.TAG_PREFIX + stream_name},
+ "attributes": ["id", "tag"],
+ "jointype": "inner",
+ }
+ ],
+ }
+
+ cumulio_client.client.get.assert_called_once_with("securable", expected_properties)
+
+ cumulio_client._push_batch_to_existing_dataset.assert_not_called()
+
+ cumulio_client._push_batch_to_new_dataset.assert_called_once_with(stream_name, dummy_data["data"], dummy_data["columns"])
+
+
+def test_batch_write_existing_dataset_no_first_batch_replace(cumulio_client: CumulioClient, dummy_data: Mapping[str, Any]):
+ cumulio_client._get_dataset_id_from_stream_name = MagicMock(return_value="dataset_id") # type: ignore
+ cumulio_client._push_batch_to_new_dataset = MagicMock() # type: ignore
+ cumulio_client._push_batch_to_existing_dataset = MagicMock() # type: ignore
+ cumulio_client._dataset_contains_replace_tag = MagicMock(return_value=False) # type: ignore
+
+ stream_name = "test-stream"
+
+ cumulio_client.batch_write(
+ stream_name=stream_name,
+ write_buffer=dummy_data["data"],
+ column_headers=dummy_data["columns"],
+ is_in_overwrite_sync_mode=False,
+ is_first_batch=True,
+ update_metadata=True,
+ )
+ cumulio_client._push_batch_to_new_dataset.assert_not_called()
+ cumulio_client._dataset_contains_replace_tag.assert_called_once_with("dataset_id")
+ cumulio_client._push_batch_to_existing_dataset.assert_called_once_with(
+ "dataset_id", dummy_data["data"], dummy_data["columns"], False, True
+ )
+
+
+def test_batch_write_existing_dataset_first_batch_replace_overwrite_mode(cumulio_client: CumulioClient, dummy_data: Mapping[str, Any]):
+ cumulio_client._get_dataset_id_from_stream_name = MagicMock(return_value="dataset_id") # type: ignore
+ cumulio_client._push_batch_to_new_dataset = MagicMock() # type: ignore
+ cumulio_client._push_batch_to_existing_dataset = MagicMock() # type: ignore
+ cumulio_client._dataset_contains_replace_tag = MagicMock(return_value=False) # type: ignore
+
+ stream_name = "test-stream"
+
+ cumulio_client.batch_write(
+ stream_name=stream_name,
+ write_buffer=dummy_data["data"],
+ column_headers=dummy_data["columns"],
+ is_in_overwrite_sync_mode=True,
+ is_first_batch=True,
+ update_metadata=True,
+ )
+ cumulio_client._push_batch_to_new_dataset.assert_not_called()
+ cumulio_client._dataset_contains_replace_tag.assert_called_once_with("dataset_id")
+ cumulio_client._push_batch_to_existing_dataset.assert_called_once_with(
+ "dataset_id", dummy_data["data"], dummy_data["columns"], True, True
+ )
+
+
+def test_batch_write_existing_dataset_first_batch_replace_tag(cumulio_client: CumulioClient, dummy_data: Mapping[str, Any]):
+ cumulio_client._get_dataset_id_from_stream_name = MagicMock(return_value="dataset_id") # type: ignore
+ cumulio_client._push_batch_to_new_dataset = MagicMock() # type: ignore
+ cumulio_client._push_batch_to_existing_dataset = MagicMock() # type: ignore
+ cumulio_client._dataset_contains_replace_tag = MagicMock(return_value=True) # type: ignore
+
+ stream_name = "test-stream"
+
+ cumulio_client.batch_write(
+ stream_name=stream_name,
+ write_buffer=dummy_data["data"],
+ column_headers=dummy_data["columns"],
+ is_in_overwrite_sync_mode=False,
+ is_first_batch=True,
+ update_metadata=True,
+ )
+ cumulio_client._push_batch_to_new_dataset.assert_not_called()
+ cumulio_client._dataset_contains_replace_tag.assert_called_once_with("dataset_id")
+ cumulio_client._push_batch_to_existing_dataset.assert_called_once_with(
+ "dataset_id", dummy_data["data"], dummy_data["columns"], True, True
+ )
+
+
+def test_batch_write_existing_dataset_non_first_batch(cumulio_client: CumulioClient, dummy_data: Mapping[str, Any]):
+ cumulio_client._get_dataset_id_from_stream_name = MagicMock(return_value="dataset_id") # type: ignore
+ cumulio_client._push_batch_to_new_dataset = MagicMock() # type: ignore
+ cumulio_client._push_batch_to_existing_dataset = MagicMock() # type: ignore
+ cumulio_client._dataset_contains_replace_tag = MagicMock(return_value=True) # type: ignore
+
+ stream_name = "test-stream"
+
+ cumulio_client.batch_write(
+ stream_name=stream_name,
+ write_buffer=dummy_data["data"],
+ column_headers=dummy_data["columns"],
+ is_in_overwrite_sync_mode=True,
+ is_first_batch=False,
+ update_metadata=True,
+ )
+ cumulio_client._push_batch_to_new_dataset.assert_not_called()
+ cumulio_client._dataset_contains_replace_tag.assert_called_once_with("dataset_id")
+ cumulio_client._push_batch_to_existing_dataset.assert_called_once_with(
+ "dataset_id", dummy_data["data"], dummy_data["columns"], False, True
+ )
+
+
+# tests for test_api_token method
+
+
+def test_api_token_unknown_combination(cumulio_client: CumulioClient):
+ """ "Test that the test_api_token method correctly throws an error upon an invalid combination"""
+ cumulio_client.client.get = MagicMock(return_value={"count": 0})
+ with pytest.raises(Exception):
+ cumulio_client.test_api_token()
+
+
+def test_api_token_api_call(cumulio_client: CumulioClient):
+ """ "Test that the test_api_token method makes an API request to the authorization endpoint"""
+ cumulio_client.client.get = MagicMock(return_value={"count": 1})
+ cumulio_client.test_api_token()
+ cumulio_client.client.get.assert_called_with("authorization", {"where": {"type": "api"}})
+
+
+def test_test_data_push_method(cumulio_client: CumulioClient, dummy_data: Mapping[str, Any]):
+ """ "Test that the test_data_push method deletes the dataset afterwards"""
+ cumulio_client.batch_write = MagicMock() # type: ignore
+ cumulio_client.delete_dataset = MagicMock() # type: ignore
+
+ stream_name = "test-stream"
+
+ cumulio_client.test_data_push(stream_name, dummy_data["data"], dummy_data["columns"])
+
+ cumulio_client.delete_dataset.assert_called_once_with("test-stream")
+
+
+# tests for delete_dataset method
+
+
+def test_delete_dataset_no_dataset_found(cumulio_client: CumulioClient):
+ cumulio_client.client.delete = MagicMock()
+ cumulio_client._get_dataset_id_from_stream_name = MagicMock(return_value=None) # type: ignore
+
+ cumulio_client.delete_dataset("stream_name")
+
+ # assert that the _get_dataset_id_from_stream_name method was called once with the correct arguments
+ cumulio_client._get_dataset_id_from_stream_name.assert_called_once_with("stream_name")
+
+ # assert that the client.delete method is not called as no dataset was found
+ cumulio_client.client.delete.assert_not_called()
+
+
+def test_delete_dataset_dataset_found(cumulio_client: CumulioClient):
+ cumulio_client.client.delete = MagicMock()
+    cumulio_client._get_dataset_id_from_stream_name = MagicMock(return_value="dataset_id")  # type: ignore
+
+ cumulio_client.delete_dataset("stream_name")
+
+ # assert that the _get_dataset_id_from_stream_name method was called once with the correct arguments
+ cumulio_client._get_dataset_id_from_stream_name.assert_called_once_with("stream_name")
+
+ # assert that the client.delete method was called once with the correct arguments
+ cumulio_client.client.delete.assert_called_once_with("securable", "dataset_id")
+
+
+# tests for get_ordered_columns method
+
+
+def test_get_ordered_columns_dataset_not_created(cumulio_client: CumulioClient):
+ cumulio_client.get_dataset_and_columns_from_stream_name = MagicMock(return_value=None) # type: ignore
+ result = cumulio_client.get_ordered_columns("stream_name")
+ assert result == []
+
+
+def test_get_ordered_columns_same_order(cumulio_client: CumulioClient):
+ cumulio_dataset_and_columns = {
+ "id": "dataset_id",
+ "columns": [
+ {"source_name": "column1", "order": 2},
+ {"source_name": "column2", "order": 1},
+ ],
+ }
+ cumulio_client.get_dataset_and_columns_from_stream_name = MagicMock(return_value=cumulio_dataset_and_columns) # type: ignore
+ result = cumulio_client.get_ordered_columns("stream_name")
+ assert result == ["column2", "column1"]
+
+
+# tests for _push_batch_to_new_dataset method
+
+
+def test_push_batch_to_new_dataset(cumulio_client: CumulioClient, dummy_data: Mapping[str, Any]):
+ cumulio_client.client.create = MagicMock(return_value={"rows": [{"id": "new_dataset_id"}]})
+ cumulio_client._associate_tag_dataset_id = MagicMock() # type: ignore
+
+ stream_name = "test_stream"
+
+ expected_request_properties = {
+ "type": "create",
+ "data": dummy_data["data"],
+ "options": {
+ "header": dummy_data["columns"],
+ "update_metadata": True,
+ "name": {"en": cumulio_client.INITIAL_DATASET_NAME_PREFIX + stream_name},
+ },
+ }
+ cumulio_client._push_batch_to_new_dataset(stream_name, dummy_data["data"], dummy_data["columns"])
+ cumulio_client.client.create.assert_called_once_with("data", expected_request_properties)
+ cumulio_client._associate_tag_dataset_id.assert_called_once_with(stream_name, "new_dataset_id")
+
+
+def test_push_batch_to_new_dataset_all_retries_error(cumulio_client: CumulioClient, dummy_data: Mapping[str, Any]):
+ cumulio_client.client.create = MagicMock(side_effect=RuntimeError("Internal Server Error"))
+ stream_name = "test_stream"
+
+ with patch("destination_cumulio.client.time", MagicMock()):
+ with pytest.raises(Exception):
+ cumulio_client._push_batch_to_new_dataset(stream_name, dummy_data["data"], dummy_data["columns"])
+
+
+def test_push_batch_to_new_dataset_first_try_fails(cumulio_client: CumulioClient, dummy_data: Mapping[str, Any]):
+ effects = iter([RuntimeError("Internal Server Error")])
+
+ def side_effect(*_):
+ try:
+ raise next(effects)
+ except StopIteration:
+ return {"rows": [{"id": "new_dataset_id"}]}
+
+ cumulio_client.client.create = MagicMock(side_effect=side_effect)
+ cumulio_client._associate_tag_dataset_id = MagicMock() # type: ignore
+
+ stream_name = "test_stream"
+
+ expected_request_properties = {
+ "type": "create",
+ "data": dummy_data["data"],
+ "options": {
+ "header": dummy_data["columns"],
+ "update_metadata": True,
+ "name": {"en": cumulio_client.INITIAL_DATASET_NAME_PREFIX + stream_name},
+ },
+ }
+
+ with patch("destination_cumulio.client.time", MagicMock()):
+ cumulio_client._push_batch_to_new_dataset(stream_name, dummy_data["data"], dummy_data["columns"])
+ cumulio_client.client.create.assert_called_with("data", expected_request_properties)
+
+ assert cumulio_client.client.create.call_count == 2
+
+ cumulio_client._associate_tag_dataset_id.assert_called_once_with(stream_name, "new_dataset_id")
+
+
+# tests for _push_batch_to_existing_dataset method
+
+
+def test_push_batch_to_existing_dataset_all_retries_error(cumulio_client: CumulioClient, dummy_data: Mapping[str, Any]):
+ cumulio_client.client.create = MagicMock(side_effect=RuntimeError("Internal Server Error"))
+ cumulio_client._remove_replace_tag_dataset_id_association = MagicMock() # type: ignore
+
+ dataset_id = "dataset_id"
+
+ with patch("destination_cumulio.client.time", MagicMock()):
+ with pytest.raises(Exception):
+ cumulio_client._push_batch_to_existing_dataset(dataset_id, dummy_data["data"], dummy_data["columns"], False, True)
+
+
+def test_push_batch_to_existing_dataset_first_try_fails(cumulio_client: CumulioClient, dummy_data: Mapping[str, Any]):
+ effects = iter([RuntimeError("Internal Server Error")])
+
+ def side_effect(*_):
+ try:
+ raise next(effects)
+ except StopIteration:
+ return None
+
+ cumulio_client.client.create = MagicMock(side_effect=side_effect)
+ cumulio_client._remove_replace_tag_dataset_id_association = MagicMock() # type: ignore
+
+ dataset_id = "dataset_id"
+
+ expected_request_properties = {
+ "type": "append",
+ "data": dummy_data["data"],
+ "securable_id": dataset_id,
+ "options": {
+ "header": dummy_data["columns"],
+ "update_metadata": True,
+ },
+ }
+
+ with patch("destination_cumulio.client.time", MagicMock()):
+ cumulio_client._push_batch_to_existing_dataset(dataset_id, dummy_data["data"], dummy_data["columns"], False, True)
+ cumulio_client.client.create.assert_called_with("data", expected_request_properties)
+
+ assert cumulio_client.client.create.call_count == 2
+
+ cumulio_client._remove_replace_tag_dataset_id_association.assert_not_called()
+
+
+def test_push_batch_to_existing_dataset_no_first_batch_replace(cumulio_client: CumulioClient, dummy_data: Mapping[str, Any]):
+ cumulio_client.client.create = MagicMock()
+ cumulio_client._remove_replace_tag_dataset_id_association = MagicMock() # type: ignore
+
+ dataset_id = "dataset_id"
+
+ expected_request_properties = {
+ "type": "append",
+ "data": dummy_data["data"],
+ "securable_id": dataset_id,
+ "options": {
+ "header": dummy_data["columns"],
+ "update_metadata": True,
+ },
+ }
+
+ cumulio_client._push_batch_to_existing_dataset(dataset_id, dummy_data["data"], dummy_data["columns"], False, True)
+ cumulio_client.client.create.assert_called_once_with("data", expected_request_properties)
+ cumulio_client._remove_replace_tag_dataset_id_association.assert_not_called()
+
+
+def test_push_batch_to_existing_dataset_first_batch_replace(cumulio_client: CumulioClient, dummy_data: Mapping[str, Any]):
+ cumulio_client.client.create = MagicMock()
+ cumulio_client._remove_replace_tag_dataset_id_association = MagicMock() # type: ignore
+
+ dataset_id = "dataset_id"
+
+ expected_request_properties = {
+ "type": "replace",
+ "data": dummy_data["data"],
+ "securable_id": dataset_id,
+ "options": {
+ "header": dummy_data["columns"],
+ "update_metadata": True,
+ },
+ }
+
+ cumulio_client._push_batch_to_existing_dataset(dataset_id, dummy_data["data"], dummy_data["columns"], True, True)
+ cumulio_client.client.create.assert_called_once_with("data", expected_request_properties)
+ cumulio_client._remove_replace_tag_dataset_id_association.assert_called_once_with(dataset_id)
+
+
+# tests for _dataset_contains_replace_tag method
+
+
+def test_get_dataset_and_columns_from_stream_name_no_dataset(
+ cumulio_client: CumulioClient,
+):
+ cumulio_dataset_and_columns_result = {"count": 0, "rows": []}
+
+ # Test when no dataset is found
+ cumulio_client.client.get = MagicMock(return_value=cumulio_dataset_and_columns_result)
+ result = cumulio_client.get_dataset_and_columns_from_stream_name("test_stream")
+ assert result is None
+
+
+def test_get_dataset_and_columns_from_stream_name_single_existing_dataset(
+ cumulio_client: CumulioClient,
+):
+ cumulio_dataset_and_columns_result: Mapping[str, Any] = {
+ "count": 1,
+ "rows": [
+ {
+ "id": "dataset_id",
+ "columns": [
+ {"source_name": "column1", "order": 2},
+ {"source_name": "column2", "order": 1},
+ ],
+ }
+ ],
+ }
+ # Test when dataset is found
+ cumulio_client.client.get = MagicMock(return_value=cumulio_dataset_and_columns_result)
+ result = cumulio_client.get_dataset_and_columns_from_stream_name("test_stream")
+ assert result["id"] == cumulio_dataset_and_columns_result["rows"][0]["id"]
+ assert result["columns"] == cumulio_dataset_and_columns_result["rows"][0]["columns"]
+
+
+def test_get_dataset_and_columns_from_stream_name_multiple_existing_datasets(
+ cumulio_client: CumulioClient,
+):
+ """Tests whether an exception is thrown when multiple datasets are returned for a stream name"""
+ cumulio_dataset_and_columns_result = {
+ "count": 2,
+ "rows": [
+ {
+ "id": "dataset_id_1",
+ "columns": [
+ {"source_name": "column1", "order": 2},
+ {"source_name": "column2", "order": 1},
+ ],
+ },
+ {
+ "id": "dataset_id_2",
+ "columns": [
+ {"source_name": "column1", "order": 1},
+ {"source_name": "column2", "order": 2},
+ ],
+ },
+ ],
+ }
+ # Test when multiple datasets are found
+ cumulio_client.client.get = MagicMock(return_value=cumulio_dataset_and_columns_result)
+ with pytest.raises(Exception):
+ cumulio_client.get_dataset_and_columns_from_stream_name("test_stream")
+
+
+# tests for the set_replace_tag_on_dataset method
+
+
+def test_set_replace_tag_on_dataset_no_dataset_found(cumulio_client: CumulioClient):
+ cumulio_client._get_dataset_id_from_stream_name = MagicMock(return_value=None) # type: ignore
+ cumulio_client._associate_tag_dataset_id = MagicMock() # type: ignore
+
+ cumulio_client.set_replace_tag_on_dataset("stream_name")
+
+ cumulio_client._get_dataset_id_from_stream_name.assert_called_once_with("stream_name")
+ cumulio_client._associate_tag_dataset_id.assert_not_called()
+
+
+def test_set_replace_tag_on_dataset_existing_dataset(cumulio_client: CumulioClient):
+ cumulio_client._get_dataset_id_from_stream_name = MagicMock(return_value="dataset_id") # type: ignore
+ cumulio_client._associate_tag_dataset_id = MagicMock() # type: ignore
+
+ cumulio_client.set_replace_tag_on_dataset("stream_name")
+
+ cumulio_client._get_dataset_id_from_stream_name.assert_called_once_with("stream_name")
+ cumulio_client._associate_tag_dataset_id.assert_called_once_with(cumulio_client.REPLACE_TAG, "dataset_id")
+
+
+# tests for _dataset_contains_replace_tag method
+
+
+def test_dataset_contains_replace_tag(cumulio_client: CumulioClient):
+ dataset_id = "123"
+ cumulio_client.client.get = MagicMock(return_value={"count": 1})
+ assert cumulio_client._dataset_contains_replace_tag(dataset_id) is True
+
+
+def test_dataset_does_not_contain_replace_tag(cumulio_client: CumulioClient):
+ dataset_id = "123"
+ cumulio_client.client.get = MagicMock(return_value={"count": 0})
+ assert cumulio_client._dataset_contains_replace_tag(dataset_id) is False
+
+
+# tests for _get_dataset_id_from_stream_name method
+
+
+def test_get_dataset_id_from_stream_name_no_dataset(cumulio_client: CumulioClient):
+ cumulio_client.client.get.return_value = {"count": 0, "rows": []}
+ dataset_id = cumulio_client._get_dataset_id_from_stream_name("test_stream")
+ assert dataset_id is None
+
+
+def test_get_dataset_id_from_stream_name_single_dataset(cumulio_client: CumulioClient):
+ cumulio_client.client.get.return_value = {
+ "count": 1,
+ "rows": [{"id": "dataset_id", "name": "Test dataset"}],
+ }
+ dataset_id = cumulio_client._get_dataset_id_from_stream_name("test_stream")
+ assert dataset_id == "dataset_id"
+
+
+def test_get_dataset_id_from_stream_name_multiple_datasets(
+ cumulio_client: CumulioClient,
+):
+ """Tests whether an exception is thrown when multiple datasets are returned for a stream name"""
+ cumulio_client.client.get.return_value = {
+ "count": 2,
+ "rows": [
+ {"id": "dataset_id_1", "name": "Test dataset 1"},
+ {"id": "dataset_id_2", "name": "Test dataset 2"},
+ ],
+ }
+ with pytest.raises(Exception):
+ cumulio_client._get_dataset_id_from_stream_name("test_stream")
+
+
+# tests for _associate_tag_dataset_id method
+
+
+def test_associate_tag_dataset_id_no_tag_found(cumulio_client: CumulioClient):
+ cumulio_client._get_tag_id = MagicMock(return_value=None) # type: ignore
+ cumulio_client._create_and_associate_stream_name_tag_with_dataset_id = MagicMock() # type: ignore
+ cumulio_client._associate_tag_with_dataset_id = MagicMock() # type: ignore
+
+ cumulio_client._associate_tag_dataset_id("test_stream", "test_dataset_id")
+
+ cumulio_client._create_and_associate_stream_name_tag_with_dataset_id.assert_called_once_with("test_stream", "test_dataset_id")
+ cumulio_client._associate_tag_with_dataset_id.assert_not_called()
+
+
+def test_associate_tag_dataset_id_tag_found(cumulio_client: CumulioClient):
+ cumulio_client._get_tag_id = MagicMock(return_value="tag_id") # type: ignore
+ cumulio_client._create_and_associate_stream_name_tag_with_dataset_id = MagicMock() # type: ignore
+ cumulio_client._associate_tag_with_dataset_id = MagicMock() # type: ignore
+
+ cumulio_client._associate_tag_dataset_id("test_stream", "test_dataset_id")
+
+ cumulio_client._associate_tag_with_dataset_id.assert_called_once_with("tag_id", "test_dataset_id")
+ cumulio_client._create_and_associate_stream_name_tag_with_dataset_id.assert_not_called()
+
+
+# tests for _get_tag_id method
+
+
+def test_get_tag_id_no_tag_found(cumulio_client: CumulioClient):
+ tag_api_response = {"count": 0, "rows": []}
+ cumulio_client.client.get = MagicMock(return_value=tag_api_response)
+
+ result = cumulio_client._get_tag_id("test_stream")
+
+ cumulio_client.client.get.assert_called_once_with("tag", ANY)
+ assert result is None
+
+
+def test_get_tag_id_tag_found(cumulio_client: CumulioClient):
+ tag_api_response: Mapping[str, Any] = {"count": 1, "rows": [{"id": "test_tag_id"}]}
+ cumulio_client.client.get = MagicMock(return_value=tag_api_response)
+
+ result = cumulio_client._get_tag_id("test_stream")
+
+ cumulio_client.client.get.assert_called_once_with("tag", ANY)
+ assert result == tag_api_response["rows"][0]["id"]
diff --git a/airbyte-integrations/connectors/destination-cumulio/unit_tests/test_destination.py b/airbyte-integrations/connectors/destination-cumulio/unit_tests/test_destination.py
new file mode 100644
index 0000000..4805fb5
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cumulio/unit_tests/test_destination.py
@@ -0,0 +1,155 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+
+from datetime import datetime
+from logging import Logger, getLogger
+from typing import Any, Mapping
+from unittest.mock import MagicMock, call, patch
+
+import pytest
+from airbyte_cdk.models import (
+ AirbyteMessage,
+ AirbyteRecordMessage,
+ AirbyteStateMessage,
+ AirbyteStream,
+ ConfiguredAirbyteCatalog,
+ ConfiguredAirbyteStream,
+ DestinationSyncMode,
+ SyncMode,
+ Type,
+)
+from destination_cumulio.destination import DestinationCumulio
+
+
+@pytest.fixture(name="logger")
+def logger_fixture() -> Logger:
+ return getLogger("airbyte")
+
+
+@pytest.fixture(name="config")
+def config_fixture() -> Mapping[str, Any]:
+ return {
+ "api_key": "123abc",
+ "api_token": "456def",
+ "api_host": "https://api.cumul.io",
+ }
+
+
+@pytest.fixture(name="configured_catalog")
+def configured_catalog_fixture() -> ConfiguredAirbyteCatalog:
+ stream_schema = {
+ "type": "object",
+ "properties": {
+ "string_column": {"type": "integer"},
+ "int_column": {"type": "integer"},
+ },
+ }
+
+ append_stream = ConfiguredAirbyteStream(
+ stream=AirbyteStream(
+ name="append_stream",
+ json_schema=stream_schema,
+ supported_sync_modes=[SyncMode.incremental],
+ ),
+ sync_mode=SyncMode.incremental,
+ destination_sync_mode=DestinationSyncMode.append,
+ )
+
+ overwrite_stream = ConfiguredAirbyteStream(
+ stream=AirbyteStream(
+ name="overwrite_stream",
+ json_schema=stream_schema,
+ supported_sync_modes=[SyncMode.incremental],
+ ),
+ sync_mode=SyncMode.incremental,
+ destination_sync_mode=DestinationSyncMode.overwrite,
+ )
+
+ return ConfiguredAirbyteCatalog(streams=[append_stream, overwrite_stream])
+
+
+@pytest.fixture(name="airbyte_message_1")
+def airbyte_message_1_fixture() -> AirbyteMessage:
+ return AirbyteMessage(
+ type=Type.RECORD,
+ record=AirbyteRecordMessage(
+ stream="append_stream",
+ data={"string_column": "value_1", "int_column": 1},
+ emitted_at=int(datetime.now().timestamp()) * 1000,
+ ),
+ )
+
+
+@pytest.fixture(name="airbyte_message_2")
+def airbyte_message_2_fixture() -> AirbyteMessage:
+ return AirbyteMessage(
+ type=Type.RECORD,
+ record=AirbyteRecordMessage(
+ stream="overwrite_stream",
+ data={"string_column": "value_2", "int_column": 2},
+ emitted_at=int(datetime.now().timestamp()) * 1000,
+ ),
+ )
+
+
+@pytest.fixture(name="airbyte_state_message")
+def airbyte_state_message_fixture() -> AirbyteMessage:
+ return AirbyteMessage(type=Type.STATE, state=AirbyteStateMessage(data={}))
+
+
+def test_check(config: Mapping[str, Any], logger: Logger):
+ with patch("destination_cumulio.destination.CumulioClient") as cumulio_client:
+ destination_cumulio = DestinationCumulio()
+ destination_cumulio.check(logger, config)
+ assert cumulio_client.mock_calls == [
+ call(config, logger),
+ call().test_api_token(),
+ ]
+
+
+def test_write_no_input_messages(
+ config: Mapping[str, Any],
+ configured_catalog: ConfiguredAirbyteCatalog,
+ airbyte_message_1: AirbyteMessage,
+ airbyte_message_2: AirbyteMessage,
+ airbyte_state_message: AirbyteMessage,
+ logger: Logger,
+):
+ with patch("destination_cumulio.destination.CumulioWriter") as cumulio_writer:
+ destination_cumulio = DestinationCumulio()
+
+ input_messages = [airbyte_state_message]
+ result = list(destination_cumulio.write(config, configured_catalog, input_messages))
+ assert result == [airbyte_state_message]
+
+ assert cumulio_writer.mock_calls == [
+ call(config, configured_catalog, logger),
+ call().delete_stream_entries("overwrite_stream"),
+ call().flush_all(), # The first flush_all is called before yielding the state message
+ call().flush_all(), # The second flush_all is called after going through all input messages
+ ]
+
+
+def test_write(
+ config: Mapping[str, Any],
+ configured_catalog: ConfiguredAirbyteCatalog,
+ airbyte_message_1: AirbyteMessage,
+ airbyte_message_2: AirbyteMessage,
+ airbyte_state_message: AirbyteMessage,
+ logger: Logger,
+):
+ with patch("destination_cumulio.destination.CumulioWriter") as cumulio_writer:
+ input_messages = [airbyte_message_1, airbyte_message_2, airbyte_state_message]
+ destination_cumulio = DestinationCumulio()
+ result = list(destination_cumulio.write(config, configured_catalog, input_messages))
+ assert result == [airbyte_state_message]
+ assert cumulio_writer.mock_calls == [
+ call(config, configured_catalog, logger),
+ call().delete_stream_entries("overwrite_stream"),
+ call().queue_write_operation("append_stream", {"string_column": "value_1", "int_column": 1}),
+ call().queue_write_operation("overwrite_stream", {"string_column": "value_2", "int_column": 2}),
+ call().flush_all(), # The first flush_all is called before yielding the state message
+ call().flush_all(), # The second flush_all is called after going through all input messages
+ ]
diff --git a/airbyte-integrations/connectors/destination-cumulio/unit_tests/test_writer.py b/airbyte-integrations/connectors/destination-cumulio/unit_tests/test_writer.py
new file mode 100644
index 0000000..ac921c7
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-cumulio/unit_tests/test_writer.py
@@ -0,0 +1,512 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+
+import unittest
+from typing import Any, Mapping
+from unittest.mock import MagicMock, patch
+
+import pytest
+from airbyte_cdk.models import AirbyteStream, ConfiguredAirbyteCatalog, ConfiguredAirbyteStream, DestinationSyncMode, SyncMode
+from destination_cumulio.writer import CumulioWriter
+
+
+@pytest.fixture(name="logger")
+def logger_fixture() -> MagicMock:
+ return MagicMock()
+
+
+@pytest.fixture(name="config")
+def config_fixture() -> Mapping[str, Any]:
+ return {
+ "api_key": "123abc",
+ "api_token": "456def",
+ "api_host": "https://api.cumul.io",
+ }
+
+
+@pytest.fixture(name="configured_catalog")
+def configured_catalog_fixture() -> ConfiguredAirbyteCatalog:
+ orders_stream_schema = {
+ "type": "object",
+ "properties": {
+ "order_id": {"type": "integer"},
+ "amount": {"type": "integer"},
+ "customer_id": {"type": "string"},
+ },
+ }
+ products_stream_schema = {
+ "type": "object",
+ "properties": {"product_id": {"type": "integer"}},
+ }
+
+ orders_append_stream = ConfiguredAirbyteStream(
+ stream=AirbyteStream(
+ name="orders",
+ json_schema=orders_stream_schema,
+ supported_sync_modes=[SyncMode.incremental],
+ ),
+ sync_mode=SyncMode.incremental,
+ destination_sync_mode=DestinationSyncMode.append,
+ )
+
+ products_overwrite_stream = ConfiguredAirbyteStream(
+ stream=AirbyteStream(
+ name="products",
+ json_schema=products_stream_schema,
+ supported_sync_modes=[SyncMode.incremental],
+ ),
+ sync_mode=SyncMode.incremental,
+ destination_sync_mode=DestinationSyncMode.overwrite,
+ )
+
+ return ConfiguredAirbyteCatalog(streams=[orders_append_stream, products_overwrite_stream])
+
+
+@pytest.fixture(name="writer")
+def writer_no_existing_cumulio_columns(
+ config: Mapping[str, Any],
+ configured_catalog: ConfiguredAirbyteCatalog,
+ logger: MagicMock,
+) -> CumulioWriter:
+ """Returns a CumulioWriter using MagicMock, and mocking the return_value of all used CumulioClient methods."""
+ with patch("destination_cumulio.writer.CumulioClient", MagicMock()) as cumulio_client_mock:
+ # Mock get_ordered_columns to return no existing Cumul.io columns (dataset hasn't been created yet --> first sync)
+ cumulio_client_mock.return_value.get_ordered_columns.return_value = []
+ # cumulio_client_mock.return_value.batch_write.return_value = None
+ # cumulio_client_mock.return_value.set_replace_tag_on_dataset.return_value = None
+ return CumulioWriter(config, configured_catalog, logger)
+
+
+def test_small_enough_data_point_limit(writer: CumulioWriter):
+ """Tests whether the FLUSH_INTERVAL variable is smaller than the maximum amount of data points Cumul.io supports."""
+ assert writer.FLUSH_INTERVAL <= 10000
+
+
+def test_init(writer: CumulioWriter):
+ """Tests whether CumulioWriter is correctly initialized for streams with no known Cumulio dataset (i.e. first sync for each stream)."""
+
+ # Assert each stream is correctly initializing writers
+ assert "orders" in writer.writers
+ assert "products" in writer.writers
+
+ # Assert each stream is correctly initializing empty write buffer
+ assert len(writer.writers["orders"]["write_buffer"]) == 0
+ assert len(writer.writers["products"]["write_buffer"]) == 0
+
+ # Assert each stream is correctly initializing is_in_overwrite_sync_mode
+ assert writer.writers["orders"]["is_in_overwrite_sync_mode"] is False
+ assert writer.writers["products"]["is_in_overwrite_sync_mode"] is True
+
+ # Assert each stream is correctly initializing is_first_batch to True
+ assert writer.writers["orders"]["is_first_batch"] is True
+ assert writer.writers["products"]["is_first_batch"] is True
+
+ # Assert each stream is correctly initializing update_metadata (due to no columns from Cumul.io in this writer, both are True)
+ assert writer.writers["orders"]["update_metadata"] is True
+ assert writer.writers["products"]["update_metadata"] is True
+
+
+def test_transform_data(writer: CumulioWriter):
+ case = unittest.TestCase()
+
+ data = {"order_id": 1, "amount": 100.0, "customer_id": "cust_1"}
+ transformed_data = writer.transform_data("orders", data)
+ case.assertCountEqual(transformed_data, ["cust_1", 1, 100.0])
+
+
+def test_transform_data_missing_data(writer: CumulioWriter):
+ case = unittest.TestCase()
+
+ missing_data = {"order_id": 1, "customer_id": "cust_1"}
+ transformed_data = writer.transform_data("orders", missing_data)
+ case.assertCountEqual(transformed_data, ["cust_1", 1, None])
+
+
+def test_transform_data_additional_data(writer: CumulioWriter):
+ case = unittest.TestCase()
+
+ additional_data = {
+ "order_id": 1,
+ "amount": 100.0,
+ "customer_id": "cust_1",
+ "custmer_name": "Customer 1",
+ }
+ transformed_data = writer.transform_data("orders", additional_data)
+ case.assertCountEqual(transformed_data, ["cust_1", 1, 100.0])
+
+
+def test_transform_data_bool_data(writer: CumulioWriter):
+ case = unittest.TestCase()
+
+ bool_data = {"order_id": 1, "amount": 100.0, "customer_id": True}
+ transformed_data = writer.transform_data("orders", bool_data)
+ case.assertCountEqual(transformed_data, ["true", 1, 100.0])
+
+
+def test_transform_data_dict_data(writer: CumulioWriter):
+ case = unittest.TestCase()
+
+ dict_data = {"order_id": 1, "amount": 100.0, "customer_id": {"key": "value"}}
+ transformed_data = writer.transform_data("orders", dict_data)
+ case.assertCountEqual(transformed_data, ['{"key": "value"}', 1, 100.0])
+
+
+def test_transform_data_arr_data(writer: CumulioWriter):
+ case = unittest.TestCase()
+
+ arr_data = {"order_id": 1, "amount": 100.0, "customer_id": ["test1", "test2"]}
+ transformed_data = writer.transform_data("orders", arr_data)
+ case.assertCountEqual(transformed_data, ['["test1", "test2"]', 1, 100.0])
+
+
+def test_queue_write_operation(writer: CumulioWriter):
+ # Set flush interval to max value to avoid flushing data
+ writer.FLUSH_INTERVAL = 10000
+
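+ # Mock batch_write so no real Cumul.io API call is made if a flush is triggered.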
+ writer.client.batch_write = MagicMock() # type: ignore
+
+ case = unittest.TestCase()
+
+ order_data = {"order_id": 1, "amount": 100.0, "customer_id": "customer_1"}
+ writer.queue_write_operation("orders", order_data)
+
+ # Assert that write_buffer from the orders stream contains a single value
+ assert len(writer.writers["orders"]["write_buffer"]) == 1
+ case.assertCountEqual(writer.writers["orders"]["write_buffer"][0], ["customer_1", 1, 100.0])
+
+
+def test_queue_write_operation_two_streams(writer: CumulioWriter):
+ # Set flush interval to max value to avoid flushing data
+ writer.FLUSH_INTERVAL = 10000
+
+ writer.client.batch_write = MagicMock() # type: ignore
+
+ order_data = {"order_id": 1, "amount": 100.0, "customer_id": "customer_1"}
+ writer.queue_write_operation("orders", order_data)
+
+ # Assert that write_buffer from the orders stream contains a single value
+ assert len(writer.writers["orders"]["write_buffer"]) == 1
+
+ product_data = {"product_id": 1}
+ writer.queue_write_operation("products", product_data)
+
+ # Assert that the orders write_buffer isn't influenced by write operations from the products stream
+ assert len(writer.writers["orders"]["write_buffer"]) == 1
+
+ # Assert that write_buffer from the products stream contains a single value
+ assert len(writer.writers["products"]["write_buffer"]) == 1
+ assert writer.writers["products"]["write_buffer"] == [[1]]
+
+ product_data = {"product_id": 2}
+ writer.queue_write_operation("products", product_data)
+ # Assert that write_buffer from the products stream contains two values
+ assert writer.writers["products"]["write_buffer"] == [[1], [2]]
+
+
+def test_queue_write_operation_non_existing_stream(writer: CumulioWriter):
+ # Set flush interval to max value to avoid flushing data
+ writer.FLUSH_INTERVAL = 10000
+
+ writer.client.batch_write = MagicMock() # type: ignore
+
+ with pytest.raises(Exception):
+ # Assert that an Exception is thrown upon trying to write to a non-existing stream
+ writer.queue_write_operation("non_existing_stream", {"column": "value"})
+
+
+def test_flush(writer: CumulioWriter):
+ writer.client.batch_write = MagicMock() # type: ignore
+
+ writer.writers["orders"]["write_buffer"] = [["customer_1", 1, 100.0]]
+ writer.flush("orders")
+ assert writer.writers["orders"]["write_buffer"] == []
+
+
+def test_queue_write_flush_operation(writer: CumulioWriter):
+ # Set flush interval to 2 to cause flush after second row has been added to buffer
+ writer.FLUSH_INTERVAL = 2
+
+ writer.client.batch_write = MagicMock() # type: ignore
+
+ product_data = {"product_id": 1}
+ writer.queue_write_operation("products", product_data)
+ assert writer.writers["products"]["write_buffer"] == [[1]]
+
+ product_data = {"product_id": 2}
+ writer.queue_write_operation("products", product_data)
+ assert writer.writers["products"]["write_buffer"] == []
+ assert writer.writers["products"]["is_first_batch"] is False
+
+ product_data = {"product_id": 3}
+ writer.queue_write_operation("products", product_data)
+ assert writer.writers["products"]["write_buffer"] == [[3]]
+
+
+def test_flush_all(writer: CumulioWriter):
+ writer.client.batch_write = MagicMock() # type: ignore
+
+ writer.writers["orders"]["write_buffer"] = [["cust_1", 1, 100.0]]
+ writer.writers["products"]["write_buffer"] = [["cust_1", 1, 100.0]]
+ writer.flush_all()
+ assert writer.writers["orders"]["write_buffer"] == []
+ assert writer.writers["products"]["write_buffer"] == []
+
+
+def test_delete_stream_entries(writer: CumulioWriter):
+ writer.client.set_replace_tag_on_dataset = MagicMock() # type: ignore
+ writer.delete_stream_entries("stream_name")
+ writer.client.set_replace_tag_on_dataset.assert_called_once_with("stream_name")
+
+
+def _get_cumulio_and_merged_columns(writer: CumulioWriter) -> Mapping[str, Any]:
+ if len(writer.writers) == 0:
+ raise Exception("No streams defined for writer")
+
+ result = {}
+
+ for stream_name in writer.writers:
+ cumulio_columns = writer.client.get_ordered_columns(stream_name)
+ merged_columns = writer.writers[stream_name]["column_headers"]
+ result[stream_name] = {
+ "cumulio_columns": cumulio_columns,
+ "merged_columns": merged_columns,
+ }
+ return result
+
+
+@pytest.fixture
+def writer_existing_cumulio_columns(
+ config: Mapping[str, Any],
+ configured_catalog: ConfiguredAirbyteCatalog,
+ logger: MagicMock,
+) -> CumulioWriter:
+ """This will return a CumulioWriter that mocks airbyte stream catalogs that contains the same columns as those existing in Cumul.io."""
+ existing_cumulio_columns = {}
+ for configured_stream in configured_catalog.streams:
+ existing_cumulio_columns[configured_stream.stream.name] = [
+ column_name for column_name in configured_stream.stream.json_schema["properties"]
+ ]
+
+ def get_existing_cumulio_columns(stream_name):
+ return existing_cumulio_columns[stream_name]
+
+ with patch("destination_cumulio.writer.CumulioClient", MagicMock()) as cumulio_client_mock:
+ # Mock get_ordered_columns to return existing_cumulio_columns
+ cumulio_client_mock.return_value.get_ordered_columns = MagicMock(side_effect=get_existing_cumulio_columns)
+ return CumulioWriter(config, configured_catalog, logger)
+
+
+def test_init_existing_cumulio_columns(writer_existing_cumulio_columns: CumulioWriter):
+ """Tests whether each stream is correctly initializing update_metadata.
+ Due to identical columns in Cumul.io for this writer, both are False.
+ """
+ assert writer_existing_cumulio_columns.writers["orders"]["update_metadata"] is False
+ assert writer_existing_cumulio_columns.writers["products"]["update_metadata"] is False
+
+
+def test_equal_cumulio_and_merged_columns(
+ writer_existing_cumulio_columns: CumulioWriter,
+):
+ result = _get_cumulio_and_merged_columns(writer_existing_cumulio_columns)
+
+ for stream_name in result:
+ for index, column in enumerate(result[stream_name]["merged_columns"]):
+ # Assert that merged_columns are in same order as columns defined on Cumul.io's side.
+ assert result[stream_name]["cumulio_columns"][index] == column["name"]
+
+
+def test_queue_write_operation_with_correct_data_order(
+ writer_existing_cumulio_columns: CumulioWriter,
+):
+ writer_existing_cumulio_columns.client.batch_write = MagicMock() # type: ignore
+
+ result = _get_cumulio_and_merged_columns(writer_existing_cumulio_columns)
+ # Set flush interval to max value to avoid flushing data
+ writer_existing_cumulio_columns.FLUSH_INTERVAL = 10000
+
+ order_data = {"order_id": 1, "amount": 100.0, "customer_id": "cust_1"}
+ writer_existing_cumulio_columns.queue_write_operation("orders", order_data)
+ expected_data = []
+ for column in result["orders"]["merged_columns"]:
+ expected_data.append(order_data[column["name"]])
+ assert writer_existing_cumulio_columns.writers["orders"]["write_buffer"][0] == expected_data
+
+
+@pytest.fixture(name="configured_catalog_with_new_column")
+def configured_catalog_with_new_column_fixture() -> ConfiguredAirbyteCatalog:
+ """Creates a ConfiguredAirbyteCatalog that will be used to mock a new column."""
+ # The stream should have at least 2 schema properties (i.e. columns) defined.
+ orders_stream_schema = {
+ "type": "object",
+ "properties": {
+ "order_id": {"type": "integer"},
+ "amount": {"type": "integer"},
+ "customer_id": {"type": "string"},
+ "customer_name": {"type": "string"},
+ },
+ }
+
+ orders_append_stream = ConfiguredAirbyteStream(
+ stream=AirbyteStream(
+ name="orders_append",
+ json_schema=orders_stream_schema,
+ supported_sync_modes=[SyncMode.incremental],
+ ),
+ sync_mode=SyncMode.incremental,
+ destination_sync_mode=DestinationSyncMode.append,
+ )
+
+ orders_overwrite_stream = ConfiguredAirbyteStream(
+ stream=AirbyteStream(
+ name="orders_overwrite",
+ json_schema=orders_stream_schema,
+ supported_sync_modes=[SyncMode.incremental],
+ ),
+ sync_mode=SyncMode.incremental,
+ destination_sync_mode=DestinationSyncMode.overwrite,
+ )
+
+ return ConfiguredAirbyteCatalog(streams=[orders_append_stream, orders_overwrite_stream])
+
+
+@pytest.fixture
+def writer_new_airbyte_column(
+ config: Mapping[str, Any],
+ configured_catalog_with_new_column: ConfiguredAirbyteCatalog,
+ logger: MagicMock,
+) -> CumulioWriter:
+ """This will return a CumulioWriter that mocks airbyte stream catalogs that contains one column that does not exist in Cumul.io."""
+ existing_cumulio_columns = {}
+ for configured_stream in configured_catalog_with_new_column.streams:
+ columns = [column_name for column_name in configured_stream.stream.json_schema["properties"]]
+ # get rid of the second element to mimic a new column being defined in configured_stream
+ del columns[1]
+ existing_cumulio_columns[configured_stream.stream.name] = columns
+
+ def get_existing_cumulio_columns(stream_name):
+ return existing_cumulio_columns[stream_name]
+
+ with patch("destination_cumulio.writer.CumulioClient", MagicMock()) as cumulio_client_mock:
+ # Mock get_ordered_columns to return existing_cumulio_columns (which does not include one column defined in configured stream)
+ cumulio_client_mock.return_value.get_ordered_columns = MagicMock(side_effect=get_existing_cumulio_columns)
+ cumulio_client_mock.return_value.batch_write.return_value = None
+ cumulio_client_mock.return_value.set_replace_tag_on_dataset.return_value = None
+ return CumulioWriter(config, configured_catalog_with_new_column, logger)
+
+
+def test_init_new_airbyte_column(writer_new_airbyte_column: CumulioWriter):
+ """Tests whether each stream is correctly initializing update_metadata (due to new Column in Airbyte for this writer, both are True)"""
+ assert writer_new_airbyte_column.writers["orders_append"]["update_metadata"] is True
+ assert writer_new_airbyte_column.writers["orders_overwrite"]["update_metadata"] is True
+
+
+def test_new_column_update_metadata(writer_new_airbyte_column: CumulioWriter):
+ """Tests whether Airbyte streams with at least one new column defined results in update_metadata,
+ to inform Cumul.io about new column data being pushed."""
+ for stream_name in writer_new_airbyte_column.writers:
+ assert writer_new_airbyte_column.writers[stream_name]["update_metadata"] is True
+
+
+def test_new_column_appended(writer_new_airbyte_column: CumulioWriter):
+ """Tests whether the Airbyte streams with one new column appends it at the end of the column list"""
+ result = _get_cumulio_and_merged_columns(writer_new_airbyte_column)
+ for stream_name in result:
+ assert len(result[stream_name]["merged_columns"]) == len(result[stream_name]["cumulio_columns"]) + 1
+ for index, column in enumerate(result[stream_name]["cumulio_columns"]):
+ # Assert that merged_columns are in same order as columns defined on Cumul.io's side.
+ assert result[stream_name]["merged_columns"][index]["name"] == column
+ with pytest.raises(Exception):
+ # Test whether last element of merged_columns is the column that is not defined on Cumul.io's end.
+ result[stream_name]["cumulio_columns"].index(result[stream_name]["merged_columns"][-1]["name"])
+
+
+@pytest.fixture(name="configured_catalog_with_deleted_column")
+def configured_catalog_with_deleted_column_fixture() -> ConfiguredAirbyteCatalog:
+ """Creates a ConfiguredAirbyteCatalog that will be used to mock a deleted column."""
+ orders_stream_schema = {
+ "type": "object",
+ "properties": {"order_id": {"type": "integer"}, "amount": {"type": "integer"}},
+ }
+
+ orders_append_stream = ConfiguredAirbyteStream(
+ stream=AirbyteStream(
+ name="orders_append",
+ json_schema=orders_stream_schema,
+ supported_sync_modes=[SyncMode.incremental],
+ ),
+ sync_mode=SyncMode.incremental,
+ destination_sync_mode=DestinationSyncMode.append,
+ )
+
+ orders_overwrite_stream = ConfiguredAirbyteStream(
+ stream=AirbyteStream(
+ name="orders_overwrite",
+ json_schema=orders_stream_schema,
+ supported_sync_modes=[SyncMode.incremental],
+ ),
+ sync_mode=SyncMode.incremental,
+ destination_sync_mode=DestinationSyncMode.overwrite,
+ )
+
+ return ConfiguredAirbyteCatalog(streams=[orders_append_stream, orders_overwrite_stream])
+
+
+@pytest.fixture
+def writer_deleted_airbyte_column(
+ config: Mapping[str, Any],
+ configured_catalog_with_deleted_column: ConfiguredAirbyteCatalog,
+ logger: MagicMock,
+) -> CumulioWriter:
+ """This will return a CumulioWriter that mocks airbyte stream catalogs that doesn't contain one column that does exist in Cumul.io."""
+ existing_cumulio_columns = {}
+ for configured_stream in configured_catalog_with_deleted_column.streams:
+ columns = [column_name for column_name in configured_stream.stream.json_schema["properties"]]
+ # Add customer_name column as second element to mimic a deleted column being defined in configured_stream
+ columns.insert(1, "customer_name")
+ existing_cumulio_columns[configured_stream.stream.name] = columns
+
+ def get_existing_cumulio_columns(stream_name):
+ return existing_cumulio_columns[stream_name]
+
+ with patch("destination_cumulio.writer.CumulioClient", MagicMock()) as cumulio_client_mock:
+ # Mock get_ordered_columns to return existing_cumulio_columns (which includes one column that is not defined in the configured stream)
+ cumulio_client_mock.return_value.get_ordered_columns = MagicMock(side_effect=get_existing_cumulio_columns)
+ cumulio_client_mock.return_value.batch_write.return_value = None
+ cumulio_client_mock.return_value.set_replace_tag_on_dataset.return_value = None
+ return CumulioWriter(config, configured_catalog_with_deleted_column, logger)
+
+
+def test_init_deleted_airbyte_column(writer_deleted_airbyte_column: CumulioWriter):
+ """Assert each stream is correctly initializing update_metadata.
+ Due to a deleted column in Airbyte for this writer:
+ - the update_metadata property for the orders_append stream is set to False, as it's in append mode and thus should keep the existing structure
+ - the update_metadata property for the orders_overwrite stream is set to True, as it's in overwrite mode
+ """
+ assert writer_deleted_airbyte_column.writers["orders_append"]["update_metadata"] is False
+ assert writer_deleted_airbyte_column.writers["orders_overwrite"]["update_metadata"] is True
+
+
+def test_deleted_column_update_metadata(writer_deleted_airbyte_column: CumulioWriter):
+ """Tests whether Airbyte streams that do not contain a column defined on Cumul.io's side results in update_metadata for only
+ overwrite streams (to inform Cumul.io about new column data being pushed)"""
+ assert writer_deleted_airbyte_column.writers["orders_append"]["update_metadata"] is False
+ assert writer_deleted_airbyte_column.writers["orders_overwrite"]["update_metadata"] is True
+
+
+def test_merged_columns_order_for_deleted_column(
+ writer_deleted_airbyte_column: CumulioWriter,
+):
+ """Tests whether Airbyte streams that do not contain a column defined on Cumul.io's side still correctly puts the other columns in
+ the right order"""
+ result = _get_cumulio_and_merged_columns(writer_deleted_airbyte_column)
+ for stream_name in result:
+ # Test whether merged_columns contains one less element
+ assert len(result[stream_name]["merged_columns"]) == len(result[stream_name]["cumulio_columns"]) - 1
+
+ cumulio_columns_without_deleted = [
+ column_name for column_name in result[stream_name]["cumulio_columns"] if column_name != "customer_name"
+ ]
+ # Test whether elements, without deleted column, are equal and in the same position
+ assert cumulio_columns_without_deleted == [column["name"] for column in result[stream_name]["merged_columns"]]
diff --git a/airbyte-integrations/connectors/destination-databend/.dockerignore b/airbyte-integrations/connectors/destination-databend/.dockerignore
new file mode 100644
index 0000000..57f4cf3
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-databend/.dockerignore
@@ -0,0 +1,5 @@
+*
+!Dockerfile
+!main.py
+!destination_databend
+!setup.py
diff --git a/airbyte-integrations/connectors/destination-kvdb/Dockerfile b/airbyte-integrations/connectors/destination-databend/Dockerfile
similarity index 90%
rename from airbyte-integrations/connectors/destination-kvdb/Dockerfile
rename to airbyte-integrations/connectors/destination-databend/Dockerfile
index 31fd4d4..df2af68 100644
--- a/airbyte-integrations/connectors/destination-kvdb/Dockerfile
+++ b/airbyte-integrations/connectors/destination-databend/Dockerfile
@@ -29,10 +29,10 @@ RUN apk --no-cache add bash
# copy payload code only
COPY main.py ./
-COPY destination_kvdb ./destination_kvdb
+COPY destination_databend ./destination_databend
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]
LABEL io.airbyte.version=0.1.2
-LABEL io.airbyte.name=airbyte/destination-kvdb
+LABEL io.airbyte.name=airbyte/destination-databend
diff --git a/airbyte-integrations/connectors/destination-databend/README.md b/airbyte-integrations/connectors/destination-databend/README.md
new file mode 100644
index 0000000..9b50cd9
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-databend/README.md
@@ -0,0 +1,99 @@
+# Databend Destination
+
+This is the repository for the Databend destination connector, written in Python.
+For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/destinations/databend).
+
+## Local development
+
+### Prerequisites
+**To iterate on this connector, make sure to complete this prerequisites section.**
+
+#### Minimum Python version required `= 3.7.0`
+
+#### Build & Activate Virtual Environment and install dependencies
+From this connector directory, create a virtual environment:
+```
+python -m venv .venv
+```
+
+This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your
+development environment of choice. To activate it from the terminal, run:
+```
+source .venv/bin/activate
+pip install -r requirements.txt
+```
+If you are in an IDE, follow your IDE's instructions to activate the virtualenv.
+
+Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is
+used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`.
+If this is mumbo jumbo to you, don't worry about it, just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything
+should work as you expect.
+
+#### Create credentials
+**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/destinations/databend)
+to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `destination_databend/spec.json` file.
+Note that the `secrets` directory is gitignored by default, so there is no danger of accidentally checking in sensitive information.
+See `integration_tests/sample_config.json` for a sample config file.
+
+**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `destination databend test creds`
+and place them into `secrets/config.json`.
+
+### Locally running the connector
+```
+python main.py spec
+python main.py check --config secrets/config.json
+python main.py discover --config secrets/config.json
+python main.py read --config secrets/config.json --catalog integration_tests/configured_catalog.json
+```
+
+### Locally running the connector docker image
+
+
+#### Build
+**Via [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md) (recommended):**
+```bash
+airbyte-ci connectors --name=destination-databend build
+```
+
+An image will be built with the tag `airbyte/destination-databend:dev`.
+
+**Via `docker build`:**
+```bash
+docker build -t airbyte/destination-databend:dev .
+```
+
+#### Run
+Then run any of the connector commands as follows:
+```
+docker run --rm airbyte/destination-databend:dev spec
+docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-databend:dev check --config /secrets/config.json
+# messages.jsonl is a file containing line-separated JSON representing AirbyteMessages
+cat messages.jsonl | docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-databend:dev write --config /secrets/config.json --catalog /integration_tests/configured_catalog.json
+```
+
+## Testing
+You can run our full test suite locally using [`airbyte-ci`](https://github.com/airbytehq/airbyte/blob/master/airbyte-ci/connectors/pipelines/README.md):
+```bash
+airbyte-ci connectors --name=destination-databend test
+```
+
+### Customizing acceptance Tests
+Customize `acceptance-test-config.yml` file to configure tests. See [Connector Acceptance Tests](https://docs.airbyte.com/connector-development/testing-connectors/connector-acceptance-tests-reference) for more information.
+If your connector requires creating or destroying resources for use during acceptance tests, create fixtures for them and place them inside `integration_tests/acceptance.py`.
+
+## Dependency Management
+All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development.
+We split dependencies between two groups:
+* dependencies required for your connector to work go in the `MAIN_REQUIREMENTS` list.
+* dependencies required for testing go in the `TEST_REQUIREMENTS` list (see the sketch below).
+
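+A minimal sketch of how this split typically looks in `setup.py` (the package names below are illustrative assumptions, not the connector's pinned dependency list):
+```python
+from setuptools import find_packages, setup
+
+# Runtime dependencies the connector needs to work.
+MAIN_REQUIREMENTS = ["airbyte-cdk", "databend-sqlalchemy"]
+
+# Dependencies only needed to run the test suite.
+TEST_REQUIREMENTS = ["pytest"]
+
+setup(
+    name="destination_databend",
+    description="Destination implementation for Databend.",
+    author="Airbyte",
+    author_email="contact@airbyte.io",
+    packages=find_packages(),
+    install_requires=MAIN_REQUIREMENTS,
+    extras_require={"tests": TEST_REQUIREMENTS},
+)
+```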
+### Publishing a new version of the connector
+You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what?
+1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=destination-databend test`
+2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors).
+3. Make sure the `metadata.yaml` content is up to date.
+4. Make sure the connector documentation and its changelog are up to date (`docs/integrations/destinations/databend.md`).
+5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention).
+6. Pat yourself on the back for being an awesome contributor.
+7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master.
+
diff --git a/airbyte-integrations/connectors/destination-databend/destination_databend/__init__.py b/airbyte-integrations/connectors/destination-databend/destination_databend/__init__.py
new file mode 100644
index 0000000..5be4069
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-databend/destination_databend/__init__.py
@@ -0,0 +1,8 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+
+from .destination import DestinationDatabend
+
+__all__ = ["DestinationDatabend"]
diff --git a/airbyte-integrations/connectors/destination-databend/destination_databend/__init__.pyc b/airbyte-integrations/connectors/destination-databend/destination_databend/__init__.pyc
new file mode 100644
index 0000000..4538a0a
Binary files /dev/null and b/airbyte-integrations/connectors/destination-databend/destination_databend/__init__.pyc differ
diff --git a/airbyte-integrations/connectors/destination-databend/destination_databend/client.py b/airbyte-integrations/connectors/destination-databend/destination_databend/client.py
new file mode 100644
index 0000000..1764093
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-databend/destination_databend/client.py
@@ -0,0 +1,20 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+from databend_sqlalchemy import connector
+
+
+class DatabendClient:
+ def __init__(self, host: str, port: int, database: str, table: str, username: str, password: str = None):
+ self.host = host
+ self.port = port
+ self.database = database
+ self.table = table
+ self.username = username
+ self.password = password
+
+ def open(self):
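+ # Open a connection through databend_sqlalchemy's connector and return a DB-API style cursor for running queries.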
+ handle = connector.connect(f"https://{self.username}:{self.password}@{self.host}:{self.port}/{self.database}").cursor()
+
+ return handle
diff --git a/airbyte-integrations/connectors/destination-databend/destination_databend/destination.py b/airbyte-integrations/connectors/destination-databend/destination_databend/destination.py
new file mode 100644
index 0000000..365575e
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-databend/destination_databend/destination.py
@@ -0,0 +1,89 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+
+import json
+from datetime import datetime
+from logging import getLogger
+from typing import Any, Iterable, Mapping
+from uuid import uuid4
+
+from airbyte_cdk import AirbyteLogger
+from airbyte_cdk.destinations import Destination
+from airbyte_cdk.models import AirbyteConnectionStatus, AirbyteMessage, ConfiguredAirbyteCatalog, DestinationSyncMode, Status, Type
+from destination_databend.client import DatabendClient
+
+from .writer import create_databend_wirter
+
+logger = getLogger("airbyte")
+
+
+class DestinationDatabend(Destination):
+ def write(
+ self, config: Mapping[str, Any], configured_catalog: ConfiguredAirbyteCatalog, input_messages: Iterable[AirbyteMessage]
+ ) -> Iterable[AirbyteMessage]:
+
+ """
+ Reads the input stream of messages, config, and catalog to write data to the destination.
+
+ This method returns an iterable (typically a generator of AirbyteMessages via yield) containing state messages received
+ in the input message stream. Outputting a state message means that every AirbyteRecordMessage which came before it has been
+ successfully persisted to the destination. This is used to ensure fault tolerance in the case that a sync fails before fully completing,
+ then the source is given the last state message output from this method as the starting point of the next sync.
+
+ :param config: dict of JSON configuration matching the configuration declared in spec.json
+ :param configured_catalog: The Configured Catalog describing the schema of the data being received and how it should be persisted in the
+ destination
+ :param input_messages: The stream of input messages received from the source
+ :return: Iterable of AirbyteStateMessages wrapped in AirbyteMessage structs
+ """
+ streams = {s.stream.name for s in configured_catalog.streams}
+ client = DatabendClient(**config)
+
+ writer = create_databend_wirter(client, logger)
+
+ for configured_stream in configured_catalog.streams:
+ if configured_stream.destination_sync_mode == DestinationSyncMode.overwrite:
+ writer.delete_table(configured_stream.stream.name)
+ logger.info(f"Stream {configured_stream.stream.name} is wiped.")
+ writer.create_raw_table(configured_stream.stream.name)
+
+ for message in input_messages:
+ if message.type == Type.STATE:
+ yield message
+ elif message.type == Type.RECORD:
+ data = message.record.data
+ stream = message.record.stream
+ # Skip unselected streams
+ if stream not in streams:
+ logger.debug(f"Stream {stream} was not present in configured streams, skipping")
+ continue
+ writer.queue_write_data(stream, str(uuid4()), datetime.now(), json.dumps(data))
+
+ # Flush any leftover messages
+ writer.flush()
+
+ def check(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> AirbyteConnectionStatus:
+ """
+ Tests if the input configuration can be used to successfully connect to the destination with the needed permissions
+ e.g: if a provided API token or password can be used to connect and write to the destination.
+
+ :param logger: Logging object to display debug/info/error to the logs
+ (logs will not be accessible via airbyte UI if they are not passed to this logger)
+ :param config: Json object containing the configuration of this destination, content of this json is as specified in
+ the properties of the spec.json file
+
+ :return: AirbyteConnectionStatus indicating a Success or Failure
+ """
+ try:
+ client = DatabendClient(**config)
+ cursor = client.open()
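+ # Round-trip check: create a scratch table, write to it, and drop it again to verify the credentials grant the needed permissions.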
+ cursor.execute("DROP TABLE IF EXISTS test")
+ cursor.execute("CREATE TABLE if not exists test (x Int32,y VARCHAR)")
+ cursor.execute("INSERT INTO test (x,y) VALUES (%,%)", [1, "yy", 2, "xx"])
+ cursor.execute("DROP TABLE IF EXISTS test")
+ return AirbyteConnectionStatus(status=Status.SUCCEEDED)
+ except Exception as e:
+ return AirbyteConnectionStatus(status=Status.FAILED, message=f"An exception occurred: {repr(e)}")
diff --git a/airbyte-integrations/connectors/destination-databend/destination_databend/spec.json b/airbyte-integrations/connectors/destination-databend/destination_databend/spec.json
new file mode 100644
index 0000000..e77d330
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-databend/destination_databend/spec.json
@@ -0,0 +1,57 @@
+{
+ "documentationUrl": "https://docs.airbyte.com/integrations/destinations/databend",
+ "supported_destination_sync_modes": ["overwrite", "append"],
+ "supportsIncremental": true,
+ "connectionSpecification": {
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "title": "Destination Databend",
+ "type": "object",
+ "required": ["host", "username", "database"],
+ "additionalProperties": true,
+ "properties": {
+ "host": {
+ "title": "Host",
+ "description": "Hostname of the database.",
+ "type": "string",
+ "order": 0
+ },
+ "port": {
+ "title": "Port",
+ "description": "Port of the database.",
+ "type": "integer",
+ "minimum": 0,
+ "maximum": 65536,
+ "default": 443,
+ "examples": ["443"],
+ "order": 2
+ },
+ "database": {
+ "title": "DB Name",
+ "description": "Name of the database.",
+ "type": "string",
+ "order": 3
+ },
+ "table": {
+ "title": "Default Table",
+ "description": "The default table was written to.",
+ "type": "string",
+ "examples": ["default"],
+ "default": "default",
+ "order": 4
+ },
+ "username": {
+ "title": "User",
+ "description": "Username to use to access the database.",
+ "type": "string",
+ "order": 5
+ },
+ "password": {
+ "title": "Password",
+ "description": "Password associated with the username.",
+ "type": "string",
+ "airbyte_secret": true,
+ "order": 6
+ }
+ }
+ }
+}
diff --git a/airbyte-integrations/connectors/destination-databend/destination_databend/writer.py b/airbyte-integrations/connectors/destination-databend/destination_databend/writer.py
new file mode 100644
index 0000000..006ff96
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-databend/destination_databend/writer.py
@@ -0,0 +1,134 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+from collections import defaultdict
+from datetime import datetime
+from itertools import chain
+
+from airbyte_cdk import AirbyteLogger
+from airbyte_cdk.models import AirbyteConnectionStatus, Status
+from destination_databend.client import DatabendClient
+
+
+class DatabendWriter:
+ """
+ Base class for shared writer logic.
+ """
+
+ flush_interval = 1000
+
+ def __init__(self, client: DatabendClient) -> None:
+ """
+ :param client: Databend SDK connection class with established connection
+ to the database.
+ """
+ try:
+ # open a cursor and do some work with it
+ self.client = client
+ self.cursor = client.open()
+ self._buffer = defaultdict(list)
+ self._values = 0
+ except Exception as e:
+ # Re-raise as a real exception: AirbyteConnectionStatus is not an exception type, and the cursor must stay open for later writes.
+ raise Exception(f"An exception occurred while opening the Databend cursor: {repr(e)}") from e
+
+ def delete_table(self, name: str) -> None:
+ """
+ Delete the resulting table.
+ Primarily used in Overwrite strategy to clean up previous data.
+
+ :param name: table name to delete.
+ """
+ self.cursor.execute(f"DROP TABLE IF EXISTS _airbyte_raw_{name}")
+
+ def create_raw_table(self, name: str):
+ """
+ Create the resulting _airbyte_raw table.
+
+ :param name: table name to create.
+ """
+ query = f"""
+ CREATE TABLE IF NOT EXISTS _airbyte_raw_{name} (
+ _airbyte_ab_id TEXT,
+ _airbyte_emitted_at TIMESTAMP,
+ _airbyte_data TEXT
+ )
+ """
+ cursor = self.cursor
+ cursor.execute(query)
+
+ def queue_write_data(self, stream_name: str, id: str, time: datetime, record: str) -> None:
+ """
+ Queue up data in a buffer in memory before writing to the database.
+ When flush_interval is reached data is persisted.
+
+ :param stream_name: name of the stream for which the data corresponds.
+ :param id: unique identifier of this data row.
+ :param time: time of writing.
+ :param record: string representation of the json data payload.
+ """
+ self._buffer[stream_name].append((id, time, record))
+ self._values += 1
+ if self._values == self.flush_interval:
+ self._flush()
+
+ def _flush(self):
+ """
+ Stub for the intermediate data flush that's triggered during the
+ buffering operation.
+ """
+ raise NotImplementedError()
+
+ def flush(self):
+ """
+ Stub for the data flush at the end of writing operation.
+ """
+ raise NotImplementedError()
+
+
+class DatabendSQLWriter(DatabendWriter):
+ """
+ Data writer using the SQL writing strategy. Data is buffered in memory
+ and flushed using INSERT INTO SQL statement.
+ """
+
+ flush_interval = 1000
+
+ def __init__(self, client: DatabendClient) -> None:
+ """
+ :param client: Databend SDK connection class with established connection
+ to the database.
+ """
+ super().__init__(client)
+
+ def _flush(self) -> None:
+ """
+ Intermediate data flush that's triggered during the
+ buffering operation. Writes data stored in memory via SQL commands.
+ The Databend connector performs the insert into the table using a stage.
+ """
+ cursor = self.cursor
+ # each buffered row is a tuple of (id, written_at, data)
+ for table, data in self._buffer.items():
+ cursor.execute(
+ f"INSERT INTO _airbyte_raw_{table} (_airbyte_ab_id,_airbyte_emitted_at,_airbyte_data) VALUES (%, %, %)",
+ list(chain.from_iterable(data)),
+ )
+ self._buffer.clear()
+ self._values = 0
+
+ def flush(self) -> None:
+ """
+ Final data flush after all data has been written to memory.
+ """
+ self._flush()
+
+
+def create_databend_wirter(client: DatabendClient, logger: AirbyteLogger) -> DatabendWriter:
+ logger.info("Using the SQL writing strategy")
+ writer = DatabendSQLWriter(client)
+ return writer
diff --git a/airbyte-integrations/connectors/destination-databend/icon.svg b/airbyte-integrations/connectors/destination-databend/icon.svg
new file mode 100644
index 0000000..b6afca7
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-databend/icon.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/airbyte-integrations/connectors/destination-kvdb/integration_tests/integration_test.py b/airbyte-integrations/connectors/destination-databend/integration_tests/integration_test.py
similarity index 62%
rename from airbyte-integrations/connectors/destination-kvdb/integration_tests/integration_test.py
rename to airbyte-integrations/connectors/destination-databend/integration_tests/integration_test.py
index 5e083ac..a40494c 100644
--- a/airbyte-integrations/connectors/destination-kvdb/integration_tests/integration_test.py
+++ b/airbyte-integrations/connectors/destination-databend/integration_tests/integration_test.py
@@ -3,10 +3,10 @@
#
import json
+import logging
from typing import Any, Dict, List, Mapping
import pytest
-from airbyte_cdk import AirbyteLogger
from airbyte_cdk.models import (
AirbyteMessage,
AirbyteRecordMessage,
@@ -19,11 +19,11 @@
SyncMode,
Type,
)
-from destination_kvdb import DestinationKvdb
-from destination_kvdb.client import KvDbClient
+from destination_databend import DestinationDatabend
+from destination_databend.client import DatabendClient
-@pytest.fixture(name="config")
+@pytest.fixture(name="databendConfig")
def config_fixture() -> Mapping[str, Any]:
with open("secrets/config.json", "r") as f:
return json.loads(f.read())
@@ -49,24 +49,25 @@ def configured_catalog_fixture() -> ConfiguredAirbyteCatalog:
@pytest.fixture(autouse=True)
-def teardown(config: Mapping):
+def teardown(databendConfig: Mapping):
yield
- client = KvDbClient(**config)
- client.delete(list(client.list_keys()))
+ client = DatabendClient(**databendConfig)
+ cursor = client.open()
+ cursor.close()
@pytest.fixture(name="client")
-def client_fixture(config) -> KvDbClient:
- return KvDbClient(**config)
+def client_fixture(databendConfig) -> DatabendClient:
+ return DatabendClient(**databendConfig)
-def test_check_valid_config(config: Mapping):
- outcome = DestinationKvdb().check(AirbyteLogger(), config)
+def test_check_valid_config(databendConfig: Mapping):
+ outcome = DestinationDatabend().check(logging.getLogger("airbyte"), databendConfig)
assert outcome.status == Status.SUCCEEDED
def test_check_invalid_config():
- outcome = DestinationKvdb().check(AirbyteLogger(), {"bucket_id": "not_a_real_id"})
+ outcome = DestinationDatabend().check(logging.getLogger("airbyte"), {"bucket_id": "not_a_real_id"})
assert outcome.status == Status.FAILED
@@ -80,19 +81,29 @@ def _record(stream: str, str_value: str, int_value: int) -> AirbyteMessage:
)
-def retrieve_all_records(client: KvDbClient) -> List[AirbyteRecordMessage]:
- """retrieves and formats all records in kvdb as Airbyte messages"""
- all_records = client.list_keys(list_values=True)
+def retrieve_records(stream_name: str, client: DatabendClient) -> List[AirbyteRecordMessage]:
+ cursor = client.open()
+ cursor.execute(f"select * from _airbyte_raw_{stream_name}")
+ all_records = cursor.fetchall()
out = []
for record in all_records:
- key = record[0]
- stream = key.split("__ab__")[0]
- value = record[1]
- out.append(_record(stream, value["str_col"], value["int_col"]))
+ # key = record[0]
+ # stream = key.split("__ab__")[0]
+ value = json.loads(record[2])
+ out.append(_record(stream_name, value["str_col"], value["int_col"]))
return out
-def test_write(config: Mapping, configured_catalog: ConfiguredAirbyteCatalog, client: KvDbClient):
+def retrieve_all_records(client: DatabendClient) -> List[AirbyteRecordMessage]:
+ """retrieves and formats all records in databend as Airbyte messages"""
+ overwrite_stream = "overwrite_stream"
+ append_stream = "append_stream"
+ overwrite_out = retrieve_records(overwrite_stream, client)
+ append_out = retrieve_records(append_stream, client)
+ return overwrite_out + append_out
+
+
+def test_write(databendConfig: Mapping, configured_catalog: ConfiguredAirbyteCatalog, client: DatabendClient):
"""
This test verifies that:
1. writing a stream in "overwrite" mode overwrites any existing data for that stream
@@ -108,19 +119,19 @@ def test_write(config: Mapping, configured_catalog: ConfiguredAirbyteCatalog, cl
_record(overwrite_stream, str(i), i) for i in range(5, 10)
]
- destination = DestinationKvdb()
+ destination = DestinationDatabend()
expected_states = [first_state_message, second_state_message]
output_states = list(
destination.write(
- config, configured_catalog, [*first_record_chunk, first_state_message, *second_record_chunk, second_state_message]
+ databendConfig, configured_catalog, [*first_record_chunk, first_state_message, *second_record_chunk, second_state_message]
)
)
assert expected_states == output_states, "Checkpoint state messages were expected from the destination"
expected_records = [_record(append_stream, str(i), i) for i in range(10)] + [_record(overwrite_stream, str(i), i) for i in range(10)]
records_in_destination = retrieve_all_records(client)
- assert expected_records == records_in_destination, "Records in destination should match records expected"
+ assert len(expected_records) == len(records_in_destination), "Records in destination should match records expected"
# After this sync we expect the append stream to have 15 messages and the overwrite stream to have 5
third_state_message = _state({"state": "3"})
@@ -128,11 +139,21 @@ def test_write(config: Mapping, configured_catalog: ConfiguredAirbyteCatalog, cl
_record(overwrite_stream, str(i), i) for i in range(10, 15)
]
- output_states = list(destination.write(config, configured_catalog, [*third_record_chunk, third_state_message]))
+ output_states = list(destination.write(databendConfig, configured_catalog, [*third_record_chunk, third_state_message]))
assert [third_state_message] == output_states
records_in_destination = retrieve_all_records(client)
expected_records = [_record(append_stream, str(i), i) for i in range(15)] + [
_record(overwrite_stream, str(i), i) for i in range(10, 15)
]
- assert expected_records == records_in_destination
+ assert len(expected_records) == len(records_in_destination), "Number of records in destination should match number of expected records"
+
+ tear_down(client)
+
+
+def tear_down(client: DatabendClient):
+ overwrite_stream = "overwrite_stream"
+ append_stream = "append_stream"
+ cursor = client.open()
+ cursor.execute(f"DROP table _airbyte_raw_{overwrite_stream}")
+ cursor.execute(f"DROP table _airbyte_raw_{append_stream}")
diff --git a/airbyte-integrations/connectors/destination-databend/integration_tests/sample_config.json b/airbyte-integrations/connectors/destination-databend/integration_tests/sample_config.json
new file mode 100644
index 0000000..62c0cdb
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-databend/integration_tests/sample_config.json
@@ -0,0 +1,9 @@
+{
+ "protocol": "https",
+ "host": "tnc7yee14--xxxx.ch.datafusecloud.com",
+ "port": 443,
+ "username": "username",
+ "password": "password",
+ "database": "default",
+ "table": "default"
+}
diff --git a/airbyte-integrations/connectors/destination-databend/main.py b/airbyte-integrations/connectors/destination-databend/main.py
new file mode 100644
index 0000000..7482c00
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-databend/main.py
@@ -0,0 +1,11 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+
+import sys
+
+from destination_databend import DestinationDatabend
+
+if __name__ == "__main__":
+ DestinationDatabend().run(sys.argv[1:])
diff --git a/airbyte-integrations/connectors/destination-databend/metadata.yaml b/airbyte-integrations/connectors/destination-databend/metadata.yaml
new file mode 100644
index 0000000..5963349
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-databend/metadata.yaml
@@ -0,0 +1,24 @@
+data:
+ connectorSubtype: database
+ connectorType: destination
+ definitionId: 302e4d8e-08d3-4098-acd4-ac67ca365b88
+ dockerImageTag: 0.1.2
+ dockerRepository: airbyte/destination-databend
+ githubIssueLabel: destination-databend
+ icon: databend.svg
+ license: MIT
+ name: Databend
+ registries:
+ cloud:
+ enabled: false
+ oss:
+ enabled: false
+ releaseStage: alpha
+ documentationUrl: https://docs.airbyte.com/integrations/destinations/databend
+ tags:
+ - language:python
+ ab_internal:
+ sl: 100
+ ql: 100
+ supportLevel: archived
+metadataSpecVersion: "1.0"
diff --git a/airbyte-integrations/connectors/destination-databend/requirements.txt b/airbyte-integrations/connectors/destination-databend/requirements.txt
new file mode 100644
index 0000000..d6e1198
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-databend/requirements.txt
@@ -0,0 +1 @@
+-e .
diff --git a/airbyte-integrations/connectors/destination-databend/setup.py b/airbyte-integrations/connectors/destination-databend/setup.py
new file mode 100644
index 0000000..49878e3
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-databend/setup.py
@@ -0,0 +1,22 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+
+from setuptools import find_packages, setup
+
+MAIN_REQUIREMENTS = ["airbyte-cdk", "requests", "databend-sqlalchemy==0.1.6"]
+
+TEST_REQUIREMENTS = ["pytest~=6.1"]
+setup(
+ name="destination_databend",
+ description="Destination implementation for Databend.",
+ author="Airbyte",
+ author_email="contact@airbyte.io",
+ packages=find_packages(),
+ install_requires=MAIN_REQUIREMENTS,
+ package_data={"": ["*.json"]},
+ extras_require={
+ "tests": TEST_REQUIREMENTS,
+ },
+)
diff --git a/airbyte-integrations/connectors/destination-databend/unit_tests/test_databend_destination.py b/airbyte-integrations/connectors/destination-databend/unit_tests/test_databend_destination.py
new file mode 100644
index 0000000..e5a7c7e
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-databend/unit_tests/test_databend_destination.py
@@ -0,0 +1,161 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+from datetime import datetime
+from typing import Dict
+from unittest.mock import AsyncMock, MagicMock, call, patch
+
+from airbyte_cdk.models import (
+ AirbyteMessage,
+ AirbyteRecordMessage,
+ AirbyteStream,
+ ConfiguredAirbyteCatalog,
+ ConfiguredAirbyteStream,
+ DestinationSyncMode,
+ SyncMode,
+ Type,
+)
+from destination_databend.destination import DatabendClient, DestinationDatabend
+from pytest import fixture
+
+
+@fixture
+def logger() -> MagicMock:
+ return MagicMock()
+
+
+@fixture
+def config() -> Dict[str, str]:
+ args = {
+ "database": "default",
+ "username": "root",
+ "password": "root",
+ "host": "localhost",
+ "port": 8081,
+ "table": "default",
+ }
+ return args
+
+
+@fixture(name="mock_connection")
+def async_connection_cursor_mock():
+ connection = MagicMock()
+ cursor = AsyncMock()
+ connection.cursor.return_value = cursor
+ return connection, cursor
+
+
+@fixture
+def configured_stream1() -> ConfiguredAirbyteStream:
+ return ConfiguredAirbyteStream(
+ stream=AirbyteStream(
+ name="table1",
+ json_schema={
+ "type": "object",
+ "properties": {"col1": {"type": "string"}, "col2": {"type": "integer"}},
+ },
+ supported_sync_modes=[SyncMode.incremental],
+ ),
+ sync_mode=SyncMode.incremental,
+ destination_sync_mode=DestinationSyncMode.append,
+ )
+
+
+@fixture
+def configured_stream2() -> ConfiguredAirbyteStream:
+ return ConfiguredAirbyteStream(
+ stream=AirbyteStream(
+ name="table2",
+ json_schema={
+ "type": "object",
+ "properties": {"col1": {"type": "string"}, "col2": {"type": "integer"}},
+ },
+ supported_sync_modes=[SyncMode.incremental],
+ ),
+ sync_mode=SyncMode.incremental,
+ destination_sync_mode=DestinationSyncMode.append,
+ )
+
+
+@fixture
+def airbyte_message1() -> AirbyteMessage:
+ return AirbyteMessage(
+ type=Type.RECORD,
+ record=AirbyteRecordMessage(
+ stream="table1",
+ data={"key1": "value1", "key2": 2},
+ emitted_at=int(datetime.now().timestamp()) * 1000,
+ ),
+ )
+
+
+@fixture
+def airbyte_message2() -> AirbyteMessage:
+ return AirbyteMessage(
+ type=Type.RECORD,
+ record=AirbyteRecordMessage(
+ stream="table2",
+ data={"key1": "value2", "key2": 3},
+ emitted_at=int(datetime.now().timestamp()) * 1000,
+ ),
+ )
+
+
+@fixture
+def airbyte_state_message() -> AirbyteMessage:
+ return AirbyteMessage(type=Type.STATE)
+
+
+@patch("destination_databend.client.DatabendClient", MagicMock())
+def test_connection(config: Dict[str, str], logger: MagicMock) -> None:
+ # Check no log object
+ DatabendClient(**config)
+
+
+@patch("destination_databend.writer.DatabendSQLWriter")
+@patch("destination_databend.client.DatabendClient")
+def test_sql_write_append(
+ mock_connection: MagicMock,
+ mock_writer: MagicMock,
+ config: Dict[str, str],
+ configured_stream1: ConfiguredAirbyteStream,
+ configured_stream2: ConfiguredAirbyteStream,
+ airbyte_message1: AirbyteMessage,
+ airbyte_message2: AirbyteMessage,
+ airbyte_state_message: AirbyteMessage,
+) -> None:
+ catalog = ConfiguredAirbyteCatalog(streams=[configured_stream1, configured_stream2])
+
+ destination = DestinationDatabend()
+ result = destination.write(config, catalog, [airbyte_message1, airbyte_state_message, airbyte_message2])
+
+ assert list(result) == [airbyte_state_message]
+ mock_writer.return_value.delete_table.assert_not_called()
+ mock_writer.return_value.create_raw_table.mock_calls = [call(mock_connection, "table1"), call(mock_connection, "table2")]
+ assert len(mock_writer.return_value.queue_write_data.mock_calls) == 2
+ mock_writer.return_value.flush.assert_called_once()
+
+
+@patch("destination_databend.writer.DatabendSQLWriter")
+@patch("destination_databend.client.DatabendClient")
+def test_sql_write_overwrite(
+ mock_connection: MagicMock,
+ mock_writer: MagicMock,
+ config: Dict[str, str],
+ configured_stream1: ConfiguredAirbyteStream,
+ configured_stream2: ConfiguredAirbyteStream,
+ airbyte_message1: AirbyteMessage,
+ airbyte_message2: AirbyteMessage,
+ airbyte_state_message: AirbyteMessage,
+):
+ # Overwrite triggers a delete
+ configured_stream1.destination_sync_mode = DestinationSyncMode.overwrite
+ catalog = ConfiguredAirbyteCatalog(streams=[configured_stream1, configured_stream2])
+
+ destination = DestinationDatabend()
+ result = destination.write(config, catalog, [airbyte_message1, airbyte_state_message, airbyte_message2])
+
+ assert list(result) == [airbyte_state_message]
+ mock_writer.return_value.delete_table.assert_called_once_with("table1")
+ mock_writer.return_value.create_raw_table.mock_calls = [call(mock_connection, "table1"), call(mock_connection, "table2")]
diff --git a/airbyte-integrations/connectors/destination-databend/unit_tests/test_writer.py b/airbyte-integrations/connectors/destination-databend/unit_tests/test_writer.py
new file mode 100644
index 0000000..0b68b11
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-databend/unit_tests/test_writer.py
@@ -0,0 +1,46 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+from typing import Any, Union
+from unittest.mock import MagicMock
+
+from destination_databend.writer import DatabendSQLWriter
+from pytest import fixture, mark
+
+
+@fixture
+def client() -> MagicMock:
+ return MagicMock()
+
+
+@fixture
+def sql_writer(client: MagicMock) -> DatabendSQLWriter:
+ return DatabendSQLWriter(client)
+
+
+def test_sql_default(sql_writer: DatabendSQLWriter) -> None:
+ assert len(sql_writer._buffer) == 0
+ assert sql_writer.flush_interval == 1000
+
+
+@mark.parametrize("writer", ["sql_writer"])
+def test_sql_create(client: MagicMock, writer: Union[str, DatabendSQLWriter], request: Any) -> None:
+ writer = request.getfixturevalue(writer)
+ writer.create_raw_table("dummy")
+
+
+def test_data_buffering(sql_writer: DatabendSQLWriter) -> None:
+ sql_writer.queue_write_data("dummy", "id1", 20200101, '{"key": "value"}')
+ assert sql_writer._buffer["dummy"][0] == ("id1", 20200101, '{"key": "value"}')
+ assert len(sql_writer._buffer["dummy"]) == 1
+ assert len(sql_writer._buffer.keys()) == 1
+ sql_writer.queue_write_data("dummy", "id2", 20200102, '{"key2": "value2"}')
+ assert sql_writer._buffer["dummy"][1] == ("id2", 20200102, '{"key2": "value2"}')
+ assert len(sql_writer._buffer["dummy"]) == 2
+ assert len(sql_writer._buffer.keys()) == 1
+ sql_writer.queue_write_data("dummy2", "id3", 20200103, '{"key3": "value3"}')
+ assert sql_writer._buffer["dummy2"][0] == ("id3", 20200103, '{"key3": "value3"}')
+ assert len(sql_writer._buffer["dummy"]) == 2
+ assert len(sql_writer._buffer["dummy2"]) == 1
+ assert len(sql_writer._buffer.keys()) == 2
diff --git a/airbyte-integrations/connectors/destination-doris/README.md b/airbyte-integrations/connectors/destination-doris/README.md
new file mode 100644
index 0000000..b67c3bd
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-doris/README.md
@@ -0,0 +1,72 @@
+# Destination Doris
+
+This is the repository for the Doris destination connector in Java.
+For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.io/integrations/destinations/doris).
+
+## Local development
+
+#### Building via Gradle
+From the Airbyte repository root, run:
+```
+./gradlew :airbyte-integrations:connectors:destination-doris:build
+```
+
+#### Create credentials
+**If you are a community contributor**, generate the necessary credentials and place them in `secrets/config.json` conforming to the spec file in `src/main/resources/spec.json`.
+Note that the `secrets` directory is git-ignored by default, so there is no danger of accidentally checking in sensitive information.
+
+**If you are an Airbyte core member**, follow the [instructions](https://docs.airbyte.io/connector-development#using-credentials-in-ci) to set up the credentials.
+
+### Locally running the connector docker image
+
+#### Build
+Build the connector image via Gradle:
+
+```
+./gradlew :airbyte-integrations:connectors:destination-doris:buildConnectorImage
+```
+Once built, the docker image name and tag on your host will be `airbyte/destination-doris:dev`.
+
+#### Run
+Then run any of the connector commands as follows:
+```
+docker run --rm airbyte/destination-doris:dev spec
+docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-doris:dev check --config /secrets/config.json
+docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-doris:dev discover --config /secrets/config.json
+docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-doris:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json
+```
+
+## Testing
+We use `JUnit` for Java tests.
+
+### Unit and Integration Tests
+Place unit tests under `src/test/io/airbyte/integrations/destinations/doris`.
+
+#### Acceptance Tests
+Airbyte has a standard test suite that all destination connectors must pass. Implement the `TODO`s in
+`src/test-integration/java/io/airbyte/integrations/destination/doris/DorisDestinationAcceptanceTest.java`.
+
+### Using gradle to run tests
+All commands should be run from airbyte project root.
+To run unit tests:
+```
+./gradlew :airbyte-integrations:connectors:destination-doris:unitTest
+```
+To run acceptance and custom integration tests:
+```
+./gradlew :airbyte-integrations:connectors:destination-doris:integrationTest
+```
+
+## Dependency Management
+
+### Publishing a new version of the connector
+You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what?
+1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=destination-doris test`
+2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors).
+3. Make sure the `metadata.yaml` content is up to date.
+4. Make sure the connector documentation and its changelog are up to date (`docs/integrations/destinations/doris.md`).
+5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention).
+6. Pat yourself on the back for being an awesome contributor.
+7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master.
+
diff --git a/airbyte-integrations/connectors/destination-doris/bootstrap.md b/airbyte-integrations/connectors/destination-doris/bootstrap.md
new file mode 100644
index 0000000..30f9d07
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-doris/bootstrap.md
@@ -0,0 +1,29 @@
+# Doris destination
+
+
+The Doris destination uses the MySQL protocol (JDBC) and Doris Stream Load to exchange data.
+
+1. JDBC is used to manage the table structure and to execute the create-table statement before data import.
+2. Stream Load is a synchronous import method based on HTTP/HTTPS. The destination first writes records to a temporary CSV file and then loads that file into Doris with a Stream Load transaction operation (see the sketch below).
+
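+As a rough, non-authoritative sketch (not this connector's code), the block below shows what a single Stream Load request can look like with Apache HttpClient 4.x, the same HTTP client library this connector uses. The host, credentials, database, table, label, and CSV path are placeholder assumptions.
+
+```java
+// Hypothetical sketch of one Doris Stream Load request; all connection values are placeholders.
+import java.io.File;
+import java.nio.charset.StandardCharsets;
+import java.util.Base64;
+import org.apache.http.client.methods.HttpPut;
+import org.apache.http.entity.FileEntity;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.HttpClients;
+import org.apache.http.impl.client.LaxRedirectStrategy;
+import org.apache.http.util.EntityUtils;
+
+public class StreamLoadSketch {
+
+  public static void main(String[] args) throws Exception {
+    String feHttpHostPort = "127.0.0.1:8030"; // FE host and HTTP port (placeholder)
+    String db = "default";                    // target database (placeholder)
+    String table = "demo";                    // target table (placeholder)
+    String url = String.format("http://%s/api/%s/%s/_stream_load", feHttpHostPort, db, table);
+
+    HttpPut put = new HttpPut(url);
+    String auth = Base64.getEncoder()
+        .encodeToString("username:password".getBytes(StandardCharsets.UTF_8));
+    put.setHeader("Authorization", "Basic " + auth); // Stream Load uses HTTP basic auth
+    put.setHeader("Expect", "100-continue");         // recommended header for Stream Load
+    put.setHeader("label", "airbyte_demo_" + System.currentTimeMillis());
+    put.setHeader("column_separator", "\t");         // same separator the connector's CSV uses
+    put.setEntity(new FileEntity(new File("/tmp/demo.csv"))); // the pre-written CSV file (placeholder path)
+
+    // the FE redirects the PUT to a BE node, so the client must follow redirects
+    try (CloseableHttpClient client = HttpClients.custom()
+        .setRedirectStrategy(new LaxRedirectStrategy())
+        .build()) {
+      String body = EntityUtils.toString(client.execute(put).getEntity());
+      System.out.println(body); // JSON with Status, TxnId, NumberLoadedRows, Message, ...
+    }
+  }
+}
+```
+
+The response body is a JSON document whose `Status`, `TxnId`, and `Message` fields correspond to the `StreamLoadRespContent` class in this connector.
+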
+## Introduction to Apache Doris
+
+Apache Doris is a high-performance, real-time analytical database based on an MPP architecture, known for its speed and ease of use. It can return query results over massive data sets with sub-second latency and supports both high-concurrency point-query scenarios and high-throughput complex analysis scenarios. Because of this, Apache Doris is well suited to report analysis, ad-hoc query, unified data warehouse, and data lake query acceleration scenarios, and users can build applications such as user behavior analysis, A/B test platforms, log retrieval analysis, user portrait analysis, and order analysis on top of it.
+[https://doris.apache.org/docs/summary/basic-summary](https://doris.apache.org/docs/summary/basic-summary)
+
+
+## Technical Overview
+The overall architecture of Apache Doris is very simple, with only two types of processes.
+
+#### Frontend (FE)
+It is mainly responsible for user request access, query parsing and planning, metadata management, and node management.
+#### Backend (BE)
+It is mainly responsible for data storage and query plan execution.
+
+Both types of processes are horizontally scalable, and a single cluster can support up to hundreds of machines and tens of petabytes of storage capacity. Through consistency protocols, the two process types provide highly available services and highly reliable data. This highly integrated architecture greatly reduces the operation and maintenance cost of a distributed system.
+
+Apache Doris adopts the MySQL protocol, is highly compatible with the MySQL dialect, and supports standard SQL. Users can access Doris through various client tools, and it connects seamlessly with BI tools.
+
+[Stream load](https://doris.apache.org/docs/data-operate/import/import-way/stream-load-manual/) is a synchronous import method: users import local files or data streams into Doris by sending HTTP requests. Stream Load executes the import synchronously and returns the result, so callers can determine whether the import succeeded directly from the response body. It is mainly suitable for importing local files, or data from data streams, through programs.
+
+Each Doris import job, whether a batch import using Stream Load or a single import using an INSERT statement, is a complete transaction: the data in a batch takes effect atomically, and no partial data is written.
\ No newline at end of file
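+As a companion sketch (again not this connector's code), the second phase of a two-phase-commit Stream Load is a follow-up PUT to the `_stream_load_2pc` endpoint that either commits or aborts the transaction by its `TxnId`, mirroring what `DorisStreamLoad.commitTransaction` and `abortTransaction` do. All connection values below are placeholders.
+
+```java
+// Hypothetical sketch of finalizing a two-phase-commit Stream Load; all values are placeholders.
+import java.nio.charset.StandardCharsets;
+import java.util.Base64;
+import org.apache.http.client.methods.HttpPut;
+import org.apache.http.entity.StringEntity;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.HttpClients;
+import org.apache.http.util.EntityUtils;
+
+public class StreamLoad2pcSketch {
+
+  public static void main(String[] args) throws Exception {
+    String feHttpHostPort = "127.0.0.1:8030"; // FE host and HTTP port (placeholder)
+    String db = "default";                    // target database (placeholder)
+    long txnId = 12345L;                      // TxnId returned by the pre-commit response (placeholder)
+
+    HttpPut put = new HttpPut(String.format("http://%s/api/%s/_stream_load_2pc", feHttpHostPort, db));
+    String auth = Base64.getEncoder()
+        .encodeToString("username:password".getBytes(StandardCharsets.UTF_8));
+    put.setHeader("Authorization", "Basic " + auth);
+    put.setHeader("Expect", "100-continue");
+    put.setHeader("txn_id", String.valueOf(txnId));
+    put.setHeader("txn_operation", "commit"); // or "abort" to roll the transaction back
+    put.setEntity(new StringEntity(""));      // the 2pc endpoint needs no request body
+
+    try (CloseableHttpClient client = HttpClients.createDefault()) {
+      System.out.println(EntityUtils.toString(client.execute(put).getEntity()));
+    }
+  }
+}
+```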
diff --git a/airbyte-integrations/connectors/destination-doris/build.gradle b/airbyte-integrations/connectors/destination-doris/build.gradle
new file mode 100644
index 0000000..1fe67aa
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-doris/build.gradle
@@ -0,0 +1,28 @@
+plugins {
+ id 'application'
+ id 'airbyte-java-connector'
+}
+
+airbyteJavaConnector {
+ cdkVersionRequired = '0.2.0'
+ features = ['db-destinations']
+ useLocalCdk = false
+}
+
+//remove once upgrading the CDK version to 0.4.x or later
+java {
+ compileJava {
+ options.compilerArgs.remove("-Werror")
+ }
+}
+
+airbyteJavaConnector.addCdkDependencies()
+
+application {
+ mainClass = 'io.airbyte.integrations.destination.doris.DorisDestination'
+}
+
+dependencies {
+ implementation 'org.apache.commons:commons-csv:1.4'
+ implementation group: 'mysql', name: 'mysql-connector-java', version: '8.0.16'
+}
diff --git a/airbyte-integrations/connectors/destination-doris/icon.svg b/airbyte-integrations/connectors/destination-doris/icon.svg
new file mode 100644
index 0000000..314ad5f
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-doris/icon.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/airbyte-integrations/connectors/destination-doris/metadata.yaml b/airbyte-integrations/connectors/destination-doris/metadata.yaml
new file mode 100644
index 0000000..6ba856f
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-doris/metadata.yaml
@@ -0,0 +1,24 @@
+data:
+ connectorSubtype: database
+ connectorType: destination
+ definitionId: 05c161bf-ca73-4d48-b524-d392be417002
+ dockerImageTag: 0.1.0
+ dockerRepository: airbyte/destination-doris
+ githubIssueLabel: destination-doris
+ icon: apachedoris.svg
+ license: MIT
+ name: Apache Doris
+ registries:
+ cloud:
+ enabled: false
+ oss:
+ enabled: false
+ releaseStage: alpha
+ documentationUrl: https://docs.airbyte.com/integrations/destinations/doris
+ tags:
+ - language:java
+ ab_internal:
+ sl: 100
+ ql: 100
+ supportLevel: archived
+metadataSpecVersion: "1.0"
diff --git a/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisConnectionOptions.java b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisConnectionOptions.java
new file mode 100644
index 0000000..7445013
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisConnectionOptions.java
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.doris;
+
+import com.fasterxml.jackson.databind.JsonNode;
+
+public class DorisConnectionOptions {
+
+ private String db;
+ private static final String DB_KEY = "database";
+ private String table;
+ private static final String TABLE_KEY = "table";
+
+ private String user;
+ private static final String USER_KEY = "username";
+
+ private String pwd;
+ private static final String PWD_KEY = "password";
+
+ private String feHost;
+ private static final String FE_HOST_KEY = "host";
+
+ private Integer feHttpPort;
+ private static final String FE_HTTP_PORT_KEY = "httpport";
+
+ private Integer feQueryPort;
+ private static final String FE_QUERY_PORT_KEY = "queryport";
+
+ public static DorisConnectionOptions getDorisConnection(final JsonNode config, String table) {
+ return new DorisConnectionOptions(
+ config.get(DB_KEY).asText(),
+ table,
+ config.get(USER_KEY).asText(),
+ config.get(PWD_KEY) == null ? "" : config.get(PWD_KEY).asText(),
+ config.get(FE_HOST_KEY).asText(),
+ config.get(FE_HTTP_PORT_KEY).asInt(8030),
+ config.get(FE_QUERY_PORT_KEY).asInt(9030));
+
+ }
+
+ public DorisConnectionOptions(String db, String table, String user, String pwd, String feHost, Integer feHttpPort, Integer feQueryPort) {
+ this.db = db;
+ this.table = table;
+ this.user = user;
+ this.pwd = pwd;
+ this.feHost = feHost;
+ this.feHttpPort = feHttpPort;
+ this.feQueryPort = feQueryPort;
+ }
+
+ public String getDb() {
+ return db;
+ }
+
+ public String getTable() {
+ return table;
+ }
+
+ public String getUser() {
+ return user;
+ }
+
+ public String getPwd() {
+ return pwd;
+ }
+
+ public String getFeHost() {
+ return feHost;
+ }
+
+ public Integer getFeHttpPort() {
+ return feHttpPort;
+ }
+
+ public String getHttpHostPort() {
+ return feHost + ":" + feHttpPort;
+ }
+
+ public String getQueryHostPort() {
+ return feHost + ":" + feHttpPort;
+ }
+
+ public Integer getFeQueryPort() {
+ return feQueryPort;
+ }
+
+ @Override
+ public String toString() {
+ return "DorisConnectionOptions{" +
+ "db='" + db + '\'' +
+ ", table='" + table + '\'' +
+ ", user='" + user + '\'' +
+ ", pwd='" + pwd + '\'' +
+ ", feHost='" + feHost + '\'' +
+ ", feHttpPort=" + feHttpPort +
+ ", feQueryPort=" + feQueryPort +
+ '}';
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisConsumer.java b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisConsumer.java
new file mode 100644
index 0000000..db64c82
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisConsumer.java
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.doris;
+
+import com.fasterxml.jackson.core.io.JsonStringEncoder;
+import io.airbyte.cdk.integrations.base.CommitOnStateAirbyteMessageConsumer;
+import io.airbyte.commons.json.Jsons;
+import io.airbyte.protocol.models.v0.AirbyteMessage;
+import io.airbyte.protocol.models.v0.AirbyteRecordMessage;
+import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.util.Map;
+import java.util.UUID;
+import java.util.function.Consumer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class DorisConsumer extends CommitOnStateAirbyteMessageConsumer {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(DorisConsumer.class);
+
+ private final ConfiguredAirbyteCatalog catalog;
+ private final Map<String, DorisWriteConfig> writeConfigs;
+
+ private JsonStringEncoder jsonEncoder;
+
+ public DorisConsumer(
+ final Map<String, DorisWriteConfig> writeConfigs,
+ final ConfiguredAirbyteCatalog catalog,
+ final Consumer<AirbyteMessage> outputRecordCollector) {
+ super(outputRecordCollector);
+ jsonEncoder = JsonStringEncoder.getInstance();
+ this.catalog = catalog;
+ this.writeConfigs = writeConfigs;
+ LOGGER.info("initializing DorisConsumer.");
+ }
+
+ @Override
+ public void commit() throws Exception {
+ for (final DorisWriteConfig writeConfig : writeConfigs.values()) {
+ writeConfig.getWriter().flush();
+ }
+ }
+
+ @Override
+ protected void startTracked() throws Exception {}
+
+ @Override
+ protected void acceptTracked(AirbyteMessage msg) throws Exception {
+ if (msg.getType() != AirbyteMessage.Type.RECORD) {
+ return;
+ }
+ final AirbyteRecordMessage recordMessage = msg.getRecord();
+ if (!writeConfigs.containsKey(recordMessage.getStream())) {
+ throw new IllegalArgumentException(
+ String.format("Message contained record from a stream that was not in the catalog. \ncatalog: %s , \nmessage: %s",
+ Jsons.serialize(catalog), Jsons.serialize(recordMessage)));
+ }
+
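+ // each record becomes one CSV row: a random UUID (_airbyte_ab_id), the emitted-at timestamp, and the JSON-escaped record data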
+ writeConfigs.get(recordMessage.getStream()).getWriter().printRecord(
+ UUID.randomUUID(),
+ // new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS").format(new Date(recordMessage.getEmittedAt())),
+ recordMessage.getEmittedAt(),
+ new String(jsonEncoder.quoteAsString(Jsons.serialize(recordMessage.getData()))));
+
+ }
+
+ @Override
+ protected void close(boolean hasFailed) throws Exception {
+ LOGGER.info("finalizing DorisConsumer");
+ for (final Map.Entry<String, DorisWriteConfig> entries : writeConfigs.entrySet()) {
+ try {
+ entries.getValue().getWriter().flush();
+ entries.getValue().getWriter().close();
+ } catch (final Exception e) {
+ hasFailed = true;
+ LOGGER.error("failed to close writer for: {}", entries.getKey());
+ }
+ }
+
+ try {
+ for (final DorisWriteConfig value : writeConfigs.values()) {
+ value.getDorisStreamLoad().firstCommit();
+ }
+ } catch (final Exception e) {
+ hasFailed = true;
+ final String message = "Failed to pre-commit doris in destination: ";
+ LOGGER.error(message + e.getMessage());
+ for (final DorisWriteConfig value : writeConfigs.values()) {
+ if (value.getDorisStreamLoad().getTxnID() > 0)
+ value.getDorisStreamLoad().abortTransaction();
+ }
+ }
+
+ // second phase: commit every pre-committed transaction, or abort them all if anything failed
+ try {
+ if (!hasFailed) {
+ for (final DorisWriteConfig writeConfig : writeConfigs.values()) {
+ if (writeConfig.getDorisStreamLoad().getTxnID() > 0)
+ writeConfig.getDorisStreamLoad().commitTransaction();
+ LOGGER.info(String.format("stream load commit (TxnID: %s ) successed ", writeConfig.getDorisStreamLoad().getTxnID()));
+ }
+ } else {
+ final String message = "Failed to commit doris in destination";
+ LOGGER.error(message);
+ for (final DorisWriteConfig writeConfig : writeConfigs.values()) {
+ if (writeConfig.getDorisStreamLoad().getTxnID() > 0)
+ writeConfig.getDorisStreamLoad().abortTransaction();
+ }
+ throw new IOException(message);
+ }
+ } finally {
+ for (final DorisWriteConfig writeConfig : writeConfigs.values()) {
+ Files.deleteIfExists(writeConfig.getDorisStreamLoad().getPath());
+ writeConfig.getDorisStreamLoad().close();
+ }
+ }
+
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisDestination.java b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisDestination.java
new file mode 100644
index 0000000..12fd21b
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisDestination.java
@@ -0,0 +1,164 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.doris;
+
+import static io.airbyte.integrations.destination.doris.DorisStreamLoad.CSV_COLUMN_SEPARATOR;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.google.common.base.Preconditions;
+import io.airbyte.cdk.integrations.BaseConnector;
+import io.airbyte.cdk.integrations.base.AirbyteMessageConsumer;
+import io.airbyte.cdk.integrations.base.Destination;
+import io.airbyte.cdk.integrations.base.IntegrationRunner;
+import io.airbyte.cdk.integrations.base.JavaBaseConstants;
+import io.airbyte.cdk.integrations.destination.StandardNameTransformer;
+import io.airbyte.protocol.models.v0.AirbyteConnectionStatus;
+import io.airbyte.protocol.models.v0.AirbyteConnectionStatus.Status;
+import io.airbyte.protocol.models.v0.AirbyteMessage;
+import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
+import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream;
+import io.airbyte.protocol.models.v0.DestinationSyncMode;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.sql.*;
+import java.util.*;
+import java.util.function.Consumer;
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVPrinter;
+import org.apache.commons.io.FileUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class DorisDestination extends BaseConnector implements Destination {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(DorisDestination.class);
+ private static final StandardNameTransformer namingResolver = new StandardNameTransformer();
+ private static Connection conn = null;
+ private static HttpUtil http = new HttpUtil();
+ static final String DESTINATION_TEMP_PATH_FIELD = "destination_temp_path";
+ private static final String JDBC_DRIVER = "com.mysql.cj.jdbc.Driver";
+ private static final String DB_URL_PATTERN = "jdbc:mysql://%s:%d/%s?rewriteBatchedStatements=true&useUnicode=true&characterEncoding=utf8";
+
+ public static void main(String[] args) throws Exception {
+ new IntegrationRunner(new DorisDestination()).run(args);
+ }
+
+ @Override
+ public AirbyteConnectionStatus check(JsonNode config) {
+ try {
+ Preconditions.checkNotNull(config);
+ FileUtils.forceMkdir(getTempPathDir(config).toFile());
+ checkDorisAndConnect(config);
+ } catch (final Exception e) {
+ return new AirbyteConnectionStatus().withStatus(Status.FAILED).withMessage(e.getMessage());
+ }
+ return new AirbyteConnectionStatus().withStatus(Status.SUCCEEDED);
+ }
+
+ @Override
+ public AirbyteMessageConsumer getConsumer(JsonNode config,
+ ConfiguredAirbyteCatalog configuredCatalog,
+ Consumer<AirbyteMessage> outputRecordCollector)
+ throws IOException, SQLException {
+ final Map<String, DorisWriteConfig> writeConfigs = new HashMap<>();
+
+ try {
+ final Path destinationDir = getTempPathDir(config);
+ FileUtils.forceMkdir(destinationDir.toFile());
+ for (ConfiguredAirbyteStream stream : configuredCatalog.getStreams()) {
+
+ final DestinationSyncMode syncMode = stream.getDestinationSyncMode();
+ if (syncMode == null) {
+ throw new IllegalStateException("Undefined destination sync mode");
+ }
+
+ final String streamName = stream.getStream().getName();
+ final String tableName = namingResolver.getIdentifier(streamName);
+ final String tmpTableName = namingResolver.getTmpTableName(streamName);
+ final Path tmpPath = destinationDir.resolve(tmpTableName + ".csv");
+ if (conn == null)
+ checkDorisAndConnect(config);
+ Statement stmt = conn.createStatement();
+ stmt.execute(createTableQuery(tableName));
+ if (syncMode == DestinationSyncMode.OVERWRITE) {
+ stmt.execute(truncateTable(tableName));
+ }
+ CSVFormat csvFormat = CSVFormat.DEFAULT
+ .withSkipHeaderRecord()
+ .withDelimiter(CSV_COLUMN_SEPARATOR)
+ .withQuote(null)
+ .withHeader(
+ JavaBaseConstants.COLUMN_NAME_AB_ID,
+ JavaBaseConstants.COLUMN_NAME_EMITTED_AT,
+ JavaBaseConstants.COLUMN_NAME_DATA);
+ final FileWriter fileWriter = new FileWriter(tmpPath.toFile(), Charset.defaultCharset(), false);
+ final CSVPrinter printer = new CSVPrinter(fileWriter, csvFormat);
+ DorisStreamLoad dorisStreamLoad = new DorisStreamLoad(
+ tmpPath,
+ DorisConnectionOptions.getDorisConnection(config, tableName),
+ new DorisLabelInfo("airbyte_doris", tableName, true),
+ http.getClient(),
+ JavaBaseConstants.COLUMN_NAME_AB_ID,
+ JavaBaseConstants.COLUMN_NAME_EMITTED_AT,
+ JavaBaseConstants.COLUMN_NAME_DATA);
+ writeConfigs.put(streamName, new DorisWriteConfig(dorisStreamLoad, printer, csvFormat));
+ }
+ } catch (SQLException | ClassNotFoundException e) {
+ LOGGER.error("Exception while creating Doris destination table: ", e);
+ throw new SQLException(e);
+ } catch (IOException e) {
+ LOGGER.error("Exception while handling temporary csv files : ", e);
+ throw new IOException(e);
+ } finally {
+ if (conn != null)
+ conn.close();
+ }
+ return new DorisConsumer(writeConfigs, configuredCatalog, outputRecordCollector);
+ }
+
+ protected void checkDorisAndConnect(JsonNode config) throws ClassNotFoundException, SQLException {
+ DorisConnectionOptions dorisConnection = DorisConnectionOptions.getDorisConnection(config, "");
+ String dbUrl = String.format(DB_URL_PATTERN, dorisConnection.getFeHost(), dorisConnection.getFeQueryPort(), dorisConnection.getDb());
+ Class.forName(JDBC_DRIVER);
+ conn = DriverManager.getConnection(dbUrl, dorisConnection.getUser(), dorisConnection.getPwd());
+ }
+
+ protected String createTableQuery(String tableName) {
+ String s = "CREATE TABLE IF NOT EXISTS `" + tableName + "` ( \n"
+ + "`" + JavaBaseConstants.COLUMN_NAME_AB_ID + "` varchar(40),\n"
+ + "`" + JavaBaseConstants.COLUMN_NAME_EMITTED_AT + "` BIGINT,\n"
+ + "`" + JavaBaseConstants.COLUMN_NAME_DATA + "` String)\n"
+ + "DUPLICATE KEY(`" + JavaBaseConstants.COLUMN_NAME_AB_ID + "`,`" + JavaBaseConstants.COLUMN_NAME_EMITTED_AT + "`) \n"
+ + "DISTRIBUTED BY HASH(`" + JavaBaseConstants.COLUMN_NAME_AB_ID + "`) BUCKETS 16 \n"
+ + "PROPERTIES ( \n"
+ + "\"replication_allocation\" = \"tag.location.default: 1\" \n"
+ + ");";
+ LOGGER.info("create doris table SQL : \n " + s);
+ return s;
+ }
+
+ protected String truncateTable(String tableName) {
+ String s = "TRUNCATE TABLE `" + tableName + "`;";
+ LOGGER.info("truncate doris table SQL : \n " + s);
+ return s;
+ }
+
+ protected Path getTempPathDir(final JsonNode config) {
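+ // temporary CSV files for Stream Load must resolve under the /local directory; any other path is rejected below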
+ Path path = Paths.get(DESTINATION_TEMP_PATH_FIELD);
+ Preconditions.checkNotNull(path);
+ if (!path.startsWith("/code/local")) {
+ path = Path.of("/local", path.toString());
+ }
+ final Path normalizePath = path.normalize();
+ if (!normalizePath.startsWith("/local")) {
+ throw new IllegalArgumentException("Stream Load destination temp file should be inside the /local directory");
+ }
+ return path;
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisLabelInfo.java b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisLabelInfo.java
new file mode 100644
index 0000000..19182ee
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisLabelInfo.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.doris;
+
+import java.util.UUID;
+
+public class DorisLabelInfo {
+
+ private String prefix;
+
+ private String table;
+
+ private boolean enable2PC;
+
+ public DorisLabelInfo(String labelPrefix, String table, boolean enable2PC) {
+ this.prefix = labelPrefix;
+ this.table = table;
+ this.enable2PC = enable2PC;
+ }
+
+ public String label() {
+ return prefix + "_" + table + "_" + UUID.randomUUID() + System.currentTimeMillis();
+ }
+
+ public String label(long chkId) {
+ return prefix + "_" + chkId;
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisStreamLoad.java b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisStreamLoad.java
new file mode 100644
index 0000000..92051a9
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisStreamLoad.java
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.doris;
+
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.base.Preconditions;
+import io.airbyte.integrations.destination.doris.exception.DorisRuntimeException;
+import io.airbyte.integrations.destination.doris.exception.StreamLoadException;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.*;
+import java.util.concurrent.Future;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpPut;
+import org.apache.http.entity.InputStreamEntity;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.util.EntityUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class DorisStreamLoad {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(DorisStreamLoad.class);
+ private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+ public static final Pattern LABEL_EXIST_PATTERN =
+ Pattern.compile("errCode = 2, detailMessage = Label \\[(.*)\\] " +
+ "has already been used, relate to txn \\[(\\d+)\\]");
+ public static final Pattern COMMITTED_PATTERN =
+ Pattern.compile("errCode = 2, detailMessage = transaction \\[(\\d+)\\] " +
+ "is already \\b(COMMITTED|committed|VISIBLE|visible)\\b, not pre-committed.");
+ private final DorisLabelInfo dorisLabelInfo;
+ private static final String LOAD_FIRST_URL_PATTERN = "http://%s/api/%s/%s/_stream_load";
+ private static final String LOAD_SECOND_URL_PATTERN = "http://%s/api/%s/_stream_load_2pc";
+ private static final String LINE_DELIMITER_DEFAULT = "\n";
+ public static final Character CSV_COLUMN_SEPARATOR = '\t';
+
+ private final String hostPort;
+ private final String loadUrlStr;
+ private final String secondUrlStr;
+ private final String user;
+ private final String passwd;
+ private final boolean enable2PC;
+ private final Properties streamLoadProp;
+ private final Integer maxRetry;
+ private Long txnID = 0L;
+ private final Path path;
+ private Future<CloseableHttpResponse> pendingLoadFuture;
+ private final CloseableHttpClient httpClient;
+ public static final String SUCCESS = "Success";
+ public static final String PUBLISH_TIMEOUT = "Publish Timeout";
+ private static final List<String> DORIS_SUCCESS_STATUS = new ArrayList<>(Arrays.asList(SUCCESS, PUBLISH_TIMEOUT));
+ public static final String FAIL = "Fail";
+
+ public DorisStreamLoad(
+ Path path,
+ DorisConnectionOptions dorisOptions,
+ DorisLabelInfo dorisLabelInfo,
+ CloseableHttpClient httpClient,
+ String... head) {
+ this.hostPort = dorisOptions.getHttpHostPort();
+ String db = dorisOptions.getDb();
+ this.user = dorisOptions.getUser();
+ this.passwd = dorisOptions.getPwd();
+ this.dorisLabelInfo = dorisLabelInfo;
+ this.loadUrlStr = String.format(LOAD_FIRST_URL_PATTERN, hostPort, db, dorisOptions.getTable());
+ this.secondUrlStr = String.format(LOAD_SECOND_URL_PATTERN, hostPort, db);
+ this.enable2PC = true;
+
+ StringBuilder stringBuilder = new StringBuilder();
+ for (String s : head) {
+ if (!stringBuilder.isEmpty())
+ stringBuilder.append(",");
+ stringBuilder.append(s);
+ }
+ this.streamLoadProp = new Properties();
+ streamLoadProp.setProperty("column_separator", CSV_COLUMN_SEPARATOR.toString());
+ streamLoadProp.setProperty("columns", stringBuilder.toString());
+ this.maxRetry = 3;
+ this.path = path;
+ this.httpClient = httpClient;
+ }
+
+ public Long getTxnID() {
+ return txnID;
+ }
+
+ public void firstCommit() throws Exception {
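+ // first phase of the two-phase commit: upload the CSV file and pre-commit the Stream Load transaction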
+ Path pathChecked = Preconditions.checkNotNull(path, "stream load temp CSV file is empty.");
+ String label = dorisLabelInfo.label();
+ LOGGER.info("preCommit label {}. .", label);
+ StreamLoadRespContent respContent = null;
+ try {
+
+ InputStreamEntity entity = new InputStreamEntity(new FileInputStream(pathChecked.toFile()));
+ StreamLoadHttpPutBuilder builder = StreamLoadHttpPutBuilder.builder();
+ builder.setUrl(loadUrlStr)
+ .baseAuth(user, passwd)
+ .addCommonHeader()
+ .enable2PC(enable2PC)
+ .setLabel(label)
+ .setEntity(entity)
+ .addProperties(streamLoadProp);
+ HttpPut build = builder.build();
+ respContent = handlePreCommitResponse(httpClient.execute(build));
+ Preconditions.checkState("true".equals(respContent.getTwoPhaseCommit()));
+ if (!DORIS_SUCCESS_STATUS.contains(respContent.getStatus())) {
+ String errMsg = String.format("stream load error: %s, see more in %s", respContent.getMessage(), respContent.getErrorURL());
+ throw new DorisRuntimeException(errMsg);
+ } else {
+ String commitType = enable2PC ? "preCommit" : "commit";
+ LOGGER.info("{} for label {} finished: {}", commitType, label, respContent.toString());
+ }
+ } catch (Exception e) {
+ LOGGER.warn("failed to stream load data", e);
+ throw e;
+ }
+ this.txnID = respContent.getTxnId();
+ }
+
+ // commit
+ public void commitTransaction() throws IOException {
+ int statusCode = -1;
+ String reasonPhrase = null;
+ int retry = 0;
+ CloseableHttpResponse response = null;
+ StreamLoadHttpPutBuilder putBuilder = StreamLoadHttpPutBuilder.builder();
+ putBuilder.setUrl(secondUrlStr)
+ .baseAuth(user, passwd)
+ .addCommonHeader()
+ .addTxnId(txnID)
+ .setEmptyEntity()
+ .commit();
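+ // retry the 2pc commit request up to maxRetry times; an IOException triggers another attempt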
+ while (retry++ < maxRetry) {
+
+ try {
+ response = httpClient.execute(putBuilder.build());
+ } catch (IOException e) {
+ LOGGER.warn("try commit failed with {} times", retry + 1);
+ continue;
+ }
+ statusCode = response.getStatusLine().getStatusCode();
+ reasonPhrase = response.getStatusLine().getReasonPhrase();
+ if (statusCode != 200) {
+ LOGGER.warn("commit transaction failed with {}, reason {}", hostPort, reasonPhrase);
+ } else {
+ LOGGER.info("commit transaction successes , response: {}", response.getStatusLine().toString());
+ break;
+ }
+ }
+
+ if (statusCode != 200) {
+ throw new DorisRuntimeException("stream load error: " + reasonPhrase);
+ }
+
+ ObjectMapper mapper = new ObjectMapper();
+ if (response.getEntity() != null) {
+ String loadResult = EntityUtils.toString(response.getEntity());
+ Map<String, String> res = mapper.readValue(loadResult, new TypeReference<HashMap<String, String>>() {});
+ Matcher matcher = COMMITTED_PATTERN.matcher(res.get("msg"));
+ if (res.get("status").equals(FAIL) && !matcher.matches()) {
+ throw new DorisRuntimeException("Commit failed " + loadResult);
+ } else {
+ LOGGER.info("load result {}", loadResult);
+ }
+ }
+ }
+
+ // abort
+ public void abortTransaction() throws Exception {
+ StreamLoadHttpPutBuilder builder = StreamLoadHttpPutBuilder.builder();
+ builder.setUrl(secondUrlStr)
+ .baseAuth(user, passwd)
+ .addCommonHeader()
+ .addTxnId(txnID)
+ .setEmptyEntity()
+ .abort();
+ CloseableHttpResponse response = httpClient.execute(builder.build());
+
+ int statusCode = response.getStatusLine().getStatusCode();
+ if (statusCode != 200 || response.getEntity() == null) {
+ LOGGER.warn("abort transaction response: " + response.getStatusLine().toString());
+ throw new DorisRuntimeException("Failed abort transaction:" + txnID + ", with url " + secondUrlStr);
+ } else {
+ LOGGER.info("abort transaction response: " + response.getStatusLine().toString());
+ }
+
+ ObjectMapper mapper = new ObjectMapper();
+ String loadResult = EntityUtils.toString(response.getEntity());
+ Map<String, String> res = mapper.readValue(loadResult, new TypeReference<HashMap<String, String>>() {});
+ if (FAIL.equals(res.get("status"))) {
+ LOGGER.warn("Fail to abort transaction. error: {}", res.get("msg"));
+ }
+ }
+
+ private StreamLoadRespContent stopLoad() throws IOException {
+ LOGGER.info("stream load stopped.");
+ Preconditions.checkState(pendingLoadFuture != null);
+ try {
+ return handlePreCommitResponse(pendingLoadFuture.get());
+ } catch (Exception e) {
+ throw new DorisRuntimeException(e);
+ }
+ }
+
+ public StreamLoadRespContent handlePreCommitResponse(CloseableHttpResponse response) throws Exception {
+ final int statusCode = response.getStatusLine().getStatusCode();
+ if (statusCode == 200 && response.getEntity() != null) {
+ String loadResult = EntityUtils.toString(response.getEntity());
+ LOGGER.info("load Result {}", loadResult);
+ return OBJECT_MAPPER.readValue(loadResult, StreamLoadRespContent.class);
+ }
+ throw new StreamLoadException("stream load response error: " + response.getStatusLine().toString());
+ }
+
+ public Path getPath() {
+ return path;
+ }
+
+ public void close() throws IOException {
+ if (null != httpClient) {
+ try {
+ httpClient.close();
+ } catch (IOException e) {
+ throw new IOException("Closing httpClient failed.", e);
+ }
+ }
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisWriteConfig.java b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisWriteConfig.java
new file mode 100644
index 0000000..2d0afa1
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/DorisWriteConfig.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.doris;
+
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVPrinter;
+
+public class DorisWriteConfig {
+
+ private final DorisStreamLoad dorisStreamLoad;
+ private final CSVPrinter writer;
+ private final CSVFormat format;
+
+ public DorisWriteConfig(DorisStreamLoad dorisStreamLoad, CSVPrinter writer, CSVFormat format) {
+ this.dorisStreamLoad = dorisStreamLoad;
+ this.writer = writer;
+ this.format = format;
+ }
+
+ public DorisStreamLoad getDorisStreamLoad() {
+ return dorisStreamLoad;
+ }
+
+ public CSVFormat getFormat() {
+ return format;
+ }
+
+ public CSVPrinter getWriter() {
+ return writer;
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/HttpUtil.java b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/HttpUtil.java
new file mode 100644
index 0000000..2bf0b61
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/HttpUtil.java
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.doris;
+
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.DefaultRedirectStrategy;
+import org.apache.http.impl.client.HttpClientBuilder;
+import org.apache.http.impl.client.HttpClients;
+
+public class HttpUtil {
+
+ private final HttpClientBuilder httpClientBuilder =
+ HttpClients
+ .custom()
+ .setRedirectStrategy(new DefaultRedirectStrategy() {
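+ // follow redirects for every method (including PUT): the Doris FE redirects Stream Load requests to a BE node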
+
+ @Override
+ protected boolean isRedirectable(String method) {
+ return true;
+ }
+
+ });
+
+ public CloseableHttpClient getClient() {
+ return httpClientBuilder.build();
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/StreamLoadHttpPutBuilder.java b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/StreamLoadHttpPutBuilder.java
new file mode 100644
index 0000000..103924e
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/StreamLoadHttpPutBuilder.java
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.doris;
+
+import com.google.common.base.Preconditions;
+import java.nio.charset.StandardCharsets;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+import org.apache.commons.codec.binary.Base64;
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpHeaders;
+import org.apache.http.client.methods.HttpPut;
+import org.apache.http.entity.StringEntity;
+
+public class StreamLoadHttpPutBuilder {
+
+ String url;
+
+ Map<String, String> prop;
+
+ HttpEntity httpEntity;
+
+ private StreamLoadHttpPutBuilder() {
+ this.prop = new HashMap<>();
+ }
+
+ public static StreamLoadHttpPutBuilder builder() {
+ return new StreamLoadHttpPutBuilder();
+ }
+
+ public StreamLoadHttpPutBuilder setUrl(String url) {
+ this.url = url;
+ return this;
+ }
+
+ // Clients should set the Expect header to "100-continue" so that unnecessary data transfer can be avoided in some error scenarios.
+ public StreamLoadHttpPutBuilder addCommonHeader() {
+ prop.put(HttpHeaders.EXPECT, "100-continue");
+ return this;
+ }
+
+ public StreamLoadHttpPutBuilder enable2PC(Boolean bool) {
+ prop.put("two_phase_commit", bool.toString());
+ return this;
+ }
+
+ public StreamLoadHttpPutBuilder baseAuth(String user, String password) {
+ byte[] encoded = Base64.encodeBase64(user.concat(":").concat(password).getBytes(StandardCharsets.UTF_8));
+ prop.put(HttpHeaders.AUTHORIZATION, "Basic " + new String(encoded, StandardCharsets.UTF_8));
+ return this;
+ }
+
+ public StreamLoadHttpPutBuilder addTxnId(long txnID) {
+ prop.put("txn_id", String.valueOf(txnID));
+ return this;
+ }
+
+ public StreamLoadHttpPutBuilder commit() {
+ prop.put("txn_operation", "commit");
+ return this;
+ }
+
+ public StreamLoadHttpPutBuilder abort() {
+ prop.put("txn_operation", "abort");
+ return this;
+ }
+
+ public StreamLoadHttpPutBuilder setEntity(HttpEntity httpEntity) {
+ this.httpEntity = httpEntity;
+ return this;
+ }
+
+ public StreamLoadHttpPutBuilder setEmptyEntity() {
+ try {
+ this.httpEntity = new StringEntity("");
+ } catch (Exception e) {
+ throw new IllegalArgumentException(e);
+ }
+ return this;
+ }
+
+ public StreamLoadHttpPutBuilder addProperties(Properties properties) {
+ properties.forEach((key, value) -> prop.put(String.valueOf(key), String.valueOf(value)));
+ return this;
+ }
+
+ public StreamLoadHttpPutBuilder setLabel(String label) {
+ prop.put("label", label);
+ return this;
+ }
+
+ public HttpPut build() {
+ Preconditions.checkNotNull(url);
+ Preconditions.checkNotNull(httpEntity);
+ HttpPut put = new HttpPut(url);
+ prop.forEach(put::setHeader);
+ put.setEntity(httpEntity);
+ return put;
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/StreamLoadRespContent.java b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/StreamLoadRespContent.java
new file mode 100644
index 0000000..16eaed4
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/StreamLoadRespContent.java
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.doris;
+
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+@JsonIgnoreProperties(ignoreUnknown = true)
+public class StreamLoadRespContent {
+
+ @JsonProperty(value = "TxnId")
+ private long TxnId;
+
+ @JsonProperty(value = "Label")
+ private String Label;
+
+ @JsonProperty(value = "Status")
+ private String Status;
+
+ @JsonProperty(value = "TwoPhaseCommit")
+ private String TwoPhaseCommit;
+
+ @JsonProperty(value = "ExistingJobStatus")
+ private String ExistingJobStatus;
+
+ @JsonProperty(value = "Message")
+ private String Message;
+
+ @JsonProperty(value = "NumberTotalRows")
+ private long NumberTotalRows;
+
+ @JsonProperty(value = "NumberLoadedRows")
+ private long NumberLoadedRows;
+
+ @JsonProperty(value = "NumberFilteredRows")
+ private int NumberFilteredRows;
+
+ @JsonProperty(value = "NumberUnselectedRows")
+ private int NumberUnselectedRows;
+
+ @JsonProperty(value = "LoadBytes")
+ private long LoadBytes;
+
+ @JsonProperty(value = "LoadTimeMs")
+ private int LoadTimeMs;
+
+ @JsonProperty(value = "BeginTxnTimeMs")
+ private int BeginTxnTimeMs;
+
+ @JsonProperty(value = "StreamLoadPutTimeMs")
+ private int StreamLoadPutTimeMs;
+
+ @JsonProperty(value = "ReadDataTimeMs")
+ private int ReadDataTimeMs;
+
+ @JsonProperty(value = "WriteDataTimeMs")
+ private int WriteDataTimeMs;
+
+ @JsonProperty(value = "CommitAndPublishTimeMs")
+ private int CommitAndPublishTimeMs;
+
+ @JsonProperty(value = "ErrorURL")
+ private String ErrorURL;
+
+ public long getTxnId() {
+ return TxnId;
+ }
+
+ public String getStatus() {
+ return Status;
+ }
+
+ public String getTwoPhaseCommit() {
+ return TwoPhaseCommit;
+ }
+
+ public String getMessage() {
+ return Message;
+ }
+
+ public String getExistingJobStatus() {
+ return ExistingJobStatus;
+ }
+
+ @Override
+ public String toString() {
+ ObjectMapper mapper = new ObjectMapper();
+ try {
+ return mapper.writeValueAsString(this);
+ } catch (JsonProcessingException e) {
+ return "";
+ }
+ }
+
+ public String getErrorURL() {
+ return ErrorURL;
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/exception/DorisException.java b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/exception/DorisException.java
new file mode 100644
index 0000000..c416bd6
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/exception/DorisException.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.doris.exception;
+
+public class DorisException extends Exception {
+
+ public DorisException() {
+ super();
+ }
+
+ public DorisException(String message) {
+ super(message);
+ }
+
+ public DorisException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ public DorisException(Throwable cause) {
+ super(cause);
+ }
+
+ protected DorisException(String message,
+ Throwable cause,
+ boolean enableSuppression,
+ boolean writableStackTrace) {
+ super(message, cause, enableSuppression, writableStackTrace);
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/exception/DorisRuntimeException.java b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/exception/DorisRuntimeException.java
new file mode 100644
index 0000000..b749607
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/exception/DorisRuntimeException.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.doris.exception;
+
+/**
+ * Doris runtime exception.
+ */
+public class DorisRuntimeException extends RuntimeException {
+
+ public DorisRuntimeException() {
+ super();
+ }
+
+ public DorisRuntimeException(String message) {
+ super(message);
+ }
+
+ public DorisRuntimeException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ public DorisRuntimeException(Throwable cause) {
+ super(cause);
+ }
+
+ protected DorisRuntimeException(String message,
+ Throwable cause,
+ boolean enableSuppression,
+ boolean writableStackTrace) {
+ super(message, cause, enableSuppression, writableStackTrace);
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/exception/IllegalArgumentException.java b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/exception/IllegalArgumentException.java
new file mode 100644
index 0000000..bc0995d
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/exception/IllegalArgumentException.java
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.doris.exception;
+
+public class IllegalArgumentException extends DorisException {
+
+ public IllegalArgumentException(String msg, Throwable cause) {
+ super(msg, cause);
+ }
+
+ public IllegalArgumentException(String arg, String value) {
+ super("argument '" + arg + "' is illegal, value is '" + value + "'.");
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/exception/StreamLoadException.java b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/exception/StreamLoadException.java
new file mode 100644
index 0000000..50d012f
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-doris/src/main/java/io/airbyte/integrations/destination/doris/exception/StreamLoadException.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.doris.exception;
+
+public class StreamLoadException extends Exception {
+
+ public StreamLoadException() {
+ super();
+ }
+
+ public StreamLoadException(String message) {
+ super(message);
+ }
+
+ public StreamLoadException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ public StreamLoadException(Throwable cause) {
+ super(cause);
+ }
+
+ protected StreamLoadException(String message,
+ Throwable cause,
+ boolean enableSuppression,
+ boolean writableStackTrace) {
+ super(message, cause, enableSuppression, writableStackTrace);
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-doris/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-doris/src/main/resources/spec.json
new file mode 100644
index 0000000..42cddd0
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-doris/src/main/resources/spec.json
@@ -0,0 +1,60 @@
+{
+ "documentationUrl": "https://docs.airbyte.io/integrations/destinations/doris",
+ "supportsIncremental": false,
+ "supportsNormalization": false,
+ "supportsDBT": false,
+ "supported_destination_sync_modes": ["append", "overwrite"],
+ "connectionSpecification": {
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "title": "Doris Destination Spec",
+ "type": "object",
+ "required": ["host", "httpport", "queryport", "username", "database"],
+ "properties": {
+ "host": {
+ "title": "Host",
+ "description": "Hostname of the database",
+ "type": "string",
+ "order": 0
+ },
+ "httpport": {
+ "title": "HttpPort",
+ "description": "Http Port of the database.",
+ "type": "integer",
+ "minimum": 0,
+ "maximum": 65536,
+ "default": 8030,
+ "examples": ["8030"],
+ "order": 1
+ },
+ "queryport": {
+ "title": "QueryPort",
+ "description": "Query(SQL) Port of the database.",
+ "type": "integer",
+ "minimum": 0,
+ "maximum": 65536,
+ "default": 9030,
+ "examples": ["9030"],
+ "order": 2
+ },
+ "database": {
+ "title": "DataBase Name",
+ "description": "Name of the database.",
+ "type": "string",
+ "order": 3
+ },
+ "username": {
+ "title": "UserName",
+ "description": "Username to use to access the database.",
+ "type": "string",
+ "order": 4
+ },
+ "password": {
+ "title": "Password",
+ "description": "Password associated with the username.",
+ "type": "string",
+ "airbyte_secret": true,
+ "order": 5
+ }
+ }
+ }
+}
diff --git a/airbyte-integrations/connectors/destination-doris/src/test-integration/java/io/airbyte/integrations/destination/doris/DorisDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-doris/src/test-integration/java/io/airbyte/integrations/destination/doris/DorisDestinationAcceptanceTest.java
new file mode 100644
index 0000000..b2e8ddd
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-doris/src/test-integration/java/io/airbyte/integrations/destination/doris/DorisDestinationAcceptanceTest.java
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.doris;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import io.airbyte.cdk.integrations.base.JavaBaseConstants;
+import io.airbyte.cdk.integrations.destination.StandardNameTransformer;
+import io.airbyte.cdk.integrations.standardtest.destination.DestinationAcceptanceTest;
+import io.airbyte.commons.io.IOs;
+import io.airbyte.commons.json.Jsons;
+import java.io.IOException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import org.apache.commons.lang3.StringEscapeUtils;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class DorisDestinationAcceptanceTest extends DestinationAcceptanceTest {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(DorisDestinationAcceptanceTest.class);
+
+ private JsonNode configJson;
+
+ private static final Path RELATIVE_PATH = Path.of("integration_test/test");
+
+ private static final String JDBC_DRIVER = "com.mysql.cj.jdbc.Driver";
+ private static final String DB_URL_PATTERN = "jdbc:mysql://%s:%d?rewriteBatchedStatements=true&useSSL=true&useUnicode=true&characterEncoding=utf8";
+ private static final int PORT = 8211;
+ private static Connection conn = null;
+
+ private static final StandardNameTransformer namingResolver = new StandardNameTransformer();
+
+ @Override
+ protected String getImageName() {
+ return "airbyte/destination-doris:dev";
+ }
+
+ @BeforeAll
+ public static void getConnect() {
+ final JsonNode config = Jsons.deserialize(IOs.readFile(Paths.get("../../../secrets/config.json")));
+ final String dbUrl = String.format(DB_URL_PATTERN, config.get("host").asText(), PORT);
+ try {
+ Class.forName(JDBC_DRIVER);
+ conn =
+ DriverManager.getConnection(dbUrl, config.get("username").asText(), config.get("password") == null ? "" : config.get("password").asText());
+ } catch (final Exception e) {
+ LOGGER.error("Failed to open the JDBC connection for the acceptance test", e);
+ }
+
+ }
+
+ @AfterAll
+ public static void closeConnect() throws SQLException {
+ if (conn != null) {
+ conn.close();
+ }
+ }
+
+ @Override
+ protected JsonNode getConfig() {
+ // The configuration is read from secrets/config.json; it could also be built in the setup method.
+ configJson = Jsons.deserialize(IOs.readFile(Paths.get("../../../secrets/config.json")));
+ return configJson;
+ }
+
+ @Override
+ protected JsonNode getFailCheckConfig() {
+ // TODO return an invalid config which, when used to run the connector's check connection operation,
+ // should result in a failed connection check
+ return null;
+ }
+
+ @Override
+ protected List<JsonNode> retrieveRecords(final TestDestinationEnv testEnv,
+ final String streamName,
+ final String namespace,
+ final JsonNode streamSchema)
+ throws IOException, SQLException {
+ // Retrieves the records written to the destination by the connector. They are compared against
+ // the records provided to the connector to verify they were written correctly.
+
+ final String tableName = namingResolver.getIdentifier(streamName);
+
+ final String query = String.format(
+ "SELECT * FROM %s.%s ORDER BY %s ASC;", configJson.get("database").asText(), tableName,
+ JavaBaseConstants.COLUMN_NAME_EMITTED_AT);
+ final PreparedStatement stmt = conn.prepareStatement(query);
+ final ResultSet resultSet = stmt.executeQuery();
+
+ final List<JsonNode> res = new ArrayList<>();
+ while (resultSet.next()) {
+ final String rawData = resultSet.getString(JavaBaseConstants.COLUMN_NAME_DATA);
+ res.add(Jsons.deserialize(StringEscapeUtils.unescapeJava(rawData)));
+ }
+ stmt.close();
+ return res;
+ }
+
+ @Override
+ protected void setup(final TestDestinationEnv testEnv, final HashSet<String> TEST_SCHEMAS) {
+ // TODO Implement this method to run any setup actions needed before every test case
+ }
+
+ @Override
+ protected void tearDown(final TestDestinationEnv testEnv) {
+ // TODO Implement this method to run any cleanup actions needed after every test case
+ }
+
+ public void testLineBreakCharacters() {
+ // overrides test with a no-op until we handle full UTF-8 in the destination
+ }
+
+ public void testSecondSync() throws Exception {
+ // no-op override; second-sync behavior is not verified for this destination yet
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-doris/src/test/java/io/airbyte/integrations/destination/doris/DorisDestinationTest.java b/airbyte-integrations/connectors/destination-doris/src/test/java/io/airbyte/integrations/destination/doris/DorisDestinationTest.java
new file mode 100644
index 0000000..d98a37b
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-doris/src/test/java/io/airbyte/integrations/destination/doris/DorisDestinationTest.java
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.doris;
+
+import static org.junit.jupiter.api.Assertions.*;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.*;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;
+import io.airbyte.cdk.integrations.base.AirbyteMessageConsumer;
+import io.airbyte.cdk.integrations.base.Destination;
+import io.airbyte.cdk.integrations.destination.StandardNameTransformer;
+import io.airbyte.commons.io.IOs;
+import io.airbyte.commons.json.Jsons;
+import io.airbyte.commons.resources.MoreResources;
+import io.airbyte.protocol.models.Field;
+import io.airbyte.protocol.models.JsonSchemaType;
+import io.airbyte.protocol.models.v0.AirbyteConnectionStatus;
+import io.airbyte.protocol.models.v0.AirbyteConnectionStatus.Status;
+import io.airbyte.protocol.models.v0.AirbyteMessage;
+import io.airbyte.protocol.models.v0.AirbyteRecordMessage;
+import io.airbyte.protocol.models.v0.AirbyteStateMessage;
+import io.airbyte.protocol.models.v0.CatalogHelpers;
+import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
+import io.airbyte.protocol.models.v0.ConnectorSpecification;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.time.Instant;
+import java.util.Collections;
+import java.util.Set;
+import java.util.stream.Collectors;
+import org.apache.commons.io.FileUtils;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+class DorisDestinationTest {
+
+ private static final Instant NOW = Instant.now();
+ private static final Path TEST_ROOT = Path.of("/tmp/airbyte_tests");
+ private static final String USERS_STREAM_NAME = "users";
+ private static final String TASKS_STREAM_NAME = "tasks";
+ private static final String USERS_FILE = new StandardNameTransformer().getRawTableName(USERS_STREAM_NAME) + ".csv";
+ private static final String TASKS_FILE = new StandardNameTransformer().getRawTableName(TASKS_STREAM_NAME) + ".csv";
+ private static final AirbyteMessage MESSAGE_USERS1 = new AirbyteMessage().withType(AirbyteMessage.Type.RECORD)
+ .withRecord(new AirbyteRecordMessage().withStream(USERS_STREAM_NAME)
+ .withData(Jsons.jsonNode(ImmutableMap.builder().put("name", "john").put("id", "10").build()))
+ .withEmittedAt(NOW.toEpochMilli()));
+ private static final AirbyteMessage MESSAGE_USERS2 = new AirbyteMessage().withType(AirbyteMessage.Type.RECORD)
+ .withRecord(new AirbyteRecordMessage().withStream(USERS_STREAM_NAME)
+ .withData(Jsons.jsonNode(ImmutableMap.builder().put("name", "susan").put("id", "30").build()))
+ .withEmittedAt(NOW.toEpochMilli()));
+ private static final AirbyteMessage MESSAGE_TASKS1 = new AirbyteMessage().withType(AirbyteMessage.Type.RECORD)
+ .withRecord(new AirbyteRecordMessage().withStream(TASKS_STREAM_NAME)
+ .withData(Jsons.jsonNode(ImmutableMap.builder().put("goal", "game").build()))
+ .withEmittedAt(NOW.toEpochMilli()));
+ private static final AirbyteMessage MESSAGE_TASKS2 = new AirbyteMessage().withType(AirbyteMessage.Type.RECORD)
+ .withRecord(new AirbyteRecordMessage().withStream(TASKS_STREAM_NAME)
+ .withData(Jsons.jsonNode(ImmutableMap.builder().put("goal", "code").build()))
+ .withEmittedAt(NOW.toEpochMilli()));
+ private static final AirbyteMessage MESSAGE_STATE = new AirbyteMessage().withType(AirbyteMessage.Type.STATE)
+ .withState(new AirbyteStateMessage().withData(Jsons.jsonNode(ImmutableMap.builder().put("checkpoint", "now!").build())));
+
+ private static final ConfiguredAirbyteCatalog CATALOG = new ConfiguredAirbyteCatalog().withStreams(Lists.newArrayList(
+ CatalogHelpers.createConfiguredAirbyteStream(USERS_STREAM_NAME, null, Field.of("name", JsonSchemaType.STRING),
+ Field.of("id", JsonSchemaType.STRING)),
+ CatalogHelpers.createConfiguredAirbyteStream(TASKS_STREAM_NAME, null, Field.of("goal", JsonSchemaType.STRING))));
+
+ private Path destinationPath;
+ private JsonNode config;
+
+ @BeforeEach
+ void setup() throws IOException {
+ destinationPath = Files.createTempDirectory(Files.createDirectories(TEST_ROOT), "test");
+ config = Jsons.deserialize(IOs.readFile(Paths.get("../../../secrets/config.json")));
+ }
+
+ private DorisDestination getDestination() {
+ final DorisDestination result = spy(DorisDestination.class);
+ doReturn(destinationPath).when(result).getTempPathDir(any());
+ return result;
+ }
+
+ @Test
+ void testSpec() throws Exception {
+ final ConnectorSpecification actual = getDestination().spec();
+ final String resourceString = MoreResources.readResource("spec.json");
+ final ConnectorSpecification expected = Jsons.deserialize(resourceString, ConnectorSpecification.class);
+
+ assertEquals(expected, actual);
+ }
+
+ @Test
+ void testCheckSuccess() {
+ final AirbyteConnectionStatus actual = getDestination().check(config);
+ final AirbyteConnectionStatus expected = new AirbyteConnectionStatus().withStatus(Status.SUCCEEDED);
+ assertEquals(expected, actual);
+ }
+
+ @Test
+ void testCheckFailure() throws IOException {
+ final Path looksLikeADirectoryButIsAFile = destinationPath.resolve("file");
+ FileUtils.touch(looksLikeADirectoryButIsAFile.toFile());
+ final DorisDestination destination = spy(DorisDestination.class);
+ doReturn(looksLikeADirectoryButIsAFile).when(destination).getTempPathDir(any());
+ // final JsonNode config =
+ // Jsons.jsonNode(ImmutableMap.of(DorisDestination.DESTINATION_TEMP_PATH_FIELD,
+ // looksLikeADirectoryButIsAFile.toString()));
+ final AirbyteConnectionStatus actual = destination.check(config);
+ final AirbyteConnectionStatus expected = new AirbyteConnectionStatus().withStatus(Status.FAILED);
+
+ // the message includes the random file path, so just verify it exists and then remove it when we do
+ // rest of the comparison.
+ assertNotNull(actual.getMessage());
+ actual.setMessage(null);
+ assertEquals(expected, actual);
+ }
+
+ @Test
+ void testCheckInvalidDestinationFolder() {
+ // final Path relativePath = Path.of("../tmp/conf.d/");
+ // final JsonNode config =
+ // Jsons.jsonNode(ImmutableMap.of(DorisDestination.DESTINATION_TEMP_PATH_FIELD,
+ // relativePath.toString()));
+ final AirbyteConnectionStatus actual = new DorisDestination().check(config);
+ final AirbyteConnectionStatus expected = new AirbyteConnectionStatus().withStatus(Status.FAILED);
+ // the message includes the random file path, so just verify it exists and then remove it when we do
+ // rest of the comparison.
+ assertNotNull(actual.getMessage());
+ actual.setMessage(null);
+ assertEquals(expected, actual);
+ }
+
+ @Test
+ void testWriteSuccess() throws Exception {
+ DorisDestination destination = getDestination();
+ destination.check(config);
+ final AirbyteMessageConsumer consumer = destination.getConsumer(config, CATALOG, Destination::defaultOutputRecordCollector);
+ consumer.accept(MESSAGE_USERS1);
+ consumer.accept(MESSAGE_TASKS1);
+ consumer.accept(MESSAGE_USERS2);
+ consumer.accept(MESSAGE_TASKS2);
+ consumer.accept(MESSAGE_STATE);
+ consumer.close();
+
+ }
+
+ @SuppressWarnings("ResultOfMethodCallIgnored")
+ @Test
+ void testWriteFailure() throws Exception {
+ // hack to force an exception to be thrown from within the consumer.
+ final AirbyteMessage spiedMessage = spy(MESSAGE_USERS1);
+ doThrow(new RuntimeException()).when(spiedMessage).getRecord();
+ DorisDestination destination = getDestination();
+ destination.check(config);
+ final AirbyteMessageConsumer consumer = spy(destination.getConsumer(config, CATALOG, Destination::defaultOutputRecordCollector));
+
+ assertThrows(RuntimeException.class, () -> consumer.accept(spiedMessage));
+ consumer.accept(MESSAGE_USERS2);
+ assertThrows(IOException.class, consumer::close);
+
+ // verify tmp files are cleaned up and no files are output at all
+ final Set<String> actualFilenames = Files.list(destinationPath).map(Path::getFileName).map(Path::toString).collect(Collectors.toSet());
+ assertEquals(Collections.emptySet(), actualFilenames);
+ }
+
+}
diff --git a/airbyte-integrations/connectors/destination-exasol/README.md b/airbyte-integrations/connectors/destination-exasol/README.md
new file mode 100644
index 0000000..8651db3
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-exasol/README.md
@@ -0,0 +1,71 @@
+# Destination Exasol
+
+This is the repository for the Exasol destination connector in Java.
+For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.io/integrations/destinations/exasol).
+
+## Local development
+
+#### Building via Gradle
+From the Airbyte repository root, run:
+```
+./gradlew :airbyte-integrations:connectors:destination-exasol:build
+```
+
+#### Create credentials
+**If you are a community contributor**, generate the necessary credentials and place them in `secrets/config.json` conforming to the spec file in `src/main/resources/spec.json`.
+Note that the `secrets` directory is git-ignored by default, so there is no danger of accidentally checking in sensitive information.
+
+**If you are an Airbyte core member**, follow the [instructions](https://docs.airbyte.io/connector-development#using-credentials-in-ci) to set up the credentials.
+
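+As a point of reference, the Java tests in this repository typically load these credentials at
+runtime. Below is a minimal sketch of that pattern; the `host` field is assumed from the
+destination's JDBC configuration handling, and the authoritative schema is `src/main/resources/spec.json`.
+
+```java
+import com.fasterxml.jackson.databind.JsonNode;
+import io.airbyte.commons.io.IOs;
+import io.airbyte.commons.json.Jsons;
+import java.nio.file.Path;
+
+public class ConfigLoadingSketch {
+
+  public static void main(String[] args) {
+    // Read the git-ignored credentials file and parse it into a JsonNode,
+    // mirroring how the acceptance tests in this repository load their configs.
+    final JsonNode config = Jsons.deserialize(IOs.readFile(Path.of("secrets/config.json")));
+    System.out.println("Configured host: " + config.get("host").asText());
+  }
+
+}
+```
+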
+### Locally running the connector docker image
+
+#### Build
+Build the connector image via Gradle:
+
+```
+./gradlew :airbyte-integrations:connectors:destination-exasol:buildConnectorImage
+```
+Once built, the docker image name and tag on your host will be `airbyte/destination-exasol:dev`.
+
+#### Run
+Then run any of the connector commands as follows:
+```
+docker run --rm airbyte/destination-exasol:dev spec
+docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-exasol:dev check --config /secrets/config.json
+docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-exasol:dev discover --config /secrets/config.json
+docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-exasol:dev read --config /secrets/config.json --catalog /integration_tests/configured_catalog.json
+```
+
+## Testing
+The connector uses `JUnit` for Java tests.
+
+### Unit and Integration Tests
+Place unit tests under `src/test/java/io/airbyte/integrations/destination/exasol`. A minimal sketch follows.
+
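+As a rough illustration (the class under test exists in this connector, but the assertion here is
+only a smoke check, not a documented guarantee of `ExasolSQLNameTransformer`):
+
+```java
+package io.airbyte.integrations.destination.exasol;
+
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+import org.junit.jupiter.api.Test;
+
+class ExasolSQLNameTransformerTest {
+
+  @Test
+  void getIdentifierReturnsAValue() {
+    // Minimal smoke test; real tests would assert the Exasol-specific casing and quoting rules.
+    assertNotNull(new ExasolSQLNameTransformer().getIdentifier("users"));
+  }
+
+}
+```
+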
+#### Acceptance Tests
+Airbyte has a standard test suite that all destination connectors must pass.
+
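+For orientation, the shape of such a test mirrors the Doris acceptance test added in this PR; the
+bodies below are stubs, and the exact base-class signatures depend on the CDK version in use.
+
+```java
+package io.airbyte.integrations.destination.exasol;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import io.airbyte.cdk.integrations.standardtest.destination.DestinationAcceptanceTest;
+import java.util.HashSet;
+import java.util.List;
+
+public class ExasolDestinationAcceptanceTest extends DestinationAcceptanceTest {
+
+  @Override
+  protected String getImageName() {
+    // The docker image built by the buildConnectorImage Gradle task.
+    return "airbyte/destination-exasol:dev";
+  }
+
+  @Override
+  protected JsonNode getConfig() {
+    // Valid credentials, typically read from secrets/config.json.
+    throw new UnsupportedOperationException("stub");
+  }
+
+  @Override
+  protected JsonNode getFailCheckConfig() {
+    // Deliberately invalid credentials so the connector's check operation fails.
+    throw new UnsupportedOperationException("stub");
+  }
+
+  @Override
+  protected List<JsonNode> retrieveRecords(final TestDestinationEnv testEnv,
+                                           final String streamName,
+                                           final String namespace,
+                                           final JsonNode streamSchema) {
+    // Query the destination and return the raw records written for the given stream.
+    throw new UnsupportedOperationException("stub");
+  }
+
+  @Override
+  protected void setup(final TestDestinationEnv testEnv, final HashSet<String> testSchemas) {
+    // Per-test setup (for example, creating a test schema) would go here.
+  }
+
+  @Override
+  protected void tearDown(final TestDestinationEnv testEnv) {
+    // Per-test cleanup (for example, dropping the test schema) would go here.
+  }
+
+}
+```
+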
+### Using gradle to run tests
+All commands should be run from the Airbyte project root.
+To run unit tests:
+```
+./gradlew :airbyte-integrations:connectors:destination-exasol:unitTest
+```
+To run acceptance and custom integration tests:
+```
+./gradlew :airbyte-integrations:connectors:destination-exasol:integrationTest
+```
+
+## Dependency Management
+
+### Publishing a new version of the connector
+You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what?
+1. Make sure your changes are passing our test suite: `airbyte-ci connectors --name=destination-exasol test`
+2. Bump the connector version in `metadata.yaml`: increment the `dockerImageTag` value. Please follow [semantic versioning for connectors](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#semantic-versioning-for-connectors).
+3. Make sure the `metadata.yaml` content is up to date.
+4. Make sure the connector documentation and its changelog are up to date (`docs/integrations/destinations/exasol.md`).
+5. Create a Pull Request: use [our PR naming conventions](https://docs.airbyte.com/contributing-to-airbyte/resources/pull-requests-handbook/#pull-request-title-convention).
+6. Pat yourself on the back for being an awesome contributor.
+7. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master.
+
diff --git a/airbyte-integrations/connectors/destination-exasol/bootstrap.md b/airbyte-integrations/connectors/destination-exasol/bootstrap.md
new file mode 100644
index 0000000..f3342f5
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-exasol/bootstrap.md
@@ -0,0 +1,19 @@
+# Exasol
+
+## Overview
+
+Exasol is the in-memory database built for analytics.
+
+## Endpoints
+
+The destination-exasol connector uses the official [Exasol JDBC driver](https://docs.exasol.com/db/latest/connect_exasol/drivers/jdbc.htm).
+
+## Quick Notes
+
+- TLS connections are used by default. If the Exasol database uses a self-signed certificate, specify the certificate fingerprint; see the sketch below.
+
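+A rough plain-JDBC sketch of connecting with a pinned fingerprint: the `fingerprint` property name,
+URL layout and port are assumptions here; the Exasol JDBC documentation linked under Reference is
+authoritative.
+
+```java
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.util.Properties;
+
+public class ExasolTlsSketch {
+
+  public static void main(String[] args) throws Exception {
+    final Properties props = new Properties();
+    props.setProperty("user", "sys");
+    props.setProperty("password", "<password>");
+    // Assumed property: pins the self-signed server certificate by its fingerprint.
+    props.setProperty("fingerprint", "<certificate-fingerprint>");
+
+    // 8563 is the conventional Exasol port; adjust host and port for your cluster.
+    try (Connection connection = DriverManager.getConnection("jdbc:exa:exasol.example.com:8563", props)) {
+      System.out.println("Connected: " + !connection.isClosed());
+    }
+  }
+
+}
+```
+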
+## Reference
+
+- [Exasol homepage](https://www.exasol.com/)
+- [Exasol documentation](https://docs.exasol.com/db/latest/home.htm)
+- [Exasol JDBC driver documentation](https://docs.exasol.com/db/latest/connect_exasol/drivers/jdbc.htm)
diff --git a/airbyte-integrations/connectors/destination-exasol/build.gradle b/airbyte-integrations/connectors/destination-exasol/build.gradle
new file mode 100644
index 0000000..3380731
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-exasol/build.gradle
@@ -0,0 +1,37 @@
+plugins {
+ id 'application'
+ id 'airbyte-java-connector'
+}
+
+airbyteJavaConnector {
+ cdkVersionRequired = '0.2.0'
+ features = ['db-destinations']
+ useLocalCdk = false
+}
+
+// Remove once the CDK version is upgraded to 0.4.x or later.
+java {
+ compileJava {
+ options.compilerArgs.remove("-Werror")
+ }
+}
+
+airbyteJavaConnector.addCdkDependencies()
+
+application {
+ mainClass = 'io.airbyte.integrations.destination.exasol.ExasolDestination'
+}
+
+dependencies {
+
+ implementation 'com.exasol:exasol-jdbc:7.1.17'
+
+ testImplementation 'org.hamcrest:hamcrest-all:1.3'
+
+ // Explicitly upgrade testcontainers to avoid java.lang.NoSuchMethodError:
+ // 'org.testcontainers.containers.GenericContainer com.exasol.containers.ExasolContainer.withCopyToContainer(org.testcontainers.images.builder.Transferable, java.lang.String)'
+ testImplementation 'org.testcontainers:testcontainers:1.17.6'
+
+ integrationTestJavaImplementation 'com.exasol:exasol-testcontainers:6.5.0'
+ integrationTestJavaImplementation 'org.testcontainers:testcontainers:1.17.6'
+}
diff --git a/airbyte-integrations/connectors/destination-exasol/metadata.yaml b/airbyte-integrations/connectors/destination-exasol/metadata.yaml
new file mode 100644
index 0000000..90b0a6b
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-exasol/metadata.yaml
@@ -0,0 +1,23 @@
+data:
+ connectorSubtype: database
+ connectorType: destination
+ definitionId: bb6071d9-6f34-4766-bec2-d1d4ed81a653
+ dockerImageTag: 0.1.1
+ dockerRepository: airbyte/destination-exasol
+ githubIssueLabel: destination-exasol
+ license: MIT
+ name: Exasol
+ registries:
+ cloud:
+ enabled: false
+ oss:
+ enabled: false
+ releaseStage: alpha
+ documentationUrl: https://docs.airbyte.com/integrations/destinations/exasol
+ tags:
+ - language:java
+ ab_internal:
+ sl: 100
+ ql: 100
+ supportLevel: archived
+metadataSpecVersion: "1.0"
diff --git a/airbyte-integrations/connectors/destination-exasol/src/main/java/io/airbyte/integrations/destination/exasol/ExasolDestination.java b/airbyte-integrations/connectors/destination-exasol/src/main/java/io/airbyte/integrations/destination/exasol/ExasolDestination.java
new file mode 100644
index 0000000..8145c85
--- /dev/null
+++ b/airbyte-integrations/connectors/destination-exasol/src/main/java/io/airbyte/integrations/destination/exasol/ExasolDestination.java
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.exasol;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.google.common.collect.ImmutableMap;
+import io.airbyte.cdk.db.factory.DatabaseDriver;
+import io.airbyte.cdk.db.jdbc.JdbcUtils;
+import io.airbyte.cdk.integrations.base.Destination;
+import io.airbyte.cdk.integrations.base.IntegrationRunner;
+import io.airbyte.cdk.integrations.destination.jdbc.AbstractJdbcDestination;
+import io.airbyte.commons.json.Jsons;
+import java.util.HashMap;
+import java.util.Map;
+
+public class ExasolDestination extends AbstractJdbcDestination implements Destination {
+
+ public static final String DRIVER_CLASS = DatabaseDriver.EXASOL.getDriverClassName();
+
+ public ExasolDestination() {
+ super(DRIVER_CLASS, new ExasolSQLNameTransformer(), new ExasolSqlOperations());
+ }
+
+ public static void main(String[] args) throws Exception {
+ new IntegrationRunner(new ExasolDestination()).run(args);
+ }
+
+ @Override
+ public JsonNode toJdbcConfig(final JsonNode config) {
+ final String jdbcUrl = String.format(DatabaseDriver.EXASOL.getUrlFormatString(),
+ config.get(JdbcUtils.HOST_KEY).asText(), config.get(JdbcUtils.PORT_KEY).asInt());
+
+ final ImmutableMap.Builder